Created
June 7, 2022 16:57
-
-
Save ocefpaf/9774090e9f2ea7dd8b549d2c8c61d9ed to your computer and use it in GitHub Desktop.
bad_metadata
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "# Check why some gliders returned by the search fail when we try to download their data" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "from erddapy import ERDDAP\n\n\ne = ERDDAP(\n server=\"https://data.ioos.us/gliders/erddap\",\n protocol=\"tabledap\"\n)\n\ne.variables = [\n \"depth\",\n \"latitude\",\n \"longitude\",\n \"salinity\",\n \"temperature\",\n \"time\",\n]", | |
| "execution_count": 1, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "## Search gliders" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "min_time, max_time = \"2016-07-10T00:00:00Z\", \"2017-02-10T00:00:00Z\"\nmin_lon, max_lon = -72.0, -69.0\nmin_lat, max_lat = 38.0, 41.0\n\nkw = {\n \"standard_name\": \"sea_water_temperature\",\n \"min_lon\": min_lon,\n \"max_lon\": max_lon,\n \"min_lat\": min_lat,\n \"max_lat\": max_lat,\n \"min_time\": min_time,\n \"max_time\": max_time,\n \"cdm_data_type\": \"trajectoryprofile\",\n}", | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "import pandas as pd\n\n\nsearch_url = e.get_search_url(response=\"csv\", **kw)\nsearch = pd.read_csv(search_url)\ngliders = search[\"Dataset ID\"].values\n\njoined = \"\\n\".join(gliders)\nprint(f\"Found {len(gliders)} Glider Datasets:\\n\\n{joined}\")", | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": "Found 19 Glider Datasets:\n\nblue-20160818T1448\ncp_335-20170116T1459-delayed\ncp_336-20160809T1354-delayed\ncp_336-20161011T0058-delayed\ncp_336-20170116T1254-delayed\ncp_339-20170116T2353-delayed\ncp_340-20160809T0621-delayed\ncp_374-20140416T1634-delayed\ncp_374-20150509T1256-delayed\ncp_374-20160529T0026-delayed\ncp_376-20160527T2050-delayed\ncp_380-20161011T2046-delayed\ncp_387-20160404T1858-delayed\ncp_388-20160809T1406-delayed\ncp_388-20170116T1324-delayed\ncp_389-20161011T2040-delayed\nsilbo-20160413T1534\nsp022-20170209T1616\nwhoi_406-20160902T1700\n", | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### sp022-20170209T1616" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "# Same as the search... Should work, right?\ne.constraints = {\n \"time>=\": min_time,\n \"time<=\": max_time,\n \"latitude>=\": min_lat,\n \"latitude<=\": max_lat,\n \"longitude>=\": min_lon,\n \"longitude<=\": max_lon,\n}\n\n\ne.dataset_id = \"sp022-20170209T1616\"\n\ntry:\n df = e.to_pandas(index_col=\"time (UTC)\", parse_dates=True)\nexcept Exception as err:\n print(f\"{err}\")", | |
| "execution_count": 4, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": "Error {\n code=404;\n message=\"Not Found: Your query produced no matching results. (nRows = 0)\";\n}\n\n", | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "# Remove the bbox constraint.\ne.constraints = {\n \"time>=\": min_time,\n \"time<=\": max_time,\n}\n\n\ndf = e.to_pandas(index_col=\"time (UTC)\", parse_dates=True)\n\nprint(f\"{min_lon=}, {max_lon=}, {min_lat=}, {max_lat=}\")", | |
| "execution_count": 5, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": "min_lon=-72.0, max_lon=-69.0, min_lat=38.0, max_lat=41.0\n", | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "df[[\"longitude (degrees_east)\", \"latitude (degrees_north)\"]].describe().T[[\"min\", \"max\"]]", | |
| "execution_count": 6, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "execution_count": 6, | |
| "data": { | |
| "text/plain": " min max\nlongitude (degrees_east) -80.03365 -79.97467\nlatitude (degrees_north) 25.77845 25.93868", | |
| "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>min</th>\n <th>max</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>longitude (degrees_east)</th>\n <td>-80.03365</td>\n <td>-79.97467</td>\n </tr>\n <tr>\n <th>latitude (degrees_north)</th>\n <td>25.77845</td>\n <td>25.93868</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
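| "source": "Looks like the latitudes and longitudes are off here too: within the requested time window the track sits near longitude -80, latitude 26, well outside the search bounding box, so the constrained request returns no rows. The dataset-level metadata is probably what throws the search off; check the `actual_range` attributes below." | |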
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "info = pd.read_csv(e.get_info_url(response=\"csv\"))\n\nlon = info.loc[info[\"Variable Name\"] == \"longitude\"]\nlon.loc[lon[\"Attribute Name\"] == \"actual_range\"]", | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "execution_count": 7, | |
| "data": { | |
| "text/plain": " Row Type Variable Name Attribute Name Data Type Value\n108 attribute longitude actual_range double -80.15482, -66.89955", | |
| "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Row Type</th>\n <th>Variable Name</th>\n <th>Attribute Name</th>\n <th>Data Type</th>\n <th>Value</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>108</th>\n <td>attribute</td>\n <td>longitude</td>\n <td>actual_range</td>\n <td>double</td>\n <td>-80.15482, -66.89955</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "lat = info.loc[info[\"Variable Name\"] == \"latitude\"]\nlat.loc[lat[\"Attribute Name\"] == \"actual_range\"]", | |
| "execution_count": 8, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "execution_count": 8, | |
| "data": { | |
| "text/plain": " Row Type Variable Name Attribute Name Data Type Value\n91 attribute latitude actual_range double 25.77845, 40.79168", | |
| "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Row Type</th>\n <th>Variable Name</th>\n <th>Attribute Name</th>\n <th>Data Type</th>\n <th>Value</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>91</th>\n <td>attribute</td>\n <td>latitude</td>\n <td>actual_range</td>\n <td>double</td>\n <td>25.77845, 40.79168</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "#### Let's get the whole dataset and plot it against the bounding box" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "e.constraints = {}\n\ndf = e.to_pandas(index_col=\"time (UTC)\", parse_dates=True)\n\nconstrained = df[min_time:max_time]", | |
| "execution_count": 9, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true | |
| }, | |
| "cell_type": "code", | |
| "source": "import folium\n\n\nlon = (min_lon + max_lon) / 2\nlat = (min_lat + max_lat) / 2\n\nm = folium.Map(location=[lat, lon])\n\nfolium.Polygon([(min_lat,min_lon), (max_lat,min_lon), (max_lat,max_lon), (min_lat,max_lon)],\n fill=True).add_to(m)\n\n\n# Whole track\nx, y = df[\"longitude (degrees_east)\"].to_numpy(), df[\"latitude (degrees_north)\"].to_numpy()\nlocations = list(zip(y, x))\n\nfolium.PolyLine(\n locations=locations, color=\"orange\", weight=8, opacity=0.75,\n).add_to(m)\n\n# Within the time constraint\nx, y = constrained[\"longitude (degrees_east)\"].to_numpy(), constrained[\"latitude (degrees_north)\"].to_numpy()\nlocations = list(zip(y, x))\n\nfolium.PolyLine(\n locations=locations, color=\"red\", weight=8, opacity=0.75,\n).add_to(m)\n\n\nm.fit_bounds(m.get_bounds())\nm", | |
| "execution_count": 10, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "execution_count": 10, | |
| "data": { | |
| "text/plain": "<folium.folium.Map at 0x7f0ddac476a0>", |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment