#!/usr/bin/python3 -u
#
# Copyright Dan Smith <[email protected]>
#
# This downloads USGS topo GeoPDF maps in bulk, by default the current
# high-resolution 7.5-minute maps. For reference, all of Oregon comes to
# 96GB, all of Washington is 78GB. For each state, the index will be
# downloaded into the root of the per-state directory, which is how you
# find which section map you need, by name.
#
# To use this, you need python3 and python3-requests. On Debian or Ubuntu,
#
#   sudo apt-get install python3-requests
#
# should be enough. Anything else, probably:
#
#   pip install requests
#
# Run this with -h to get some help on how to use this, where to get a
# CSV map dump, etc.
#
# Super quick way to get a region (use your own lat/lon):
#
#   $ curl -O http://thor-f5.er.usgs.gov/ngtoc/metadata/misc/topomaps_all.zip
#   $ unzip topomaps_all.zip
#   $ unzip ustopo_current.zip
#   $ python3 ustopo_fetch.py --center 45.5,-122.0 --radius 100 ustopo_current.csv
#
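# As a rough sketch of what to expect on disk (the quad filenames below are
# made up; the real names come from the download URLs in the CSV), each map
# is saved under <dest>/<STATE>/ next to that state's index:
#
#   pdf/OR/index-OR.pdf
#   pdf/OR/OR_Portland_20170601_TM_geo.pdf
#   pdf/WA/index-WA.pdf
#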

import argparse
import csv
import functools
import math
import os
import sys
import textwrap

import requests

MiB = 1024 * 1024
INDEX_BASE = 'https://store.usgs.gov/assets/MOD/StoreFiles/zoom/pdf/'
TYPES = ('7.5', '15', '30')
INDEX_OVERRIDES = {
    'MA': 'MA-RI-CT',
    'RI': 'MA-RI-CT',
    'CT': 'MA-RI-CT',
    'MD': 'MD-DE-DC',
    'DE': 'MD-DE-DC',
    'DC': 'MD-DE-DC',
    'NH': 'NH-VT',
    'VT': 'NH-VT',
}


def tempfile(fn):
    return '%s.part' % fn


def purge_temp_on_error(func):
    @functools.wraps(func)
    def inner(url, fn):
        try:
            return func(url, fn)
        except:
            tempfn = tempfile(fn)
            if os.path.exists(tempfn):
                print('Removing partial file %s' % tempfn)
                os.remove(tempfn)
            raise
    return inner


@purge_temp_on_error
def download_file(url, fn):
    tmpfn = tempfile(fn)
    with requests.get(url, stream=True) as r:
        if r.status_code == 200:
            size = 0
            with open(tmpfn, 'wb') as outf:
                for chunk in r.iter_content(chunk_size=65536):
                    if os.isatty(1) and (size == 0 or size > MiB):
                        sys.stdout.write(
                            'Writing %s...%i MiB\r' % (fn, size / MiB))
                        sys.stdout.flush()
                    outf.write(chunk)
                    size += len(chunk)
            os.rename(tmpfn, fn)
            if os.isatty(1):
                sys.stdout.write('%s\r' % (' ' * 80))
                sys.stdout.flush()
            print('%s %i MiB done' % (fn, size / (1024*1024)))
            return True
        else:
            print('Failed to fetch %s' % url)
            return False
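
# A minimal usage sketch (the URL and filename here are made up): the file is
# streamed to '<fn>.part' and only renamed to '<fn>' on success, so an
# interrupted run never leaves a truncated final file behind.
#
#   if download_file('https://example.com/OR_Portland.pdf', 'OR_Portland.pdf'):
#       print('saved OR_Portland.pdf')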


def great_circle_distance(latlong_a, latlong_b):
    """
    >>> coord_pairs = [
    ...     # between eighth and 31st and eighth and 30th
    ...     [(40.750307,-73.994819), (40.749641,-73.99527)],
    ...     # sanfran to NYC ~2568 miles
    ...     [(37.784750,-122.421180), (40.714585,-74.007202)],
    ...     # about 10 feet apart
    ...     [(40.714732,-74.008091), (40.714753,-74.008074)],
    ...     # inches apart
    ...     [(40.754850,-73.975560), (40.754851,-73.975561)],
    ... ]
    >>> for pair in coord_pairs:
    ...     great_circle_distance(pair[0], pair[1])  # doctest: +ELLIPSIS
    0.0517759...
    2568.3392...
    0.00170423...
    8.6776...e-05
    """
    EARTH_RADIUS = 6378137  # earth radius in meters
    lat1, lon1 = latlong_a
    lat2, lon2 = latlong_b
    dLat = math.radians(lat2 - lat1)
    dLon = math.radians(lon2 - lon1)
    a = (math.sin(dLat / 2) * math.sin(dLat / 2) +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(dLon / 2) * math.sin(dLon / 2))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    d = EARTH_RADIUS * c
    # Return in miles
    return d * 0.000621371


def should_get_tile(center, radius, n_lat, s_lat, e_lon, w_lon):
    ne_corner = (n_lat, e_lon)
    sw_corner = (s_lat, w_lon)
    nw_corner = (n_lat, w_lon)
    se_corner = (s_lat, e_lon)
    ne_dist = great_circle_distance(center, ne_corner)
    sw_dist = great_circle_distance(center, sw_corner)
    nw_dist = great_circle_distance(center, nw_corner)
    se_dist = great_circle_distance(center, se_corner)
    closest_corner = min(ne_dist, sw_dist, nw_dist, se_dist)
    return closest_corner <= radius
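
# A quick illustrative check (not part of the doctests above; the numbers are
# made up): with a center of 45.5,-122.0 and a 100 mile radius, a tile whose
# SW corner sits exactly on the center is trivially in range. Note that the
# radius is in miles, matching what great_circle_distance() returns.
#
#   should_get_tile((45.5, -122.0), 100, 45.625, 45.5, -121.875, -122.0)
#   # -> True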


def process_csv(args):
    f = csv.reader(open(args.csv_file))
    header = next(f)
    # The first format is the mapviewer subset; the second is the
    # full database dump
    header_options = ('downloadURL', 'Download Product S3', 'product_url')
    extent_options = ('extent', 'Grid Size', 'grid_size')
    type_options = ('datasets', 'Version', 'edition')
    url_index = None
    for i, header_option in enumerate(header_options):
        try:
            url_index = header.index(header_option)
            extent_index = header.index(extent_options[i])
            type_index = header.index(type_options[i])
            break
        except (ValueError, IndexError):
            pass
    if url_index is None:
        print('Unknown CSV format. The USGS has changed their format multiple')
        print('times in the past, so this may not be your fault. Comment here:')
        print('https://gist.github.com/kk7ds/e47b50bd80c405dcdfb9cf44c1448137')
        print('to let me know.')
        return False
    if not os.path.isdir(args.dest):
        os.mkdir(args.dest)
    if args.center and 'N Lat' not in header:
        print('--center is compatible only with a full dump CSV '
              '(i.e. topomaps_all.csv)')
        return False
    if args.center:
        try:
            lat, lon = args.center.split(',')
            center = (float(lat), float(lon))
        except ValueError:
            print('Invalid --center=%r ; '
                  'use something like 45.125,-122.876' % (
                      args.center))
            return False
    else:
        center = None
    warnings = []
    for row in f:
        if args.explain_headers:
            for k, v in dict(zip(header, row)).items():
                print('%s = %s' % (k, v))
            return
        try:
            url = row[url_index]
            extent = row[extent_index]
            type_ = row[type_index]
        except IndexError:
            # Empty line, likely at the end of the file
            continue
        if args.type not in type_:
            if type_ not in warnings:
                print('Skipping map types %s' % type_)
                warnings.append(type_)
            continue
        if '%s minute' % args.extent not in extent.lower():
            if extent not in warnings:
                print('Skipping map extents %s' % extent)
                warnings.append(extent)
            continue
        basefn = os.path.basename(url)
        state = basefn.split('_')[0]
        if args.state and args.state.upper() != state:
            if state not in warnings:
                print('Skipping maps for state %s' % state)
                warnings.append(state)
            continue
        if center:
            try:
                n_lat = float(row[header.index('N Lat')])
                s_lat = float(row[header.index('S Lat')])
                e_lon = float(row[header.index('E Long')])
                w_lon = float(row[header.index('W Long')])
            except ValueError as e:
                print('CSV file has unparseable Lat/Lon bounding: %s' % e)
                return False
            if not should_get_tile(center, args.radius,
                                   n_lat, s_lat, e_lon, w_lon):
                if 'bounding' not in warnings:
                    print('Skipping maps outside %i mi bounding box' % (
                        args.radius))
                    warnings.append('bounding')
                continue
        if not os.path.isdir(os.path.join(args.dest, state)):
            os.mkdir(os.path.join(args.dest, state))
        state_for_index = INDEX_OVERRIDES.get(state, state)
        indexfn = os.path.join(args.dest, state, 'index-%s.pdf' % state)
        if not os.path.exists(indexfn) and not args.no_index:
            download_file('%s/%s.pdf' % (INDEX_BASE, state_for_index), indexfn)
        fn = os.path.join(args.dest, state, basefn)
        if os.path.exists(fn):
            print('Skipping existing %s' % fn)
            continue
        download_file(url, fn)
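
# If a CSV from the USGS does not match any of the known column layouts, a
# quick way to inspect it is --explain_headers, which prints the header/value
# pairs for the first data row and exits, e.g.:
#
#   python3 ustopo_fetch.py --explain_headers ustopo_current.csv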


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=textwrap.dedent("""
        This is a bulk downloader for USGS 7.5 minute topographical maps in
        GeoPDF format. It is intentionally verbose, and is designed to be
        run interactively, providing information about what it is doing along
        the way. At a minimum, you need to provide it with a CSV file of map
        information (see below) for it to process. By default, the results
        will be stored in pdf/$state, along with the map index.
        """),
        epilog=textwrap.dedent("""
        You can get a CSV file to feed this in one of two ways.

        First (and recommended), you can download the entire database dump
        by getting the zip file here:

        http://thor-f5.er.usgs.gov/ngtoc/metadata/misc/topomaps_all.zip

        Unzip this file and you will find a number of ZIP files inside. You
        likely want the 'ustopo_current.zip'. Unzip that as well and use the
        'ustopo_current.csv' file inside.

        Second, you can query a subset of the database visually by going here:

        https://viewer.nationalmap.gov/basic/?basemap=b1&category=histtopo%2Custopo

        Zoom and pan the map to a region, click "Find Products" and then
        "Save as CSV" after results are displayed.

        By default, every map in the provided CSV file will be fetched. To
        limit to just one state, use "--state XX" with a two-letter
        abbreviation. To limit to an area, provide a lat/lon coordinate
        via --center and adjust --radius to control how far around that
        center point we go for maps (note the radius defines a square, not
        a circle).
        """))
    parser.add_argument('csv_file',
                        help='Path to a CSV file of map data')
    parser.add_argument('--dest', default='pdf',
                        help='Where to save the PDF files (default: "pdf/")')
    parser.add_argument('--state', default=None,
                        help='Only download maps for this state '
                             '(two-letter abbreviation; ex: "WA")')
    parser.add_argument('--type', default='Current',
                        choices=('Current', 'Historical'),
                        help='Type of map to get (default: Current)')
    parser.add_argument('--extent', default=TYPES[0],
                        choices=TYPES,
                        help='Map extent to get, in minutes '
                             '(default: %s)' % TYPES[0])
    parser.add_argument('--center', default=None,
                        help=('Only download maps around this center lat/lon '
                              '(example: 45.125,-121.876)'))
    parser.add_argument('--radius', default=100, type=int,
                        help=('Radius (in miles) around center to fetch '
                              '(default: 100)'))
    parser.add_argument('--no-index', action='store_true',
                        help='Do not download state indexes')
    parser.add_argument('--explain_headers', action='store_true',
                        help='For debugging only: explain headers')
    args = parser.parse_args()
    try:
        return 0 if process_csv(args) is not False else 1
    except KeyboardInterrupt:
        return 2


if __name__ == '__main__':
    sys.exit(main())