Skip to content

Instantly share code, notes, and snippets.

@tinesubic
Last active June 4, 2018 09:45
Show Gist options
  • Select an option

  • Save tinesubic/203c3acc7178b2b5eb3b55a380aae052 to your computer and use it in GitHub Desktop.

Select an option

Save tinesubic/203c3acc7178b2b5eb3b55a380aae052 to your computer and use it in GitHub Desktop.
import hashlib
import os
import shutil
import sys
from functools import partial
from os import listdir
from os.path import isfile, join
from urllib.parse import urljoin

import requests
import tqdm
from bs4 import BeautifulSoup

# Base URL of the Geofabrik download server; region names are appended to it.
root = 'http://download.geofabrik.de/'
# Region index pages (relative to `root`) that are scanned for extracts.
regions = ['africa']
def md5sum(filename, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and hashed chunk by chunk, so a large
    file is never held in memory as a whole.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes requested per read. Must be positive.

    Returns:
        The digest as a string of hexadecimal digits.

    Raises:
        ValueError: If *chunk_size* is not a positive number.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b'' immediately and read(-1) slurps the whole file;
    # neither is what a caller asking for chunked hashing wants.
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')

    digest = hashlib.md5()
    with open(filename, mode='rb') as f:
        # iter() with a sentinel keeps calling f.read(chunk_size) until it
        # returns b'', i.e. until end of file.
        for buf in iter(partial(f.read, chunk_size), b''):
            digest.update(buf)
    return digest.hexdigest()
def download_file(url, path, timeout=60, chunk_size=1024 * 1024):
    """Stream the resource at *url* into the local file *path*.

    The body is fetched with ``stream=True`` and written chunk by chunk, so
    the download is never buffered in memory as a whole.

    Args:
        url: Address of the resource to fetch.
        path: Destination file; it is created or overwritten. Missing parent
            directories are created.
        timeout: Value passed to ``requests.get`` as its ``timeout``.
        chunk_size: Number of bytes requested per chunk while streaming.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination cannot be created or written.
    """
    # Push any pending console output out before the (possibly long)
    # transfer starts.
    sys.stdout.flush()

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    r = requests.get(url, stream=True, timeout=timeout)
    try:
        # Fail loudly instead of saving an HTML error page as the payload.
        r.raise_for_status()
        with open(path, 'wb') as f:
            # iter_content() undoes any gzip/deflate transfer encoding, which
            # copying from r.raw directly would not do.
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    finally:
        # A streamed response keeps its connection checked out until closed.
        r.close()
# Download every "*-latest.osm.pbf" extract, together with its ".md5"
# checksum file, that is linked from each region's index page.
for r in regions:
    url = '{}{}/'.format(root, r)
    page = requests.get(url, timeout=60)
    # Stop here on an HTTP error rather than parsing an error page.
    page.raise_for_status()
    html = BeautifulSoup(page.content, 'html.parser')

    # href=True restricts the result to anchors that carry an href attribute,
    # so no further presence check is needed inside the loop.
    for tag in html.find_all('a', href=True):
        href = tag.attrs['href']
        if '-latest.osm.pbf' not in href:
            continue

        # urljoin resolves relative hrefs against the index page and leaves
        # absolute ones untouched.
        urlfile = urljoin(url, href)
        print(urlfile)
        sys.stdout.flush()

        # Only the last path component is used locally, so an href that
        # contains directories cannot point outside the data folders.
        filename = href.rsplit('/', 1)[-1]
        if filename.endswith('.md5'):
            download_file(urlfile, 'data/checksum/' + filename)
        else:
            download_file(urlfile, 'data/pbf/' + filename)
# Verify each downloaded extract against the MD5 checksum file that was
# fetched alongside it.
checksums = sorted(
    f for f in listdir('data/checksum') if isfile(join('data/checksum', f))
)
pbffiles = sorted(
    f for f in listdir('data/pbf') if isfile(join('data/pbf', f))
)

if len(checksums) != len(pbffiles):
    print('Invalid length!')
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module.
    sys.exit(1)

available_checksums = set(checksums)
failed = []
for pbf_name in pbffiles:
    # Pair files by name ("<extract>.md5") instead of by sorted position, so
    # one stray file cannot shift every later comparison.
    checksum_name = pbf_name + '.md5'
    if checksum_name not in available_checksums:
        print('Missing checksum for', pbf_name)
        failed.append(pbf_name)
        continue

    print('Comparing', checksum_name, pbf_name)
    hashcode = md5sum(join('data/pbf', pbf_name))
    # errors='replace' keeps a corrupt checksum file from aborting the run;
    # it then simply fails the comparison below.
    with open(join('data/checksum/', checksum_name),
              encoding='utf-8', errors='replace') as hash_file:
        precomputed = hash_file.read()

    # The computed digest only has to appear somewhere in the checksum file;
    # lower-casing makes the comparison case-insensitive.
    if hashcode in precomputed.lower():
        print(pbf_name, 'OK')
    else:
        print(pbf_name, ' FAIL')
        failed.append(pbf_name)

# TODO(review): the original ended with a "remove files" note here; no
# cleanup of the data directories is implemented.

if failed:
    # Report a non-zero status so callers can detect bad downloads.
    sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment