Last active
February 5, 2020 01:05
-
-
Save maxerickson/eafdf72f34dc908bd4c373521e958794 to your computer and use it in GitHub Desktop.
Split an OSM XML file into pieces of a given size, outputting reasonably compact, referentially complete chunks.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python3 | |
import os.path | |
import argparse | |
import collections | |
import operator | |
import xml.etree.ElementTree as ElementTree | |
import geohash_hilbert | |
def geohash(node):
    """Return the Hilbert-curve geohash of a node item.

    Reads the ``lat`` and ``lon`` attributes of ``node.element``, converts
    them to floats and passes them to ``geohash_hilbert.encode`` with the
    longitude first. Used as a sort key for nodes.

    Raises:
        KeyError: if the element has no ``lat`` or ``lon`` attribute.
        ValueError: if either attribute is not a valid float.
    """
    attributes = node.element.attrib
    latitude = float(attributes['lat'])
    longitude = float(attributes['lon'])
    # NOTE(review): encode() is called as (lon, lat) -- presumably matching
    # the library's (lng, lat) parameter order; confirm against its docs.
    return geohash_hilbert.encode(longitude, latitude)
class Item:
    """Wrapper around a single OSM XML element (node, way or relation).

    Attributes:
        element: the wrapped ElementTree element.
        osmid: value of the element's ``id`` attribute (kept as a string).
        osmtype: the element's tag name.
        uid: ``"<osmtype>/<osmid>"``; the key used for cross references.
        modified: True when the element carries ``action="modify"`` or was
            changed through :meth:`add_tag` / :meth:`remove_tag`.
        refs: uids referenced by ``nd`` and ``member`` children, in
            document order (duplicates are kept).
        tags: mapping of tag key to value built from ``tag`` children.
    """

    def __init__(self, osmelement):
        """Index the id, type, tags and references of *osmelement*.

        Raises:
            KeyError: if a required attribute (``id``, ``k``, ``v``,
                ``ref``, ``type``) is missing.
        """
        self.element = osmelement
        self.osmid = osmelement.attrib["id"]
        self.osmtype = osmelement.tag
        self.uid = self.osmtype + "/" + self.osmid
        self.modified = osmelement.attrib.get('action', None) == 'modify'
        self.refs = list()
        self.tags = dict()
        for grandkid in osmelement:
            if grandkid.tag == "tag":
                self.tags[grandkid.attrib['k']] = grandkid.attrib['v']
            elif grandkid.tag == "nd":
                # Way members are always nodes.
                self.refs.append("node/" + grandkid.attrib["ref"])
            elif grandkid.tag == "member":
                self.refs.append(
                    grandkid.attrib["type"] + "/" + grandkid.attrib["ref"])

    def __str__(self):
        return self.uid

    def _find_tag(self, key):
        """Return the first ``tag`` child whose ``k`` equals *key*, or None.

        The children are compared directly rather than through an XPath
        built from *key*, so keys containing quote characters are safe.
        """
        for child in self.element.findall("tag"):
            if child.get("k") == key:
                return child
        return None

    def add_tag(self, key, value, overwrite=False):
        """Set tag *key* to *value* and mark the element as modified.

        An existing ``tag`` child with the same key is updated in place, so
        the element never ends up with two tags sharing one key. The cached
        ``tags`` mapping is kept in sync with the XML.

        Raises:
            ValueError: if *key* already exists and *overwrite* is false.
        """
        if not overwrite and key in self.tags:
            raise ValueError("Key already exists.")
        existing = self._find_tag(key)
        if existing is None:
            e = ElementTree.Element("tag", attrib={"k": key, "v": value})
            self.element.append(e)
        else:
            existing.set("v", value)
        self.tags[key] = value
        self.element.set('action', 'modify')
        self.modified = True

    def remove_tag(self, key):
        """Remove tag *key* if present; do nothing when it is absent.

        On removal the cached ``tags`` mapping is updated and the element
        is marked as modified.
        """
        e = self._find_tag(key)
        if e is not None:
            self.element.remove(e)
            self.tags.pop(key, None)
            self.element.set('action', 'modify')
            self.modified = True
class OSMTree:
    """Collection of items indexed by uid, with a reverse reference index.

    Attributes:
        items: mapping of uid to item.
        parent_map: mapping of a referenced uid to the list of uids of the
            items that reference it (one entry per reference, so a parent
            may appear more than once).
    """

    def __init__(self, tree=None):
        """Create an empty tree, or index the children of *tree*.

        Only children tagged ``node``, ``way`` or ``relation`` are wrapped
        in an ``Item`` and added; other children are ignored.
        """
        self.items = dict()
        self.parent_map = collections.defaultdict(list)
        if tree is not None:
            for child in tree:
                if child.tag in ['node', 'way', 'relation']:
                    self.add(Item(child))

    def add(self, item):
        """Store *item* and register it as a parent of everything it references."""
        self.items[item.uid] = item
        for element in item.refs:
            self.parent_map[element].append(item.uid)

    def remove(self, item):
        """Remove *item* and its entries in the reverse reference index.

        Entries keyed by the item's own uid (i.e. its parents) are kept, so
        ``get_related(item)`` still works after the item was removed.

        Raises:
            KeyError: if the item is not stored in this tree.
        """
        del self.items[item.uid]
        # Drop the item from the parent lists of everything it referenced;
        # otherwise get_related() would follow a parent that no longer exists.
        for ref in set(item.refs):
            parents = self.parent_map.get(ref)
            if not parents:
                continue
            remaining = [uid for uid in parents if uid != item.uid]
            if remaining:
                self.parent_map[ref] = remaining
            else:
                del self.parent_map[ref]

    def get_related(self, item):
        """Return the set of stored items transitively connected to *item*.

        Connections are followed in both directions: to parents (items that
        reference the current one) and to children (items it references).
        References to uids that are not stored in this tree are skipped.
        The result contains *item* itself when it is still stored and is
        reached again through one of its neighbours.
        """
        related = set()
        visited = set()
        tocheck = collections.deque([item])
        while tocheck:
            curitem = tocheck.popleft()
            if curitem in visited:
                continue
            visited.add(curitem)
            for parent in self.parent_map.get(curitem.uid, ()):
                pitem = self.items.get(parent)
                if pitem is None:
                    # Stale index entry: the parent is no longer stored.
                    continue
                related.add(pitem)
                tocheck.append(pitem)
            for ref in curitem.refs:
                ritem = self.items.get(ref)
                if ritem is not None:
                    related.add(ritem)
                    tocheck.append(ritem)
        return related
def _move_group(source, target, seed):
    """Move *seed* and everything related to it from *source* to *target*."""
    # Resolve the group while the seed is still stored, so the traversal
    # never has to look up an item that was already removed.
    group = source.get_related(seed)
    group.discard(seed)
    target.add(seed)
    source.remove(seed)
    for member in group:
        target.add(member)
        source.remove(member)


def _iter_seeds(source, nodes):
    """Yield the items of *source* that still need a chunk, nodes first."""
    for node in nodes:
        if node.uid in source.items:
            yield node
    # Whatever is left cannot be reached from any node in the file (for
    # example ways or relations whose members are all absent).
    for leftover in list(source.items.values()):
        if leftover.uid in source.items:
            yield leftover


def split(source, targetsize):
    """Yield OSMTree chunks of roughly *targetsize* items taken from *source*.

    Nodes are visited in geohash order so that each chunk is spatially
    compact. Every visited item is moved into the current chunk together
    with all items related to it (see ``OSMTree.get_related``), so a chunk
    never references an item that ended up in a different chunk. A chunk is
    yielded once it holds at least *targetsize* items; because related
    groups are never split, chunks may exceed that size.

    Items that are not reachable from any node are chunked after the nodes
    instead of being left behind in *source*.

    Note: *source* is emptied as the generator is consumed.
    """
    nodes = [i for i in source.items.values() if i.osmtype == "node"]
    nodes.sort(key=geohash)
    target = OSMTree()
    for seed in _iter_seeds(source, nodes):
        _move_group(source, target, seed)
        if len(target.items) >= targetsize:
            yield target
            target = OSMTree()
    if target.items:
        yield target
def _osmid_sort_key(item):
    """Sort key ordering items numerically by id; non-integer ids go last."""
    try:
        return (0, int(item.osmid), "")
    except ValueError:
        return (1, 0, item.osmid)


def write_osm(items, filename):
    """Write *items* to *filename* as an OSM XML document.

    The document root is an ``osm`` element with ``upload="never"`` and a
    licence ``note``. Items are written grouped by type (nodes, then ways,
    then relations) and, within each type, in ascending numeric id order.

    Args:
        items: iterable of objects exposing ``osmtype``, ``osmid`` and
            ``element``.
        filename: passed unchanged to ``ElementTree.write``.

    Raises:
        KeyError: if an item's ``osmtype`` is not node, way or relation.
    """
    root = ElementTree.Element("osm", generator="osm_chunker.py", version="0.6", upload="never")
    note = ElementTree.SubElement(root, "note")
    note.text = "The data included in this document is from www.openstreetmap.org. The data is made available under ODbL."
    item_map = {"node": list(),
                "way": list(),
                "relation": list()}
    for item in items:
        item_map[item.osmtype].append(item)
    for key in ["node", "way", "relation"]:
        # Ids are stored as strings; sorting them as text would put "10"
        # before "9", so compare them as integers.
        for item in sorted(item_map[key], key=_osmid_sort_key):
            root.append(item.element)
    ElementTree.ElementTree(root).write(filename)
def main(argv=None):
    """Split the OSM file named on the command line into numbered chunks.

    Output files are named ``<input basename>_<n>.osm`` (n counting from 0)
    and are written to the current working directory.

    Args:
        argv: argument list to parse; None means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Split an osm file into reasonably compact chunks.')
    parser.add_argument('infile',
                        help='Source data')
    parser.add_argument('--size', type=int, default=5000, help='Number of objects per output file.')
    args = parser.parse_args(argv)
    # A size of zero would divide by zero in the estimate below, and a
    # negative size is meaningless, so reject both up front.
    if args.size < 1:
        parser.error("--size must be a positive integer.")
    tree = ElementTree.parse(args.infile)
    root = tree.getroot()
    source = OSMTree(root)
    objectcount = len(source.items)
    print(objectcount, "OSM objects.")
    print("Approximately {:d} output files.".format(objectcount//args.size+1))
    fbase = os.path.splitext(os.path.basename(args.infile))[0]
    for count, osmtree in enumerate(split(source, args.size)):
        write_osm(osmtree.items.values(), "{}_{}.osm".format(fbase, count))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment