Last active
November 8, 2020 22:46
-
-
Save d3d9/263abc2b1b44bc62469ff352d5690619 to your computer and use it in GitHub Desktop.
Hausnummernvergleich zwischen amtlichen Daten der Stadt Hagen und Daten von OpenStreetMap.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Sources] | |
AmtUrl = http://www.stadtplan.hagen.de/StrVz/Hauskoordinaten.csv | |
AmtSep = ; | |
AmtEnc = cp1252 | |
AmtCRS = EPSG:4647 | |
OsmUrl = http://overpass-api.de/api/interpreter?data=%5Bout%3Acsv%28%3A%3Alat%2C%3A%3Alon%2C%3A%3Aid%2C%3A%3Atype%2C%22addr%3Apostcode%22%2C%22addr%3Astreet%22%2C%22addr%3Ahousenumber%22%2C%22addr%3Aplace%22%3Btrue%3B%22%3B%22%29%5D%5Btimeout%3A200%5D%3B%0Aarea%283601800297%29-%3E.searchArea%3B%0A%28%0A%20%20node%5B%22addr%3Ahousenumber%22%5D%28area.searchArea%29%3B%0A%20%20way%5B%22addr%3Ahousenumber%22%5D%28area.searchArea%29%3B%0A%29%3B%0Aout%20center%3B | |
;OsmPlace = Hagen | |
OsmSep = ; | |
OsmEnc = utf-8 | |
OsmCRS = EPSG:4326 | |
[Columns] | |
AmtPLZ = Postleitzahl | |
AmtStr = Straßenname | |
AmtNr = Hausnummer | |
AmtZus = Zusatz | |
AmtX = Rechtswert | |
AmtY = Hochwert | |
[Data] | |
Replacements = (("Dr.-Lammert-Weg","Doktor-Lammert-Weg"), | |
("Joh.-Gottlieb-Fichte-Str.","Johann-Gottlieb-Fichte-Straße"), | |
("Johann-F.-Oberlin-Str.","Johann-Friedrich-Oberlin-Straße"), | |
("Prof.-Dr.-Schemann-Weg","Professor-Doktor-Schemann-Weg")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import pandas | |
import csv | |
import argparse | |
import configparser | |
from ast import literal_eval | |
from urllib.request import urlopen | |
from datetime import datetime | |
from pyproj import Transformer | |
from scipy.spatial import distance | |
parser = argparse.ArgumentParser() | |
parser.add_argument('-c', '--config', action='store', help='Config file name', type=str, required=True) | |
args = parser.parse_args() | |
config = configparser.ConfigParser(interpolation=None) | |
config.read(args.config) | |
''' | |
for section in config.sections(): | |
print(section) | |
for c in config[section]: | |
print(c,config[section][c]) | |
''' | |
''' | |
Query: | |
[out:csv(::lat,::lon,::id,::type,"addr:postcode","addr:street","addr:housenumber","addr:place";true;";")][timeout:200]; | |
{{geocodeArea:"????????"}}->.searchArea; | |
( | |
node["addr:housenumber"](area.searchArea); | |
way["addr:housenumber"](area.searchArea); | |
); | |
out center; | |
''' | |
print("Hausnummern werden geladen ...") | |
r_amt = urlopen(config["Sources"]["AmtUrl"]) | |
r_osm = urlopen(config["Sources"]["OsmUrl"]) | |
amt = pandas.read_csv(r_amt, sep=config["Sources"]["AmtSep"], encoding=config["Sources"]["AmtEnc"], dtype=str) | |
osm = pandas.read_csv(r_osm, sep=config["Sources"]["OsmSep"], encoding=config["Sources"]["OsmEnc"], dtype=str) | |
print("Vergleich wird durchgeführt ...") | |
amtStr = config["Columns"]["AmtStr"] | |
amtNr = config["Columns"]["AmtNr"] | |
# todo: Manche Quellen haben Hausnummer und Zusatz möglicherweise in einem Feld | |
amtZus = config["Columns"]["AmtZus"] | |
replacements = literal_eval(config["Data"]["Replacements"]) | |
kombinierte = [] | |
for index, row in amt.iterrows(): | |
strasse = row[amtStr] | |
for x in replacements: | |
strasse = strasse.replace(x[0], x[1]) | |
adresse = strasse + ' ' + row[amtNr] | |
zusatz = row[amtZus] | |
if(pandas.notnull(zusatz)): | |
adresse += zusatz | |
kombinierte.append(adresse) | |
amt = amt.assign(kombiniert=kombinierte) | |
kombinierte = [] | |
for index, row in osm.iterrows(): | |
adresse = row["addr:street"] + ' ' + row["addr:housenumber"] | |
kombinierte.append(adresse) | |
osm = osm.assign(kombiniert=kombinierte) | |
merged = pandas.merge(amt, osm, on='kombiniert', how="outer", indicator=True) | |
amtX = config["Columns"]["AmtX"] | |
amtY = config["Columns"]["AmtY"] | |
amtPLZ = config["Columns"]["AmtPLZ"] | |
transformer = Transformer.from_crs(config["Sources"]["OsmCRS"], config["Sources"]["AmtCRS"]) | |
treffer = [] | |
zuwenig = [] | |
zuviel = [] | |
for index, row in merged.iterrows(): | |
if row['_merge'] == "both": | |
zusatz = row[amtZus] | |
if pandas.isnull(zusatz): | |
zusatz = "" | |
# todo: wie will man mit duplicates umgehen? | |
diff = round(distance.euclidean((float(row[amtX].replace(",", ".")), | |
float(row[amtY].replace(",", "."))), | |
transformer.transform(row["@lat"], row["@lon"])), 2) | |
treffer.append([row[amtX], row[amtY], row["@lat"], row["@lon"], diff, row["@type"][0]+row["@id"], row[amtPLZ], row[amtStr], row[amtNr], zusatz]) | |
elif row['_merge'] == "left_only": | |
zusatz = row[amtZus] | |
if pandas.isnull(zusatz): | |
zusatz = "" | |
zuwenig.append([row[amtX], row[amtY], row[amtPLZ], row[amtStr], row[amtNr], zusatz]) | |
elif row['_merge'] == "right_only": | |
zuviel.append([row["@lat"], row["@lon"], row["@type"][0]+row["@id"], row["addr:postcode"], row["addr:street"], row["addr:housenumber"]]) | |
day = datetime.now().strftime("%d%m%y") | |
trefferfn = "auswertung" + day + "-treffer.csv" | |
with open(trefferfn, 'w', newline='') as f: | |
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL) | |
writer.writerow([amtX, amtY, "lat_osm", "lon_osm", "distance", "full_id", amtPLZ, amtStr, amtNr, amtZus]) | |
writer.writerows(treffer) | |
print("Datei " + trefferfn + " geschrieben") | |
zuwenigfn = "auswertung" + day + "-zuwenig.csv" | |
with open(zuwenigfn, 'w', newline='') as f: | |
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL) | |
writer.writerow([amtX, amtY, amtPLZ, amtStr, amtNr, amtZus]) | |
writer.writerows(zuwenig) | |
print("Datei " + zuwenigfn + " geschrieben") | |
zuvielfn = "auswertung" + day + "-zuviel.csv" | |
with open(zuvielfn, 'w', newline='') as f: | |
writer = csv.writer(f, delimiter=';', quoting=csv.QUOTE_MINIMAL) | |
writer.writerow(["@lat", "@lon", "full_id", "addr:postcode", "addr:street", "addr:housenumber"]) | |
writer.writerows(zuviel) | |
print("Datei " + zuvielfn + " geschrieben") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Für meine csv-Verbesserungen und Ergänzungen hab ich https://github.com/grischard/csventrifuge gebaut. Dein Code scheint aber viel eleganter zu sein!