Created
February 13, 2020 09:27
-
-
Save grischard/a108be225a41df50ce04a081fe7c8eeb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
Return GeoJSON centroids with population for each locality in Luxembourg. | |
BD-Adresses doesn't include communes, so we use the OpenStreetMap community's | |
csventrifuge output, which has been enriched. The output of this script is | |
© OpenStreetMap Contributors, see https://openstreetmap.org/copyright. | |
Some localities are spread over communes (yeah), e.g. Dirbach which is in three. | |
- Downloads the latest addresses produced by csventrifuge (enriched by commune)
- Averages the positions of all addresses in each locality
- Conflates the population from rnpp
- Spits out GeoJSON
Run like : | |
python3 place-centroid.py > place-centroids.geojson | |
""" | |
import pandas as pd | |
import geojson | |
def data2geojson(df: pd.DataFrame) -> str:
    """Serialise the locality dataframe as a GeoJSON FeatureCollection string.

    Each row becomes one Point feature. Column names are hardcoded: every row
    must provide ``lon_wgs84``, ``lat_wgs84``, ``localite``, ``commune`` and
    ``population``.

    Args:
        df: Dataframe with one row per locality.

    Returns:
        The FeatureCollection dumped as a string, with sorted keys and
        non-ASCII characters left unescaped.
    """
    # Build the list directly rather than appending from inside df.apply():
    # apply() was only being used for its side effect and its result thrown
    # away.
    features = [
        geojson.Feature(
            geometry=geojson.Point(
                # Longitude first, then latitude; both rounded to 4 decimals.
                (round(row["lon_wgs84"], 4), round(row["lat_wgs84"], 4))
            ),
            properties=dict(
                localite=row["localite"],
                commune=row["commune"],
                population=row["population"],
            ),
        )
        for _, row in df.iterrows()
    ]
    return geojson.dumps(
        geojson.FeatureCollection(features), sort_keys=True, ensure_ascii=False
    )
# The endpoint that redirects to the most recent version of the
# addresses in CSV, as produced by csventrifuge.
ADDRESSES_CSV = "https://openstreetmap.lu/luxembourg-addresses.csv"
df = pd.read_csv(ADDRESSES_CSV)

# Drop the columns that play no part in computing a centroid.
df = df.drop(
    labels=[
        "rue",
        "numero",
        "code_postal",
        "id_caclr_rue",
        "id_caclr_bat",
        "coord_est_luref",
        "coord_nord_luref",
        "id_geoportail",
        "code_commune",
    ],
    axis=1,
)

# Remove commune in parentheses, information is redundant and makes conflation
# more difficult
df = df.replace(to_replace=r" \(.*\)", value="", regex=True)

# People live there, but there's no address in BD-Adresses. Hardcode a point.
missingaddresses = [
    ["Helperknapp", "Kuelbecherhaff", 49.72131, 6.06763],
    ["Larochette", "Meysembourg", 49.76963, 6.18718],
    ["Waldbillig", "Grundhof", 49.81823, 6.32157],
    ["Wormeldange", "Dreiborn", 49.61974, 6.39383],
]
# DataFrame.append() was removed in pandas 2.0; pd.concat() is its replacement
# and stacks the hardcoded rows under the downloaded ones in the same way.
df = pd.concat(
    [
        df,
        pd.DataFrame(
            missingaddresses,
            columns=["commune", "localite", "lat_wgs84", "lon_wgs84"],
        ),
    ]
)

# Average lat/lon per (commune, localite) to get each locality's centre.
# The coordinate columns are selected explicitly so that a leftover
# non-numeric column cannot make mean() fail.
df = df.groupby(["commune", "localite"])[["lat_wgs84", "lon_wgs84"]].mean()
POPULATION_CSV = (
    "https://data.public.lu/fr/datasets/r/cd165f0a-feb7-401f-a440-d42695e63738"
)
# cp437 is the encoding of the original IBM PC. Why they use it is anyone's guess.
popdf = pd.read_csv(POPULATION_CSV, encoding="cp437")

# Drop the one column that is not needed for the join or the output.
popdf = popdf.drop(labels="COMMUNE_CODE", axis=1)

# Remove commune in parentheses, information is redundant and makes conflation
# more difficult
popdf = popdf.replace(to_replace=r" \(.*\)", value="", regex=True)

# Rename stuff to match csventrifuge output
popdf = popdf.rename(
    columns={"COMMUNE": "commune", "LOCALITE": "localite", "POPULATION": "population"}
)

# Align spellings with the address data so the join keys match.
# The result is assigned back to the column: calling replace(inplace=True) on
# a selected column is a chained in-place operation, which warns in recent
# pandas and leaves the frame unchanged under Copy-on-Write.
popdf["commune"] = popdf["commune"].replace(
    {"Redange/Attert": "Redange-sur-Attert"}
)
popdf["localite"] = popdf["localite"].replace(
    {
        "Redange/Attert": "Redange",
        "Roodt/Eisch": "Roodt-sur-Eisch",
        "Goebelsmuehle": "Goebelsmühle",
    }
)
# Join both together. The outer join keeps localities that appear in only one
# of the two sources.
richdf = pd.merge(df, popdf, on=["commune", "localite"], how="outer")

# Places with no population get a 0. Assigned back rather than using
# fillna(inplace=True) on a selected column, which is a chained in-place
# operation and leaves the frame unchanged under Copy-on-Write.
richdf["population"] = richdf["population"].fillna(0)

print(data2geojson(richdf))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment