Created
May 4, 2025 19:00
-
-
Save amotl/348235b535a62cf9f37b4066f2bb853d to your computer and use it in GitHub Desktop.
Miniature geocoder using a dedicated instance of Nominatim.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# /// script | |
# requires-python = ">=3.9" | |
# dependencies = [ | |
# "click", | |
# "geopandas", | |
# "geopy", | |
# ] | |
# /// | |
""" | |
Basic geocoder on CSV file. | |
Prerequisites: | |
{apt,brew,pip} install uv | |
cat addresses.txt:: | |
id;addr | |
1000;Itämerenkatu 14, 00101 Helsinki, Finland | |
1001;Kampinkuja 1, 00100 Helsinki, Finland | |
1002;Kaivokatu 8, 00101 Helsinki, Finland | |
1003;Hermannin rantatie 1, 00580 Helsinki, Finland | |
Usage: | |
uv run minigeocode.py -i addresses.txt | |
References: | |
- https://docs.astral.sh/uv/ | |
- https://autogis-site.readthedocs.io/en/2019/notebooks/L3/geocoding_in_geopandas.html | |
""" | |
from pathlib import Path | |
import click | |
import pandas as pd | |
from geopy.geocoders import Nominatim | |
class Geocoder:
    """
    Basic geocoder using a dedicated instance of Nominatim.

    Reads addresses from one column of a DataFrame and writes the resolved
    coordinates into another column of the same DataFrame.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        address_column: str = "addr",
        coords_column: str = "coords",
    ):
        """
        :param data: Frame holding the addresses. It is stored by reference
            and modified in place by `forward`.
        :param address_column: Name of the column to read addresses from.
        :param coords_column: Name of the column to write coordinates to.
        """
        self.data = data
        self.address_column = address_column
        self.coords_column = coords_column

    @property
    def geolocator(self):
        """
        Initiate geocoder.

        A new Nominatim client is created on every access, so callers that
        issue many requests should read this property once and reuse it.
        """
        return Nominatim(domain="nominatim.hiveeyes.org", user_agent="minigeocode")

    def forward(self):
        """
        Forward-geocode every address and store the result in `coords_column`.

        Each cell of the coords column receives `tuple(location.point)` of
        the GeoPy location returned for the address (per the GeoPy `Point`
        documentation: latitude, longitude, altitude), or `None` when the
        address is missing, blank, or yields no match.

        :raises KeyError: If `address_column` is not present in the data.
        """
        # Create the client once; the property builds a new one per access.
        geocode = self.geolocator.geocode

        def lookup(address):
            # Empty CSV cells arrive as NaN. Passing them on would query the
            # service for the literal text "nan" and may return a bogus hit.
            if pd.isna(address) or not str(address).strip():
                return None
            location = geocode(address)
            # Get point coordinates from the GeoPy location object.
            return tuple(location.point) if location else None

        self.data[self.coords_column] = self.data[self.address_column].apply(lookup)
@click.command()
@click.option(
    "--input",
    "-i",
    "input_file",
    # Let click verify the file up front, so a wrong path produces a usage
    # error instead of a traceback from pandas.
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    required=True,
    help="Input file",
)
@click.option(
    "--address-column",
    type=str,
    default="addr",
    show_default=True,
    help="Name of the address column",
)
@click.option(
    "--coords-column",
    type=str,
    default="coords",
    show_default=True,
    help="Name of the coords column",
)
def main(input_file: Path, address_column: str, coords_column: str):
    """
    Geocode the addresses in a semicolon-separated CSV file.

    The input records are printed to stdout as semicolon-separated CSV,
    extended by a column holding the coordinates.
    """
    data = pd.read_csv(input_file, sep=";")

    # Report a misspelled column name as a CLI error, not as a bare KeyError.
    if address_column not in data.columns:
        raise click.BadParameter(
            f"Column {address_column!r} not found in {input_file}",
            param_hint="--address-column",
        )

    geocoder = Geocoder(
        data=data,
        address_column=address_column,
        coords_column=coords_column,
    )
    geocoder.forward()

    # `to_csv` output already ends with a newline; do not append another one.
    click.echo(geocoder.data.to_csv(sep=";", index=False), nl=False)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment