Skip to content

Instantly share code, notes, and snippets.

@akhenakh
Last active September 10, 2024 13:04
Show Gist options
  • Save akhenakh/ac25fea055bd1731f63bb1ba4d53b6be to your computer and use it in GitHub Desktop.
Save akhenakh/ac25fea055bd1731f63bb1ba4d53b6be to your computer and use it in GitHub Desktop.
Querying into Overturemap data using duckdb
-- rename files to parquet: find . -type f -exec mv {} {}.parquet \;
-- easier for tools detection like tad
LOAD spatial;
-- Update with Geoparquet
FROM read_parquet('/opt/public_files/overturemaps/theme=places/type=place/*', filename=true, hive_partitioning=1)
where bbox.xmin > -71.2 and bbox.xmax < -71.1 and bbox.ymin > 46.7 and bbox.ymax < 46.9
LIMIT 1;
FROM read_parquet('/opt/public_files/overturemaps/theme=places/type=place/*', filename=true, hive_partitioning=1)
where bbox.xmin > -71.2 and bbox.xmax < -71.1 and bbox.ymin > 46.7 and bbox.ymax < 46.9 AND categories.primary = 'hospital'
LIMIT 1000;
SELECT count(*)
from read_parquet('./theme=places/type=*/*.parquet', filename=true, hive_partitioning=1);
SELECT ST_AsText(ST_GeomFromWkb(geometry)) AS geometry, JSON(names) AS names, JSON(categories)->'$.main' AS category
from read_parquet('./theme=places/type=*/*.parquet', filename=true, hive_partitioning=1)
where bbox.minX > -71.2 and bbox.maxX < -71.1 and bbox.minY > 46.7 and bbox.maxY < 46.9 and category = '"hospital"' LIMIT 200;
SELECT ST_AsText(ST_GeomFromWkb(geometry)) AS geometry,
from read_parquet('./theme=buildings/type=building/*.parquet', filename=true, hive_partitioning=1)
where bbox.minX > -71.2 and bbox.maxX < -71.1 and bbox.minY > 46.7 and bbox.maxY < 46.9 LIMIT 20000;
COPY (
SELECT
type,
subType,
localityType,
adminLevel,
isoCountryCodeAlpha2,
JSON(names) AS names,
JSON(sources) AS sources,
ST_GeomFromWkb(geometry) AS geometry
FROM read_parquet('./theme=admins/type=*/*', filename=true, hive_partitioning=1)
WHERE adminLevel = 2
AND ST_GeometryType(ST_GeomFromWkb(geometry)) IN ('POLYGON','MULTIPOLYGON')
) TO 'countries.geojson'
WITH (FORMAT GDAL, DRIVER 'GeoJSON');
COPY (
SELECT
type,
version,
CAST(updatetime as varchar) as updateTime,
height,
numfloors as numFloors,
level,
class,
JSON(names) as names,
JSON(sources) as sources,
ST_GeomFromWKB(geometry) as geometry
FROM
read_parquet('./theme=admins/type=*/*', filename=true, hive_partitioning=1)
where bbox.minX > -71.2 and bbox.maxX < -71.1 and bbox.minY > 46.7 and bbox.maxY < 46.9
LIMIT
100
) TO 'buildings.geojsonseq'
WITH (FORMAT GDAL, DRIVER 'GeoJSONSeq');
LOAD spatial;
COPY (
SELECT
type,
height,
numfloors as numFloors,
level,
class,
JSON(names) as names,
JSON(sources) as sources,
ST_GeomFromWKB(geometry) as wkb_geometry
FROM
read_parquet('./theme=buildings/type=*/*', hive_partitioning=1)
)
TO 'buildings_quebec.gpkg'
WITH (FORMAT GDAL, DRIVER 'GPKG');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment