Last active
September 17, 2020 16:45
-
-
Save melanieimfeld/b69869f838158f8dffa98a333c1e06a7 to your computer and use it in GitHub Desktop.
Floodplain analysis NYC Geopandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geopandas as gpd | |
import pandas as pd | |
from shapely.geometry import Polygon, shape | |
from sodapy import Socrata | |
import os | |
#Run this file to get the data from open data NYC | |
#Below are the details, such as identifiers, for the datasets we want to download. Change limit to get a smaller subset | |
querylist = [ | |
{'identifier':'myk6-g6eq','limit': 10000, 'queries' : [["The_geom"], [""]], 'tolerance': 0.0001, 'geom' : 'The_geom'}, #floodplains | |
{'identifier':'jp9i-3b7y','limit': 100, 'queries' : [["The_geom,boro_cd"],[" "]], 'tolerance': 0, 'geom' : 'The_geom'}, #community districts | |
{'identifier':'xi7c-iiu2','limit': 100, 'queries' : [["*"],[" "]], 'tolerance': None, 'geom' : None}, #population | |
{'identifier':'qjdb-bxqz','limit': 1000000, 'queries' : [["CNSTRCT_YR,the_geom"],["FEAT_CODE==2100"]], 'tolerance': 0.0001, 'geom' : 'the_geom'}, #buildings | |
] | |
outdir = "data" | |
# Create data folder if it does no exist | |
if not os.path.exists(outdir): | |
os.makedirs(outdir) | |
def getBasedata(identifier, limit, queries, tolerance, geom): | |
domain = 'data.cityofnewyork.us' | |
client = Socrata(domain, None) | |
results = client.get(identifier,limit=limit, select=queries[0], where=queries[1]) | |
results = pd.DataFrame.from_dict(results) | |
if geom != None: #spatial files | |
gdf = createGdf(results,tolerance, geom) | |
gdf.to_file("{}/{}.geojson".format(outdir, identifier),driver='GeoJSON') | |
else: #non spatial | |
results.to_csv("{}/{}.csv".format(outdir, identifier)) | |
print("download finished") | |
def createGdf(json,tolerance,name): | |
geometry = [shape(x).simplify(tolerance, preserve_topology=True) for x in json[name]] | |
json = json.drop(name, axis=1) | |
gdf = gpd.GeoDataFrame(json, crs='EPSG:4326', geometry=geometry) | |
gdf = gdf.to_crs(epsg=2263) #reproject to NAD83 | |
return gdf | |
for i in querylist: | |
getBasedata(i["identifier"],i["limit"], i["queries"], i["tolerance"], i["geom"]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geopandas as gpd | |
import pandas as pd | |
from shapely.geometry import Polygon, shape | |
#Functions for the basic analysis. Only works if data was downloaded with getdata.py | |
floodplains = gpd.read_file("data/myk6-g6eq.geojson") | |
cd = gpd.read_file("data/jp9i-3b7y.geojson") | |
buildings = gpd.read_file("data/qjdb-bxqz.geojson") | |
pop = pd.read_csv("data/xi7c-iiu2.csv", index_col = 0) | |
def checkFloodplain(cd,fp): | |
cd["boro_cd"] = cd["boro_cd"].astype(int) | |
intersection = gpd.overlay(cd, fp, how='intersection') | |
cd_subset = intersection.dissolve(by = 'boro_cd') | |
merged = pd.merge(cd["boro_cd"], cd_subset, how = "left", on = "boro_cd") | |
merged["contains_fldplain"] = merged.geometry.apply(lambda x: "No" if x==None else "Yes") | |
merged[["boro_cd", "contains_fldplain"]].to_csv('data/output.csv') | |
print("Files saved") | |
checkFloodplain(cd,floodplains) | |
#Functions for the extended analysis. It takes a bit of time, uncomment to run | |
def checkFloodplain2(cd,fp): | |
cd["boro_cd"] = cd["boro_cd"].astype(int) | |
intersection = gpd.overlay(cd, fp, how='intersection') | |
cd_subset = intersection.dissolve(by = 'boro_cd', as_index = False) | |
#get percentage data | |
cd_subset["fld_area_sqkm"] = cd_subset.geometry.area/10764000 | |
cd["tot_area_sqkm"] = cd.geometry.area/10764000 | |
merged = pd.merge(cd.loc[:,cd.columns != "geometry"], cd_subset, how = "left", on = "boro_cd") | |
merged["contains_fldplain"] = merged.geometry.apply(lambda x: "No" if x==None else "Yes") | |
merged["perc_fldplain"] = (merged["fld_area_sqkm"] / merged["tot_area_sqkm"] * 100).fillna(0).round(1) | |
print("percentage data added, wait...") | |
#get building data | |
merged = merged.merge(selectBuildings(cd_subset,buildings), on = "boro_cd", how = "left") | |
merged["building_density"] = merged.index_right / merged.fld_area_sqkm | |
print("building data added, wait...") | |
#get population data | |
merged = merged.merge(preparePopdata(pop), how="left", on="boro_cd") | |
print("pop growth rate data added") | |
export(merged) | |
def preparePopdata(pop): | |
pop["boro_cd"] = pop.apply(lambda x: boroCD(x. borough, x.cd_number), axis = 1) | |
pop["pop_growth_rate"] = (pop._2010_population - pop._1970_population) / pop._1970_population * 100 | |
pop = pop[["boro_cd", "pop_growth_rate"]] | |
return pop | |
def boroCD(b, no): | |
if b == "Bronx": | |
return 200 + no | |
elif b == "Brooklyn": | |
return 300 + no | |
elif b == "Queens": | |
return 400 + no | |
elif b == "Manhattan": | |
return 100 + no | |
elif b == "Staten Island": | |
return 500 + no | |
else: | |
return 0 | |
def selectBuildings(subset, buildings): | |
buildings.geometry = buildings.geometry.centroid | |
buildings.CNSTRCT_YR = pd.to_numeric(buildings.CNSTRCT_YR).fillna(0).astype(int) | |
b_intersection = gpd.sjoin(buildings, subset, op="within").dissolve(by = "boro_cd", aggfunc = {"CNSTRCT_YR":"median", "index_right": "count"}) | |
return b_intersection[["CNSTRCT_YR", "index_right"]] | |
def export(merged): | |
merged.to_file("data/output.gpkg", layer='floodplains', driver="GPKG") | |
print("Files saved") | |
#checkFloodplain2(cd,floodplains) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment