melanieimfeld · September 17, 2020 16:45
diff --git a/getdata.py b/getdata.py
 import geopandas as gpd
 import pandas as pd
 from shapely.geometry import Polygon, shape
 from sodapy import Socrata
 import os

 #Run this file to get the data from open data NYC
 #Below are the details, such as identifiers, for the datasets we want to download. Change limit to get a smaller subset


 querylist = [
 {'identifier':'myk6-g6eq','limit': 10000, 'queries' : [["The_geom"], [""]], 'tolerance': 0.0001, 'geom' : 'The_geom'}, #floodplains
 {'identifier':'jp9i-3b7y','limit': 100, 'queries' : [["The_geom,boro_cd"],[" "]], 'tolerance': 0, 'geom' : 'The_geom'}, #community districts
 {'identifier':'xi7c-iiu2','limit': 100, 'queries' : [["*"],[" "]], 'tolerance': None, 'geom' : None}, #population
 {'identifier':'qjdb-bxqz','limit': 1000000, 'queries' : [["CNSTRCT_YR,the_geom"],["FEAT_CODE==2100"]], 'tolerance': 0.0001, 'geom' : 'the_geom'}, #buildings
 ]

 outdir = "data"
 # Create data folder if it does no exist
 if not os.path.exists(outdir):
    os.makedirs(outdir)


 def getBasedata(identifier, limit, queries, tolerance, geom):
    domain = 'data.cityofnewyork.us'
    client = Socrata(domain, None)
    results = client.get(identifier,limit=limit, select=queries[0], where=queries[1])
    results = pd.DataFrame.from_dict(results)

    if geom != None: #spatial files
    	gdf = createGdf(results,tolerance, geom)
    	gdf.to_file("{}/{}.geojson".format(outdir, identifier),driver='GeoJSON')
    else: #non spatial
    	results.to_csv("{}/{}.csv".format(outdir, identifier))
    print("download finished")


 def createGdf(json,tolerance,name): 
    geometry = [shape(x).simplify(tolerance, preserve_topology=True) for x in json[name]]
    json = json.drop(name, axis=1)
    gdf = gpd.GeoDataFrame(json, crs='EPSG:4326', geometry=geometry) 
    gdf = gdf.to_crs(epsg=2263) #reproject to NAD83
    return gdf


 for i in querylist:
 	getBasedata(i["identifier"],i["limit"], i["queries"], i["tolerance"], i["geom"])
diff --git a/main.py b/main.py
 import geopandas as gpd
 import pandas as pd
 from shapely.geometry import Polygon, shape

 #Functions for the basic analysis. Only works if data was downloaded with getdata.py

 floodplains = gpd.read_file("data/myk6-g6eq.geojson")
 cd = gpd.read_file("data/jp9i-3b7y.geojson")
 buildings = gpd.read_file("data/qjdb-bxqz.geojson")
 pop = pd.read_csv("data/xi7c-iiu2.csv", index_col = 0)

 def checkFloodplain(cd,fp):
    cd["boro_cd"] = cd["boro_cd"].astype(int)
    intersection = gpd.overlay(cd, fp, how='intersection')
    cd_subset = intersection.dissolve(by = 'boro_cd')
    merged = pd.merge(cd["boro_cd"], cd_subset, how = "left", on = "boro_cd")
    merged["contains_fldplain"] = merged.geometry.apply(lambda x: "No" if x==None else "Yes")
    merged[["boro_cd", "contains_fldplain"]].to_csv('data/output.csv')
    print("Files saved")

 checkFloodplain(cd,floodplains)

 #Functions for the extended analysis. It takes a bit of time, uncomment to run

 def checkFloodplain2(cd,fp):
    cd["boro_cd"] = cd["boro_cd"].astype(int)
    intersection = gpd.overlay(cd, fp, how='intersection')
    cd_subset = intersection.dissolve(by = 'boro_cd', as_index = False)

    #get percentage data
    cd_subset["fld_area_sqkm"] = cd_subset.geometry.area/10764000
    cd["tot_area_sqkm"] = cd.geometry.area/10764000
    merged = pd.merge(cd.loc[:,cd.columns != "geometry"], cd_subset, how = "left", on = "boro_cd")
    merged["contains_fldplain"] = merged.geometry.apply(lambda x: "No" if x==None else "Yes")
    merged["perc_fldplain"] = (merged["fld_area_sqkm"] / merged["tot_area_sqkm"] * 100).fillna(0).round(1)
    print("percentage data added, wait...")

    #get building data
    merged = merged.merge(selectBuildings(cd_subset,buildings), on = "boro_cd", how = "left")
    merged["building_density"] = merged.index_right / merged.fld_area_sqkm
    print("building data added, wait...")

    #get population data
    merged = merged.merge(preparePopdata(pop), how="left", on="boro_cd")
    print("pop growth rate data added")

    export(merged)


 def preparePopdata(pop):
 	pop["boro_cd"] = pop.apply(lambda x: boroCD(x. borough, x.cd_number), axis = 1)
 	pop["pop_growth_rate"] = (pop._2010_population -  pop._1970_population) / pop._1970_population * 100
 	pop = pop[["boro_cd", "pop_growth_rate"]]
 	return pop


 def boroCD(b, no):
 	if b == "Bronx":
 	    return 200 + no
 	elif b == "Brooklyn":
 	    return 300 + no
 	elif b == "Queens":
 	    return 400 + no
 	elif b == "Manhattan":
 	    return 100 + no
 	elif b == "Staten Island":
 	    return 500 + no
 	else:
 	    return 0

 def selectBuildings(subset, buildings):
 	buildings.geometry = buildings.geometry.centroid
 	buildings.CNSTRCT_YR = pd.to_numeric(buildings.CNSTRCT_YR).fillna(0).astype(int)
 	b_intersection = gpd.sjoin(buildings, subset, op="within").dissolve(by = "boro_cd", aggfunc = {"CNSTRCT_YR":"median", "index_right": "count"})
 	return b_intersection[["CNSTRCT_YR", "index_right"]]

 def export(merged):
 	merged.to_file("data/output.gpkg", layer='floodplains', driver="GPKG")
 	print("Files saved")

 #checkFloodplain2(cd,floodplains)
	import geopandas as gpd
	import pandas as pd
	from shapely.geometry import Polygon, shape
	from sodapy import Socrata
	import os

	#Run this file to get the data from open data NYC
	#Below are the details, such as identifiers, for the datasets we want to download. Change limit to get a smaller subset


	querylist = [
	{'identifier':'myk6-g6eq','limit': 10000, 'queries' : [["The_geom"], [""]], 'tolerance': 0.0001, 'geom' : 'The_geom'}, #floodplains
	{'identifier':'jp9i-3b7y','limit': 100, 'queries' : [["The_geom,boro_cd"],[" "]], 'tolerance': 0, 'geom' : 'The_geom'}, #community districts
	{'identifier':'xi7c-iiu2','limit': 100, 'queries' : [["*"],[" "]], 'tolerance': None, 'geom' : None}, #population
	{'identifier':'qjdb-bxqz','limit': 1000000, 'queries' : [["CNSTRCT_YR,the_geom"],["FEAT_CODE==2100"]], 'tolerance': 0.0001, 'geom' : 'the_geom'}, #buildings
	]

	outdir = "data"
	# Create data folder if it does no exist
	if not os.path.exists(outdir):
	os.makedirs(outdir)


	def getBasedata(identifier, limit, queries, tolerance, geom):
	domain = 'data.cityofnewyork.us'
	client = Socrata(domain, None)
	results = client.get(identifier,limit=limit, select=queries[0], where=queries[1])
	results = pd.DataFrame.from_dict(results)

	if geom != None: #spatial files
	gdf = createGdf(results,tolerance, geom)
	gdf.to_file("{}/{}.geojson".format(outdir, identifier),driver='GeoJSON')
	else: #non spatial
	results.to_csv("{}/{}.csv".format(outdir, identifier))
	print("download finished")


	def createGdf(json,tolerance,name):
	geometry = [shape(x).simplify(tolerance, preserve_topology=True) for x in json[name]]
	json = json.drop(name, axis=1)
	gdf = gpd.GeoDataFrame(json, crs='EPSG:4326', geometry=geometry)
	gdf = gdf.to_crs(epsg=2263) #reproject to NAD83
	return gdf


	for i in querylist:
	getBasedata(i["identifier"],i["limit"], i["queries"], i["tolerance"], i["geom"])
No results found