Created
August 28, 2023 19:47
-
-
Save emlys/a6663b8ce6176064a2a13760083e0fe6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "453b8b46-180d-4015-a72c-131ba7a18a87", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from collections import deque\n", | |
"\n", | |
"import geopandas as gpd\n", | |
"import numpy\n", | |
"import pandas as pd\n", | |
"import scipy\n", | |
"import shapely" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "979b009a-d5d3-4555-89d3-28669998529b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read in points\n", | |
"df = pd.read_csv('preprocessed_assets.csv')\n", | |
"df = df[df['facility_category'].apply( # filter to linear asset types\n", | |
" lambda val: val in {'Natural Gas Pipeline', 'Transmission Line'})]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "57958f9a-3d66-4c51-bd20-8cd749f58c69", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def multilinestring_traversal(graph):\n", | |
" \"\"\"\n", | |
" Traverse a connected, undirected tree graph to produce a multilinestring representing the tree shape.\n", | |
" \n", | |
" Similar to pre-order depth-first traversal, but revisits branch points.\n", | |
" \n", | |
" Example:\n", | |
" \n", | |
" [[0 1 0 0 0 0 0] 0\n", | |
" [1 0 1 0 1 0 0] |\n", | |
" [0 1 0 1 0 0 0] 1\n", | |
" [0 0 1 0 0 0 0] = / \\ -> [[0, 1, 2, 3], [1, 4, 5], [4, 6]]\n", | |
" [0 1 0 0 0 1 1] 2 4\n", | |
" [0 0 0 0 1 0 0] / / \\\n", | |
" [0 0 0 0 1 0 0]] 3 5 6\n", | |
" \n", | |
"\n", | |
" Args:\n", | |
" graph (numpy.array): NxN adjacency matrix representing the graph\n", | |
" \n", | |
" Returns:\n", | |
" list[list[int]]. List (representing a multilinestring) of lists (representing linestrings) of \n", | |
" integer indices (0..(N-1))\n", | |
" \"\"\"\n", | |
" # make a copy because we'll modify it\n", | |
" graph = numpy.copy(graph)\n", | |
" \n", | |
" # start on an arbitrary leaf node \n", | |
" for i, row in enumerate(graph):\n", | |
" if row.sum() == 1:\n", | |
" current_node = i\n", | |
" break\n", | |
" \n", | |
" branch_points = deque([current_node])\n", | |
" linestrings = []\n", | |
"\n", | |
" # while there are branches we haven't traversed yet\n", | |
" while branch_points:\n", | |
" \n", | |
" linestring = [] # start a new linestring representing this branch\n", | |
" current_node = branch_points.pop()\n", | |
" next_available_nodes = set([current_node])\n", | |
" \n", | |
" # if at least one neighbor is available, continue along this branch\n", | |
" while next_available_nodes:\n", | |
"\n", | |
" # if more than one neighbor is available, store this node to revisit later\n", | |
" if len(next_available_nodes) > 1:\n", | |
" branch_points.append(current_node)\n", | |
" \n", | |
" current_node = next_available_nodes.pop()\n", | |
" \n", | |
" linestring.append(current_node)\n", | |
" \n", | |
" # mark this node as visited\n", | |
" # after this, it won't be available as a neighbor of any other node\n", | |
" graph[:, current_node] = False\n", | |
" \n", | |
" # get the set of nodes that are adjacent to current_node and not yet visited\n", | |
" next_available_nodes = set(list(numpy.argwhere(graph[current_node]).flatten()))\n", | |
" \n", | |
" linestrings.append(linestring)\n", | |
"\n", | |
" return linestrings\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "c7cbe211-d5c0-470a-91f0-d6befa4eee57", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Group points together by facility category\n", | |
"for category, category_group in df.groupby('facility_category'):\n", | |
" print(category)\n", | |
"\n", | |
" names, geoms = [], []\n", | |
"\n", | |
" # Group points together by their asset_name attribute\n", | |
" for name, group in category_group.groupby('asset_name'):\n", | |
" print(name, group.shape[0])\n", | |
" if group.shape[0] == 1:\n", | |
" print(f'Warning: group {name} has only one point, skipping')\n", | |
" continue\n", | |
"\n", | |
" # convert to a projected coordinate system for accurate distance calculations\n", | |
" gs = gpd.GeoSeries(\n", | |
" gpd.points_from_xy(group.longitude, group.latitude, crs='EPSG:4326')\n", | |
" ).to_crs('ESRI:54012')\n", | |
"\n", | |
" # calculate distance matrix: distance from each point to each other point\n", | |
" distance_matrix = gs.apply(lambda point: gs.distance(point))\n", | |
"\n", | |
" # calculate minimum spanning tree\n", | |
" mst = scipy.sparse.csgraph.minimum_spanning_tree(distance_matrix).toarray()\n", | |
"\n", | |
" # convert to an undirected, binary adjacency matrix\n", | |
" mst = (mst + mst.T).astype(bool)\n", | |
"\n", | |
" multilinestring = multilinestring_traversal(mst)\n", | |
"\n", | |
" # convert point indices to point coordinates\n", | |
" multilinestring = shapely.MultiLineString([\n", | |
" shapely.LineString([\n", | |
" shapely.Point(gs[idx].x, gs[idx].y) for idx in linestring\n", | |
" ]) for linestring in multilinestring])\n", | |
"\n", | |
" names.append(name)\n", | |
" geoms.append(multilinestring)\n", | |
"\n", | |
" new_gdf = gpd.GeoDataFrame(\n", | |
" pd.DataFrame({'name': names}), \n", | |
" geometry=gpd.GeoSeries(geoms), \n", | |
" crs='ESRI:54012')\n", | |
" new_gdf.to_file('linear_asset_multilinestrings.gpkg', layer=category)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment