Skip to content

Instantly share code, notes, and snippets.

@emlys
Created August 28, 2023 19:47
Show Gist options
  • Save emlys/a6663b8ce6176064a2a13760083e0fe6 to your computer and use it in GitHub Desktop.
Save emlys/a6663b8ce6176064a2a13760083e0fe6 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "453b8b46-180d-4015-a72c-131ba7a18a87",
"metadata": {},
"outputs": [],
"source": [
"from collections import deque\n",
"\n",
"import geopandas as gpd\n",
"import numpy\n",
"import pandas as pd\n",
"import scipy\n",
"import shapely"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "979b009a-d5d3-4555-89d3-28669998529b",
"metadata": {},
"outputs": [],
"source": [
"# Read in points\n",
"df = pd.read_csv('preprocessed_assets.csv')\n",
"df = df[df['facility_category'].apply( # filter to linear asset types\n",
" lambda val: val in {'Natural Gas Pipeline', 'Transmission Line'})]\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "57958f9a-3d66-4c51-bd20-8cd749f58c69",
"metadata": {},
"outputs": [],
"source": [
"def multilinestring_traversal(graph):\n",
" \"\"\"\n",
" Traverse a connected, undirected tree graph to produce a multilinestring representing the tree shape.\n",
" \n",
" Similar to pre-order depth-first traversal, but revisits branch points.\n",
" \n",
" Example:\n",
" \n",
" [[0 1 0 0 0 0 0] 0\n",
" [1 0 1 0 1 0 0] |\n",
" [0 1 0 1 0 0 0] 1\n",
" [0 0 1 0 0 0 0] = / \\ -> [[0, 1, 2, 3], [1, 4, 5], [4, 6]]\n",
" [0 1 0 0 0 1 1] 2 4\n",
" [0 0 0 0 1 0 0] / / \\\n",
" [0 0 0 0 1 0 0]] 3 5 6\n",
" \n",
"\n",
" Args:\n",
" graph (numpy.array): NxN adjacency matrix representing the graph\n",
" \n",
" Returns:\n",
" list[list[int]]. List (representing a multilinestring) of lists (representing linestrings) of \n",
" integer indices (0..(N-1))\n",
" \"\"\"\n",
" # make a copy because we'll modify it\n",
" graph = numpy.copy(graph)\n",
" \n",
" # start on an arbitrary leaf node \n",
" for i, row in enumerate(graph):\n",
" if row.sum() == 1:\n",
" current_node = i\n",
" break\n",
" \n",
" branch_points = deque([current_node])\n",
" linestrings = []\n",
"\n",
" # while there are branches we haven't traversed yet\n",
" while branch_points:\n",
" \n",
" linestring = [] # start a new linestring representing this branch\n",
" current_node = branch_points.pop()\n",
" next_available_nodes = set([current_node])\n",
" \n",
" # if at least one neighbor is available, continue along this branch\n",
" while next_available_nodes:\n",
"\n",
" # if more than one neighbor is available, store this node to revisit later\n",
" if len(next_available_nodes) > 1:\n",
" branch_points.append(current_node)\n",
" \n",
" current_node = next_available_nodes.pop()\n",
" \n",
" linestring.append(current_node)\n",
" \n",
" # mark this node as visited\n",
" # after this, it won't be available as a neighbor of any other node\n",
" graph[:, current_node] = False\n",
" \n",
" # get the set of nodes that are adjacent to current_node and not yet visited\n",
" next_available_nodes = set(list(numpy.argwhere(graph[current_node]).flatten()))\n",
" \n",
" linestrings.append(linestring)\n",
"\n",
" return linestrings\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c7cbe211-d5c0-470a-91f0-d6befa4eee57",
"metadata": {},
"outputs": [],
"source": [
"# Group points together by facility category\n",
"for category, category_group in df.groupby('facility_category'):\n",
" print(category)\n",
"\n",
" names, geoms = [], []\n",
"\n",
" # Group points together by their asset_name attribute\n",
" for name, group in category_group.groupby('asset_name'):\n",
" print(name, group.shape[0])\n",
" if group.shape[0] == 1:\n",
" print(f'Warning: group {name} has only one point, skipping')\n",
" continue\n",
"\n",
" # convert to a projected coordinate system for accurate distance calculations\n",
" gs = gpd.GeoSeries(\n",
" gpd.points_from_xy(group.longitude, group.latitude, crs='EPSG:4326')\n",
" ).to_crs('ESRI:54012')\n",
"\n",
" # calculate distance matrix: distance from each point to each other point\n",
" distance_matrix = gs.apply(lambda point: gs.distance(point))\n",
"\n",
" # calculate minimum spanning tree\n",
" mst = scipy.sparse.csgraph.minimum_spanning_tree(distance_matrix).toarray()\n",
"\n",
" # convert to an undirected, binary adjacency matrix\n",
" mst = (mst + mst.T).astype(bool)\n",
"\n",
" multilinestring = multilinestring_traversal(mst)\n",
"\n",
" # convert point indices to point coordinates\n",
" multilinestring = shapely.MultiLineString([\n",
" shapely.LineString([\n",
" shapely.Point(gs[idx].x, gs[idx].y) for idx in linestring\n",
" ]) for linestring in multilinestring])\n",
"\n",
" names.append(name)\n",
" geoms.append(multilinestring)\n",
"\n",
" new_gdf = gpd.GeoDataFrame(\n",
" pd.DataFrame({'name': names}), \n",
" geometry=gpd.GeoSeries(geoms), \n",
" crs='ESRI:54012')\n",
" new_gdf.to_file('linear_asset_multilinestrings.gpkg', layer=category)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment