Created
May 3, 2025 14:02
-
-
Save koorukuroo/430141241afb63f5cb7fcbcc1801e8ce to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import shutil
import urllib.request
import zipfile

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
# 1. Set URL and file paths
url = "https://aaronclauset.github.io/hierarchy/terrorists.zip"
zip_path = "terrorists.zip"
extract_dir = "terrorist_data"

# 2. Download dataset if not already present
# The archive is fetched to a ".part" file and renamed only after the download
# finishes, so an interrupted run never leaves a truncated file at zip_path
# that the existence check would mistake for a complete download.
if not os.path.exists(zip_path):
    print("📥 Downloading dataset...")
    partial_zip_path = zip_path + ".part"
    urllib.request.urlretrieve(url, partial_zip_path)
    os.replace(partial_zip_path, zip_path)
else:
    print("✅ Dataset already downloaded.")

# 3. Extract ZIP archive if not already extracted
# Same staging idea as the download: extract into a scratch directory and move
# it into place once extraction has completed, so extract_dir only ever exists
# in a fully extracted state.
if not os.path.exists(extract_dir):
    print("📦 Extracting zip file...")
    partial_extract_dir = extract_dir + ".part"
    # Clear leftovers from a previously interrupted extraction, if any.
    shutil.rmtree(partial_extract_dir, ignore_errors=True)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(partial_extract_dir)
    os.replace(partial_extract_dir, extract_dir)
else:
    print("✅ Files already extracted.")
# 4. Load edge list from 'terrorist.pairs'
# Rows are read as two whitespace-separated columns with no header row.
edge_list_path = os.path.join(extract_dir, "terrorists", "terrorist.pairs")
data = pd.read_csv(edge_list_path, sep=r"\s+", header=None, names=["Source", "Target"], engine="python")

# 5. Load name mapping from 'terrorist.names'
# Each usable line is "<integer id><whitespace><name>"; lines that do not
# contain both parts are skipped.
name_file_path = os.path.join(extract_dir, "terrorists", "terrorist.names")
name_map = {}
with open(name_file_path, 'r') as f:
    for line in f:
        # Split on any run of whitespace (not just a single space) so that
        # tab-delimited lines are parsed the same way the edge list is,
        # instead of being silently dropped.
        parts = line.split(None, 1)
        if len(parts) == 2:
            idx, name = parts
            name_map[int(idx)] = name.strip()
# 6. Build the graph and rename nodes using actual names
# Edges come from the "Source"/"Target" columns; relabelling returns a new
# graph and leaves G (keyed by the raw ids) untouched.
G = nx.from_pandas_edgelist(data, "Source", "Target", create_using=nx.Graph)
G_named = nx.relabel_nodes(G, name_map, copy=True)

# 7. Calculate betweenness centrality
# Normalized scores (the library default), keyed by node label.
bc = nx.betweenness_centrality(G_named)
# 8. Prepare for visualization
# A fixed seed keeps the spring layout reproducible between runs.
pos = nx.spring_layout(G_named, seed=42)
fig, ax = plt.subplots(figsize=(14, 12))

# Centrality score of every node, in the same order the graph yields its nodes
node_scores = [bc[node] for node in G_named.nodes()]

# Node sizes scaled by betweenness centrality (base size 500, plus 8000 per unit)
node_size = [500 + 8000 * score for score in node_scores]

# Node colors mapped to centrality values using a color map
bc_values = list(bc.values())
cmap = plt.cm.Reds
norm = plt.Normalize(vmin=min(bc_values), vmax=max(bc_values))
node_colors = [cmap(norm(score)) for score in node_scores]

# 9. Draw the network
draw_options = {
    "with_labels": True,
    "node_size": node_size,
    "node_color": node_colors,
    "edge_color": "gray",
    "font_size": 9,
}
nx.draw_networkx(G_named, pos=pos, ax=ax, **draw_options)

# 10. Add a colorbar to represent centrality
# The mappable carries only the colormap and normalization used for the nodes.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax, orientation="vertical", shrink=0.7)
cbar.set_label("Betweenness Centrality")

# 11. Final display settings
ax.set_title("Terrorist Network with Betweenness Centrality (Node Size = Importance)")
ax.axis("off")
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment