Skip to content

Instantly share code, notes, and snippets.

@koorukuroo
Created May 3, 2025 14:02
Show Gist options
  • Save koorukuroo/430141241afb63f5cb7fcbcc1801e8ce to your computer and use it in GitHub Desktop.
Save koorukuroo/430141241afb63f5cb7fcbcc1801e8ce to your computer and use it in GitHub Desktop.
import os
import urllib.request
import zipfile
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
# 1. Set URL and file paths
# Remote archive, the local file name it is cached under, and the directory
# the archive is unpacked into (both relative to the working directory).
url = "https://aaronclauset.github.io/hierarchy/terrorists.zip"
zip_path = "terrorists.zip"
extract_dir = "terrorist_data"

# 2. Download dataset if not already present
if not os.path.exists(zip_path):
    print("📥 Downloading dataset...")
    # Download under a temporary name and rename only on success, so an
    # interrupted transfer never leaves a truncated file at zip_path that
    # the existence check above would accept as complete on the next run.
    partial_path = zip_path + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, zip_path)
    finally:
        # No-op after a successful rename; removes leftovers on failure.
        if os.path.exists(partial_path):
            os.remove(partial_path)
else:
    print("✅ Dataset already downloaded.")

# 3. Extract ZIP archive if not already extracted
# The presence of extract_dir is used as the "already extracted" marker.
if not os.path.exists(extract_dir):
    print("📦 Extracting zip file...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
else:
    print("✅ Files already extracted.")
# 4. Load edge list from 'terrorist.pairs'
# Rows are parsed as two whitespace-separated columns with no header line.
edge_list_path = os.path.join(extract_dir, "terrorists", "terrorist.pairs")
data = pd.read_csv(edge_list_path, sep=r"\s+", header=None, names=["Source", "Target"], engine="python")

# 5. Load name mapping from 'terrorist.names'
# Builds {integer id: name} from lines of the form "<id> <name>".
name_file_path = os.path.join(extract_dir, "terrorists", "terrorist.names")
name_map = {}
# Explicit encoding keeps decoding independent of the platform locale.
# NOTE(review): assumes the names file is ASCII/UTF-8 — confirm.
with open(name_file_path, 'r', encoding="utf-8") as f:
    for line in f:
        # Split on the first run of ANY whitespace (space or tab), not only
        # a literal space, so tab-separated "id<TAB>name" lines are parsed
        # instead of being dropped or failing in int().
        parts = line.split(maxsplit=1)
        # Lines without both an id and a name (e.g. blank lines) are skipped.
        if len(parts) == 2:
            idx, name = parts
            name_map[int(idx)] = name.strip()
# 6. Build the graph and rename nodes using actual names
# One edge per row of the edge table; the relabelled graph is a new copy,
# so G keeps its original node labels.
G = nx.from_pandas_edgelist(data, "Source", "Target")
G_named = nx.relabel_nodes(G, mapping=name_map, copy=True)

# 7. Calculate betweenness centrality
# Maps each node of the relabelled graph to its normalized score.
bc = nx.betweenness_centrality(G_named, normalized=True)
# 8. Prepare for visualization
# Fixed seed so the layout is reproducible between runs.
pos = nx.spring_layout(G_named, seed=42)
fig, ax = plt.subplots(figsize=(14, 12))

# Iterate nodes once, in graph order, so sizes and colors line up with the
# order in which the nodes are drawn.
node_order = list(G_named.nodes())

# Node sizes scaled by betweenness centrality (base size 500)
node_size = [500 + 8000 * bc.get(name, 0) for name in node_order]

# Node colors mapped to centrality values using a color map
bc_values = list(bc.values())
lowest, highest = min(bc_values), max(bc_values)
norm = plt.Normalize(vmin=lowest, vmax=highest)
cmap = plt.cm.Reds
node_colors = [cmap(norm(bc[name])) for name in node_order]

# 9. Draw the network
draw_options = {
    "with_labels": True,
    "node_size": node_size,
    "node_color": node_colors,
    "edge_color": 'gray',
    "font_size": 9,
}
nx.draw_networkx(G_named, pos=pos, ax=ax, **draw_options)

# 10. Add a colorbar to represent centrality
# The mappable carries only the colormap and normalization, no data array.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax, orientation='vertical', shrink=0.7)
cbar.set_label('Betweenness Centrality')

# 11. Final display settings
ax.set_title("Terrorist Network with Betweenness Centrality (Node Size = Importance)")
ax.axis("off")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment