Skip to content

Instantly share code, notes, and snippets.

@earthboundkid
Last active January 2, 2026 22:32
Show Gist options
  • Select an option

  • Save earthboundkid/7f2c2a54fb50cb4e8ed16d65847a59fb to your computer and use it in GitHub Desktop.

Select an option

Save earthboundkid/7f2c2a54fb50cb4e8ed16d65847a59fb to your computer and use it in GitHub Desktop.
Count Spotlight PA articles over time
import csv
from collections import defaultdict
from pathlib import Path
def _parse_year_month(stem):
    """Return the zero-padded "YYYY-MM" prefix of *stem*, or None if absent.

    The stem must start with a 4-digit year and a 1- or 2-digit month
    (1..12), separated by hyphens, e.g. "2024-01-15-some-title".
    """
    year, _, rest = stem.partition("-")
    month = rest.partition("-")[0]
    if len(year) != 4 or len(month) not in (1, 2):
        return None
    # Explicit ASCII check: str.isdigit() also accepts characters such as
    # superscript digits, which int() cannot parse.
    if not all(ch in "0123456789" for ch in year + month):
        return None
    if not 1 <= int(month) <= 12:
        return None
    # Zero-pad so "2024-1" and "2024-01" land in the same bucket and the
    # keys sort chronologically as plain strings.
    return f"{year}-{int(month):02d}"


def count_files_by_month(filenames):
    """Count dated files per folder and month.

    Args:
        filenames: Iterable of file paths. The folder is the name of the
            file's immediate parent directory; the month comes from the
            leading "YYYY-MM" of the file name.

    Returns:
        A nested defaultdict mapping folder -> "YYYY-MM" -> file count.
        Paths whose file name does not start with a valid year and month
        are skipped and do not create a folder entry.
    """
    counts = defaultdict(lambda: defaultdict(int))
    for filename in filenames:
        path = Path(filename)
        year_month = _parse_year_month(path.stem)
        if year_month is None:
            continue
        counts[path.parent.name][year_month] += 1
    return counts
def write_csv(counts, output_file="output.csv", folders=None):
    """Write cumulative per-folder counts to a CSV file, one row per folder.

    The header row is "folder" followed by every month key found anywhere
    in *counts*, sorted as strings. Each folder row holds the running total
    of that folder's counts up to and including each month.

    Args:
        counts: Mapping of folder -> month -> count. Any mapping works; it
            is only read, never modified.
        output_file: Path of the CSV file to create or overwrite.
        folders: Folders to emit, in row order. Defaults to
            ("news", "statecollege", "berks"). A folder missing from
            *counts* gets a row of zeros.
    """
    if folders is None:
        folders = ("news", "statecollege", "berks")

    all_months = sorted(
        {month for months in counts.values() for month in months}
    )

    rows = []
    for folder in folders:
        # .get() keeps the lookup read-only: indexing a defaultdict would
        # insert empty entries, and indexing a plain dict would raise.
        monthly = counts.get(folder, {})
        running_total = 0
        row = [folder]
        for month in all_months:
            running_total += monthly.get(month, 0)
            row.append(running_total)
        rows.append(row)

    # newline="" is what the csv module expects of files it writes to.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["folder", *all_months])
        writer.writerows(rows)
def main(argv=None):
    """Read a list of file paths and write the cumulative counts CSV.

    Args:
        argv: Command-line arguments, excluding the program name. Defaults
            to sys.argv[1:]. The first argument is the path of a text file
            with one file path per line; "-" reads the list from stdin.
            With no argument, "filenames.txt" is read.

    Blank lines are ignored and surrounding whitespace is stripped. The
    result is written by write_csv() to its default output file.
    """
    import sys  # local import: only the script entry point needs it

    args = sys.argv[1:] if argv is None else list(argv)
    source = args[0] if args else "filenames.txt"

    if source == "-":
        raw_lines = sys.stdin.read().splitlines()
    else:
        with open(source, "r", encoding="utf-8") as f:
            raw_lines = f.read().splitlines()

    filenames = [line.strip() for line in raw_lines if line.strip()]
    counts = count_files_by_month(filenames)
    write_csv(counts)


# Guarded so importing this module does not try to open filenames.txt.
if __name__ == "__main__":
    main()
  • Run find ./content -iname '*-*-*.md' | tee filenames.txt
  • Remove newsletters and other folders you don't care about
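  • Run counts.py in the directory containing filenames.txt; it writes the cumulative monthly counts per folder to output.csv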
  • Make stacked bar chart in DataWrapper
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment