Skip to content

Instantly share code, notes, and snippets.

@MaxGhenis
Created April 20, 2026 13:48
Show Gist options
  • Select an option

  • Save MaxGhenis/3f10cc98b643e0a76e74eae9afe35082 to your computer and use it in GitHub Desktop.

Select an option

Save MaxGhenis/3f10cc98b643e0a76e74eae9afe35082 to your computer and use it in GitHub Desktop.
PolicyEngine-US Enhanced CPS analysis: do the wealthiest households appear among federal nonpayers? (Supporting analysis for https://maxghenis.com/blog/madoff-billionaire-nonpayers/)
"""Are the wealthiest U.S. households in the top 1% of federal income+payroll tax payers?
Uses PolicyEngine-US Enhanced CPS microdata. Net worth is imputed from the Federal
Reserve's Survey of Consumer Finances and caps at ~$190M in the 2026 file — true
centibillionaires are not represented.
"""
import numpy as np
import pandas as pd
from policyengine_us import Microsimulation
YEAR = 2026
print("Loading Enhanced CPS microsimulation...")
sim = Microsimulation()
print("Computing net worth, federal income tax, employee payroll tax...")
net_worth = sim.calc("net_worth", period=YEAR)
income_tax_hh = sim.calc("income_tax", period=YEAR, map_to="household")
payroll_hh = sim.calc("employee_payroll_tax", period=YEAR, map_to="household")
hh_weight = sim.calc("household_weight", period=YEAR)
hh = pd.DataFrame({
"net_worth": np.asarray(net_worth),
"income_tax": np.asarray(income_tax_hh),
"payroll_tax": np.asarray(payroll_hh),
"weight": np.asarray(hh_weight),
})
hh["fed_tax"] = hh["income_tax"] + hh["payroll_tax"]
print(f" {len(hh):,} household records ({(hh['weight'] > 0).sum():,} with positive weight)")
print(f" {hh['weight'].sum() / 1e6:.1f}M weighted households")
def weighted_quantile(values, weights, q):
order = np.argsort(values)
v = values[order]
w = weights[order]
cum = np.cumsum(w)
return v[np.searchsorted(cum, q * cum[-1], side="right").clip(max=len(v) - 1)]
top_1pct_thresh = weighted_quantile(hh["fed_tax"].to_numpy(), hh["weight"].to_numpy(), 0.99)
median_fed_tax = weighted_quantile(hh["fed_tax"].to_numpy(), hh["weight"].to_numpy(), 0.50)
print(f"\nHousehold-level federal income+payroll tax")
print(f" median: ${median_fed_tax:>12,.0f}")
print(f" 99th pct (top 1%): ${top_1pct_thresh:>12,.0f}")
share_above_all = float(((hh["fed_tax"] >= top_1pct_thresh) * hh["weight"]).sum() / hh["weight"].sum())
share_zero_all = float(((hh["fed_tax"] <= 0) * hh["weight"]).sum() / hh["weight"].sum())
print(f" share of all hhs at or above threshold: {share_above_all:.2%} (sanity, should be ≈1%)")
print(f" share of all hhs paying ≤ $0 fed income+payroll tax: {share_zero_all:.2%}")
# --- Top-N by net worth ----------------------------------------------------------
hh_sorted = hh.sort_values("net_worth", ascending=False).reset_index(drop=True)
hh_sorted["cum_weight"] = hh_sorted["weight"].cumsum()
print("\n" + "=" * 88)
print(
f"{'Top N (weighted)':<22}{'Records used':>14}"
f"{'Net-worth range':<26}{'Share in top 1%':>18}{'Share ≤ $0':>14}"
)
print("-" * 88)
results = []
for N in [100, 1_000, 10_000, 100_000, 1_000_000]:
crossed = hh_sorted["cum_weight"] >= N
end_idx = int(crossed.idxmax()) + 1 if crossed.any() else len(hh_sorted)
top = hh_sorted.iloc[:end_idx].copy()
overshoot = top["cum_weight"].iloc[-1] - N
top.iloc[-1, top.columns.get_loc("weight")] = top["weight"].iloc[-1] - overshoot
# Count records: the ones with positive *trimmed* weight actually contribute
records_total = len(top)
records_contributing = int((top["weight"] > 0).sum())
w = top["weight"].to_numpy()
wt_sum = w.sum()
share_in_top1 = float(((top["fed_tax"] >= top_1pct_thresh) * w).sum() / wt_sum)
share_zero = float(((top["fed_tax"] <= 0) * w).sum() / wt_sum)
nw_min = float(top.loc[top["weight"] > 0, "net_worth"].min())
nw_max = float(top.loc[top["weight"] > 0, "net_worth"].max())
print(
f"Top {N:>10,}"
f"{records_contributing:>14,}"
f" ${nw_min/1e6:>5,.1f}M – ${nw_max/1e6:>6,.1f}M"
f"{share_in_top1:>18.1%}"
f"{share_zero:>14.1%}"
)
results.append({
"N": N,
"records": records_contributing,
"nw_min_m": nw_min / 1e6,
"nw_max_m": nw_max / 1e6,
"share_top1": share_in_top1,
"share_zero": share_zero,
})
print("\nAll US households:")
print(
f" 1% in top-1% fed-tax payers (by construction);"
f" {share_zero_all:.1%} pay ≤ $0 fed income+payroll tax"
)
# Save for the post
import json
with open("/tmp/wealth_tax_results.json", "w") as f:
json.dump({"threshold": float(top_1pct_thresh), "share_zero_all": share_zero_all, "results": results}, f, indent=2)
print("\nWrote /tmp/wealth_tax_results.json")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment