Created
April 20, 2026 13:48
-
-
Save MaxGhenis/3f10cc98b643e0a76e74eae9afe35082 to your computer and use it in GitHub Desktop.
PolicyEngine-US Enhanced CPS analysis: do the wealthiest households appear among federal nonpayers? (Supporting analysis for https://maxghenis.com/blog/madoff-billionaire-nonpayers/)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Are the wealthiest U.S. households in the top 1% of federal income+payroll tax payers? | |
| Uses PolicyEngine-US Enhanced CPS microdata. Net worth is imputed from the Federal | |
| Reserve's Survey of Consumer Finances and caps at ~$190M in the 2026 file — true | |
| centibillionaires are not represented. | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| from policyengine_us import Microsimulation | |
| YEAR = 2026 | |
| print("Loading Enhanced CPS microsimulation...") | |
| sim = Microsimulation() | |
| print("Computing net worth, federal income tax, employee payroll tax...") | |
| net_worth = sim.calc("net_worth", period=YEAR) | |
| income_tax_hh = sim.calc("income_tax", period=YEAR, map_to="household") | |
| payroll_hh = sim.calc("employee_payroll_tax", period=YEAR, map_to="household") | |
| hh_weight = sim.calc("household_weight", period=YEAR) | |
| hh = pd.DataFrame({ | |
| "net_worth": np.asarray(net_worth), | |
| "income_tax": np.asarray(income_tax_hh), | |
| "payroll_tax": np.asarray(payroll_hh), | |
| "weight": np.asarray(hh_weight), | |
| }) | |
| hh["fed_tax"] = hh["income_tax"] + hh["payroll_tax"] | |
| print(f" {len(hh):,} household records ({(hh['weight'] > 0).sum():,} with positive weight)") | |
| print(f" {hh['weight'].sum() / 1e6:.1f}M weighted households") | |
def weighted_quantile(values, weights, q):
    """Return the weighted q-quantile of *values*.

    Sorts the values, accumulates the sorted weights, and returns the first
    value whose cumulative weight reaches ``q * total_weight`` (i.e. the
    lower weighted quantile).

    Args:
        values: Array-like of observations (list, Series, or ndarray).
        weights: Array-like of non-negative weights, same length as values.
        q: Quantile in [0, 1]; may be a scalar or an array of quantiles.

    Returns:
        The quantile value(s), drawn from *values*.
    """
    # Coerce so plain lists / pandas Series work with fancy indexing below.
    values = np.asarray(values)
    weights = np.asarray(weights)
    order = np.argsort(values)
    v = values[order]
    w = weights[order]
    cum = np.cumsum(w)
    # side="right" ensures a strictly-positive cumulative weight at the index;
    # clip guards q == 1 (searchsorted may return len(v)).
    idx = np.searchsorted(cum, np.asarray(q) * cum[-1], side="right")
    return v[np.clip(idx, 0, len(v) - 1)]
# Weighted median and 99th-percentile thresholds of the combined federal tax.
fed_tax_arr = hh["fed_tax"].to_numpy()
weight_arr = hh["weight"].to_numpy()
top_1pct_thresh = weighted_quantile(fed_tax_arr, weight_arr, 0.99)
median_fed_tax = weighted_quantile(fed_tax_arr, weight_arr, 0.50)

print(f"\nHousehold-level federal income+payroll tax")
print(f" median: ${median_fed_tax:>12,.0f}")
print(f" 99th pct (top 1%): ${top_1pct_thresh:>12,.0f}")


def _weighted_share(mask):
    # Weighted fraction of all households for which *mask* is True.
    return float((mask * hh["weight"]).sum() / hh["weight"].sum())


share_above_all = _weighted_share(hh["fed_tax"] >= top_1pct_thresh)
share_zero_all = _weighted_share(hh["fed_tax"] <= 0)
print(f" share of all hhs at or above threshold: {share_above_all:.2%} (sanity, should be ≈1%)")
print(f" share of all hhs paying ≤ $0 fed income+payroll tax: {share_zero_all:.2%}")
# --- Top-N by net worth ----------------------------------------------------------
# Rank households richest-first; cum_weight lets us cut off exactly N weighted
# households further down.
hh_sorted = hh.sort_values("net_worth", ascending=False).reset_index(drop=True)
hh_sorted["cum_weight"] = hh_sorted["weight"].cumsum()

header = (
    f"{'Top N (weighted)':<22}{'Records used':>14}"
    f"{'Net-worth range':<26}{'Share in top 1%':>18}{'Share ≤ $0':>14}"
)
print("\n" + "=" * 88)
print(header)
print("-" * 88)
# For each wealth tier "top N weighted households", measure how many are in the
# top 1% of federal taxpayers and how many pay nothing.
results = []
for N in [100, 1_000, 10_000, 100_000, 1_000_000]:
    # Take just enough of the richest records to cover N weighted households.
    crossed = hh_sorted["cum_weight"] >= N
    end_idx = int(crossed.idxmax()) + 1 if crossed.any() else len(hh_sorted)
    top = hh_sorted.iloc[:end_idx].copy()
    # Trim the last record's weight so the group sums to exactly N. Clamp the
    # overshoot at 0: if the whole file carries less than N weighted households,
    # the raw overshoot would be negative and would *inflate* the last weight.
    overshoot = max(float(top["cum_weight"].iloc[-1]) - N, 0.0)
    top.iloc[-1, top.columns.get_loc("weight")] = top["weight"].iloc[-1] - overshoot
    # Count records: the ones with positive *trimmed* weight actually contribute.
    records_contributing = int((top["weight"] > 0).sum())
    w = top["weight"].to_numpy()
    wt_sum = w.sum()
    share_in_top1 = float(((top["fed_tax"] >= top_1pct_thresh) * w).sum() / wt_sum)
    share_zero = float(((top["fed_tax"] <= 0) * w).sum() / wt_sum)
    # Net-worth range of the contributing records only.
    nw_min = float(top.loc[top["weight"] > 0, "net_worth"].min())
    nw_max = float(top.loc[top["weight"] > 0, "net_worth"].max())
    print(
        f"Top {N:>10,}"
        f"{records_contributing:>14,}"
        f" ${nw_min/1e6:>5,.1f}M – ${nw_max/1e6:>6,.1f}M"
        f"{share_in_top1:>18.1%}"
        f"{share_zero:>14.1%}"
    )
    results.append({
        "N": N,
        "records": records_contributing,
        "nw_min_m": nw_min / 1e6,
        "nw_max_m": nw_max / 1e6,
        "share_top1": share_in_top1,
        "share_zero": share_zero,
    })
| print("\nAll US households:") | |
| print( | |
| f" 1% in top-1% fed-tax payers (by construction);" | |
| f" {share_zero_all:.1%} pay ≤ $0 fed income+payroll tax" | |
| ) | |
| # Save for the post | |
| import json | |
| with open("/tmp/wealth_tax_results.json", "w") as f: | |
| json.dump({"threshold": float(top_1pct_thresh), "share_zero_all": share_zero_all, "results": results}, f, indent=2) | |
| print("\nWrote /tmp/wealth_tax_results.json") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment