MaxGhenis · April 20, 2026 13:48
diff --git a/wealth_tax_analysis.py b/wealth_tax_analysis.py
 """Are the wealthiest U.S. households in the top 1% of federal income+payroll tax payers?

 Uses PolicyEngine-US Enhanced CPS microdata. Net worth is imputed from the Federal
 Reserve's Survey of Consumer Finances and caps at ~$190M in the 2026 file — true
 centibillionaires are not represented.
 """

 import numpy as np
 import pandas as pd
 from policyengine_us import Microsimulation

 YEAR = 2026

 print("Loading Enhanced CPS microsimulation...")
 sim = Microsimulation()

 print("Computing net worth, federal income tax, employee payroll tax...")
 net_worth = sim.calc("net_worth", period=YEAR)
 income_tax_hh = sim.calc("income_tax", period=YEAR, map_to="household")
 payroll_hh = sim.calc("employee_payroll_tax", period=YEAR, map_to="household")
 hh_weight = sim.calc("household_weight", period=YEAR)

 hh = pd.DataFrame({
    "net_worth": np.asarray(net_worth),
    "income_tax": np.asarray(income_tax_hh),
    "payroll_tax": np.asarray(payroll_hh),
    "weight": np.asarray(hh_weight),
 })
 hh["fed_tax"] = hh["income_tax"] + hh["payroll_tax"]

 print(f"  {len(hh):,} household records  ({(hh['weight'] > 0).sum():,} with positive weight)")
 print(f"  {hh['weight'].sum() / 1e6:.1f}M weighted households")


 def weighted_quantile(values, weights, q):
    order = np.argsort(values)
    v = values[order]
    w = weights[order]
    cum = np.cumsum(w)
    return v[np.searchsorted(cum, q * cum[-1], side="right").clip(max=len(v) - 1)]


 top_1pct_thresh = weighted_quantile(hh["fed_tax"].to_numpy(), hh["weight"].to_numpy(), 0.99)
 median_fed_tax = weighted_quantile(hh["fed_tax"].to_numpy(), hh["weight"].to_numpy(), 0.50)
 print(f"\nHousehold-level federal income+payroll tax")
 print(f"  median:            ${median_fed_tax:>12,.0f}")
 print(f"  99th pct (top 1%): ${top_1pct_thresh:>12,.0f}")
 share_above_all = float(((hh["fed_tax"] >= top_1pct_thresh) * hh["weight"]).sum() / hh["weight"].sum())
 share_zero_all = float(((hh["fed_tax"] <= 0) * hh["weight"]).sum() / hh["weight"].sum())
 print(f"  share of all hhs at or above threshold: {share_above_all:.2%} (sanity, should be ≈1%)")
 print(f"  share of all hhs paying ≤ $0 fed income+payroll tax: {share_zero_all:.2%}")

 # --- Top-N by net worth ----------------------------------------------------------
 hh_sorted = hh.sort_values("net_worth", ascending=False).reset_index(drop=True)
 hh_sorted["cum_weight"] = hh_sorted["weight"].cumsum()

 print("\n" + "=" * 88)
 print(
    f"{'Top N (weighted)':<22}{'Records used':>14}"
    f"{'Net-worth range':<26}{'Share in top 1%':>18}{'Share ≤ $0':>14}"
 )
 print("-" * 88)

 results = []
 for N in [100, 1_000, 10_000, 100_000, 1_000_000]:
    crossed = hh_sorted["cum_weight"] >= N
    end_idx = int(crossed.idxmax()) + 1 if crossed.any() else len(hh_sorted)
    top = hh_sorted.iloc[:end_idx].copy()
    overshoot = top["cum_weight"].iloc[-1] - N
    top.iloc[-1, top.columns.get_loc("weight")] = top["weight"].iloc[-1] - overshoot

    # Count records: the ones with positive *trimmed* weight actually contribute
    records_total = len(top)
    records_contributing = int((top["weight"] > 0).sum())

    w = top["weight"].to_numpy()
    wt_sum = w.sum()
    share_in_top1 = float(((top["fed_tax"] >= top_1pct_thresh) * w).sum() / wt_sum)
    share_zero = float(((top["fed_tax"] <= 0) * w).sum() / wt_sum)

    nw_min = float(top.loc[top["weight"] > 0, "net_worth"].min())
    nw_max = float(top.loc[top["weight"] > 0, "net_worth"].max())

    print(
        f"Top {N:>10,}"
        f"{records_contributing:>14,}"
        f"   ${nw_min/1e6:>5,.1f}M – ${nw_max/1e6:>6,.1f}M"
        f"{share_in_top1:>18.1%}"
        f"{share_zero:>14.1%}"
    )
    results.append({
        "N": N,
        "records": records_contributing,
        "nw_min_m": nw_min / 1e6,
        "nw_max_m": nw_max / 1e6,
        "share_top1": share_in_top1,
        "share_zero": share_zero,
    })

 print("\nAll US households:")
 print(
    f"  1% in top-1% fed-tax payers (by construction);"
    f" {share_zero_all:.1%} pay ≤ $0 fed income+payroll tax"
 )

 # Save for the post
 import json
 with open("/tmp/wealth_tax_results.json", "w") as f:
    json.dump({"threshold": float(top_1pct_thresh), "share_zero_all": share_zero_all, "results": results}, f, indent=2)
 print("\nWrote /tmp/wealth_tax_results.json")
	"""Are the wealthiest U.S. households in the top 1% of federal income+payroll tax payers?

	Uses PolicyEngine-US Enhanced CPS microdata. Net worth is imputed from the Federal
	Reserve's Survey of Consumer Finances and caps at ~$190M in the 2026 file — true
	centibillionaires are not represented.
	"""

	import numpy as np
	import pandas as pd
	from policyengine_us import Microsimulation

	YEAR = 2026

	print("Loading Enhanced CPS microsimulation...")
	sim = Microsimulation()

	print("Computing net worth, federal income tax, employee payroll tax...")
	net_worth = sim.calc("net_worth", period=YEAR)
	income_tax_hh = sim.calc("income_tax", period=YEAR, map_to="household")
	payroll_hh = sim.calc("employee_payroll_tax", period=YEAR, map_to="household")
	hh_weight = sim.calc("household_weight", period=YEAR)

	hh = pd.DataFrame({
	"net_worth": np.asarray(net_worth),
	"income_tax": np.asarray(income_tax_hh),
	"payroll_tax": np.asarray(payroll_hh),
	"weight": np.asarray(hh_weight),
	})
	hh["fed_tax"] = hh["income_tax"] + hh["payroll_tax"]

	print(f" {len(hh):,} household records ({(hh['weight'] > 0).sum():,} with positive weight)")
	print(f" {hh['weight'].sum() / 1e6:.1f}M weighted households")


	def weighted_quantile(values, weights, q):
	order = np.argsort(values)
	v = values[order]
	w = weights[order]
	cum = np.cumsum(w)
	return v[np.searchsorted(cum, q * cum[-1], side="right").clip(max=len(v) - 1)]


	top_1pct_thresh = weighted_quantile(hh["fed_tax"].to_numpy(), hh["weight"].to_numpy(), 0.99)
	median_fed_tax = weighted_quantile(hh["fed_tax"].to_numpy(), hh["weight"].to_numpy(), 0.50)
	print(f"\nHousehold-level federal income+payroll tax")
	print(f" median: ${median_fed_tax:>12,.0f}")
	print(f" 99th pct (top 1%): ${top_1pct_thresh:>12,.0f}")
	share_above_all = float(((hh["fed_tax"] >= top_1pct_thresh) * hh["weight"]).sum() / hh["weight"].sum())
	share_zero_all = float(((hh["fed_tax"] <= 0) * hh["weight"]).sum() / hh["weight"].sum())
	print(f" share of all hhs at or above threshold: {share_above_all:.2%} (sanity, should be ≈1%)")
	print(f" share of all hhs paying ≤ $0 fed income+payroll tax: {share_zero_all:.2%}")

	# --- Top-N by net worth ----------------------------------------------------------
	hh_sorted = hh.sort_values("net_worth", ascending=False).reset_index(drop=True)
	hh_sorted["cum_weight"] = hh_sorted["weight"].cumsum()

	print("\n" + "=" * 88)
	print(
	f"{'Top N (weighted)':<22}{'Records used':>14}"
	f"{'Net-worth range':<26}{'Share in top 1%':>18}{'Share ≤ $0':>14}"
	)
	print("-" * 88)

	results = []
	for N in [100, 1_000, 10_000, 100_000, 1_000_000]:
	crossed = hh_sorted["cum_weight"] >= N
	end_idx = int(crossed.idxmax()) + 1 if crossed.any() else len(hh_sorted)
	top = hh_sorted.iloc[:end_idx].copy()
	overshoot = top["cum_weight"].iloc[-1] - N
	top.iloc[-1, top.columns.get_loc("weight")] = top["weight"].iloc[-1] - overshoot

	# Count records: the ones with positive trimmed weight actually contribute
	records_total = len(top)
	records_contributing = int((top["weight"] > 0).sum())

	w = top["weight"].to_numpy()
	wt_sum = w.sum()
	share_in_top1 = float(((top["fed_tax"] >= top_1pct_thresh) * w).sum() / wt_sum)
	share_zero = float(((top["fed_tax"] <= 0) * w).sum() / wt_sum)

	nw_min = float(top.loc[top["weight"] > 0, "net_worth"].min())
	nw_max = float(top.loc[top["weight"] > 0, "net_worth"].max())

	print(
	f"Top {N:>10,}"
	f"{records_contributing:>14,}"
	f" ${nw_min/1e6:>5,.1f}M – ${nw_max/1e6:>6,.1f}M"
	f"{share_in_top1:>18.1%}"
	f"{share_zero:>14.1%}"
	)
	results.append({
	"N": N,
	"records": records_contributing,
	"nw_min_m": nw_min / 1e6,
	"nw_max_m": nw_max / 1e6,
	"share_top1": share_in_top1,
	"share_zero": share_zero,
	})

	print("\nAll US households:")
	print(
	f" 1% in top-1% fed-tax payers (by construction);"
	f" {share_zero_all:.1%} pay ≤ $0 fed income+payroll tax"
	)

	# Save for the post
	import json
	with open("/tmp/wealth_tax_results.json", "w") as f:
	json.dump({"threshold": float(top_1pct_thresh), "share_zero_all": share_zero_all, "results": results}, f, indent=2)
	print("\nWrote /tmp/wealth_tax_results.json")
No results found