|
******************************************************************************** |
|
* Using PolicyEngine from Stata |
|
* |
|
* Three approaches shown below: |
|
* 1. Direct Python integration (Stata 16+) |
|
* 2. API calls via curl |
|
* 3. Batch processing via Python script |
|
******************************************************************************** |
|
|
|
*=============================================================================== |
|
* APPROACH 1: Direct Python Integration (Stata 16+) |
|
* Requires: Stata 16+, Python 3.9+, policyengine-us package |
|
*=============================================================================== |
|
|
|
* First, ensure Python is configured in Stata: |
|
* python query |
|
|
|
* Install policyengine-us (run once): |
|
* shell pip install policyengine-us |
|
|
|
* Example: Calculate federal income tax for a single filer |
|
python:
from policyengine_us import Simulation
from sfi import Scalar

# One 40-year-old adult in California with $50,000 of employment income,
# forming a single tax unit and a single household.
situation = {
    "people": {
        "person": {
            "age": {2025: 40},
            "employment_income": {2025: 50_000},
        },
    },
    "tax_units": {
        "tax_unit": {"members": ["person"]},
    },
    "households": {
        "household": {
            "members": ["person"],
            "state_code": {2025: "CA"},
        },
    },
}

sim = Simulation(situation=situation)

# Stata scalar name -> PolicyEngine variable to compute for 2025.
outputs = {
    "fed_tax": "income_tax",
    "state_tax": "ca_income_tax",
    "eitc": "eitc",
    "ctc": "ctc",
}

# Run every calculation first; element 0 is the value for the only entity.
values = {name: sim.calculate(variable, 2025)[0] for name, variable in outputs.items()}

# Hand the results back to Stata as scalars.
Scalar.setValue("fed_tax", float(values["fed_tax"]))
Scalar.setValue("state_tax", float(values["state_tax"]))
Scalar.setValue("eitc", float(values["eitc"]))
Scalar.setValue("ctc", float(values["ctc"]))
end

* Show the four scalars set by the Python block above
display "Federal Income Tax: $" %10.2f scalar(fed_tax)
display "State Income Tax: $" %10.2f scalar(state_tax)
display "EITC: $" %10.2f scalar(eitc)
display "CTC: $" %10.2f scalar(ctc)
|
|
|
|
|
*=============================================================================== |
|
* APPROACH 2: API Calls via curl (works with any Stata version) |
|
* No Python installation required on Stata machine |
|
*=============================================================================== |
|
|
|
* Using PolicyEngine's hosted API |
|
* Temporary file that receives the raw JSON response from the API.
tempfile response

* The request is one shell command split over several lines. Stata does not
* treat a trailing backslash as a line continuation, so each line is joined
* with the triple-slash continuation marker instead.
* The response is written with curl's -o option and a quoted path so that
* temporary directories containing spaces still work.
* NOTE: the single-quoted JSON payload assumes a POSIX shell (macOS or Linux).
* On Windows, save the JSON to a file and pass it as -d @filename.
shell curl -s -X POST "https://household.api.policyengine.org/us/calculate" ///
    -H "Content-Type: application/json" ///
    -d '{"household": {"people": {"person": {"age": {"2025": 40}, "employment_income": {"2025": 50000}}}, "tax_units": {"tax_unit": {"members": ["person"]}}, "households": {"household": {"members": ["person"], "state_code": {"2025": "CA"}}}}}' ///
    -o "`response'"
|
|
|
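* Parse the JSON response. Stata has no built-in JSON parser, so use a
* community-contributed command such as jsonio or insheetjson (both on SSC),
* or read the file from a python block with Python's json module.
* Example with jsonio (check "help jsonio" for the exact syntax):
* jsonio kv, file("`response'") elem("result.income_tax.2025")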
|
|
|
|
|
*=============================================================================== |
|
* APPROACH 3: Batch Processing via External Python Script |
|
* Best for processing many observations |
|
*=============================================================================== |
|
|
|
* Step 1: Export your data to CSV |
|
* sysuse auto, clear |
|
* export delimited using "input_data.csv", replace |
|
|
|
* Step 2: Run Python script (see policyengine_batch.py in this gist) |
|
* shell python policyengine_batch.py input_data.csv output_results.csv |
|
|
|
* Step 3: Merge results back |
|
* import delimited using "output_results.csv", clear |
|
|
|
|
|
*=============================================================================== |
|
* EXAMPLE: Loop over dataset observations |
|
* Calculate PolicyEngine results for each row |
|
*=============================================================================== |
|
|
|
* Create example dataset |
|
clear
input id age income str2 state
1 35 30000 "TX"
2 42 75000 "CA"
3 28 45000 "NY"
4 55 120000 "FL"
end

* Generate empty results variables
gen double fed_tax = .
gen double eitc = .
gen double net_income = .

* Define the per-observation calculator ONCE, outside the loop.
* A multi-line python block cannot be placed inside forvalues or foreach:
* its closing "end" collides with the loop parsing. Only one-line
* "python: statement" calls are safe inside a loop body.
python:
from policyengine_us import Simulation
from sfi import Data

PE_YEAR = 2025

# Stata result variable -> PolicyEngine variable to compute.
PE_OUTPUTS = (
    ("fed_tax", "income_tax"),
    ("eitc", "eitc"),
    ("net_income", "household_net_income"),
)

def pe_fill_row(obs):
    """Compute PolicyEngine results for Stata observation `obs` (1-based).

    Reads age, income and state for that observation from the dataset in
    memory, simulates a one-person household, and stores the results in
    the fed_tax, eitc and net_income variables of the same observation.
    """
    row = obs - 1  # sfi.Data indexes observations from 0; Stata counts from 1
    situation = {
        "people": {
            "person": {
                "age": {PE_YEAR: Data.getAt("age", row)},
                "employment_income": {PE_YEAR: Data.getAt("income", row)},
            },
        },
        "tax_units": {"tax_unit": {"members": ["person"]}},
        "households": {
            "household": {
                "members": ["person"],
                "state_code": {PE_YEAR: Data.getAt("state", row)},
            },
        },
    }
    sim = Simulation(situation=situation)
    for stata_var, pe_var in PE_OUTPUTS:
        # Element 0 is the value for the only entity in the situation.
        Data.storeAt(stata_var, row, float(sim.calculate(pe_var, PE_YEAR)[0]))
end

* Loop and calculate (note: slow for large datasets - use batch approach instead)
forvalues i = 1/`=_N' {
    python: pe_fill_row(`i')
}

list
|
|
|
******************************************************************************** |
|
* Notes: |
|
* - PolicyEngine is open source: github.com/policyengine/policyengine-us |
|
* - API documentation: policyengine.org/us/api |
|
* - For large-scale analysis, consider the batch Python approach |
|
* - Questions? Contact [email protected] |
|
******************************************************************************** |