From 20032087e3b7bb8c2d0ad7a8f3a57b8509a4680c Mon Sep 17 00:00:00 2001 From: martinholmer Date: Thu, 19 Mar 2026 19:18:25 -0400 Subject: [PATCH 1/6] Remove jax package from setup.py --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index da7cda09..9812f62a 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,6 @@ "scikit-learn", "scipy", "xlrd", - "jax", "black>=26.1.0", "pycodestyle>=2.14.0", "pylint>=3.3.8", From 9010d085b195fc182d63ae035ea8a37308049434 Mon Sep 17 00:00:00 2001 From: martinholmer Date: Fri, 20 Mar 2026 10:44:26 -0400 Subject: [PATCH 2/6] Eliminate use of tc abbreviation for taxcalc package --- tests/conftest.py | 10 ++++++---- tests/test_imputed_variables.py | 10 +++++----- tests/test_misc.py | 6 +++--- tests/test_tax_revenue.py | 10 +++++----- tmd/create_taxcalc_cached_files.py | 10 +++++----- tmd/create_taxcalc_input_variables.py | 4 ++-- tmd/datasets/cps.py | 10 +++++----- tmd/datasets/puf.py | 6 +++--- tmd/datasets/tmd.py | 16 ++++++++-------- tmd/examination/2022/bootstrap_sampling.py | 2 +- tmd/utils/reweight.py | 14 +++++++------- tmd/utils/soi_replication.py | 10 +++++----- tmd/utils/taxcalc_utils.py | 10 +++++----- 13 files changed, 60 insertions(+), 58 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index eb926b1b..c86cffce 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,7 @@ import pytest import numpy as np import pandas as pd -import taxcalc as tc +import taxcalc from tmd.storage import STORAGE_FOLDER from tmd.imputation_assumptions import TAXYEAR @@ -18,13 +18,15 @@ def create_tmd_records( data_path, weights_path, growfactors_path, exact_calculations=True ): """ - Create tc.Records with start_year=TAXYEAR. + Create taxcalc.Records with start_year=TAXYEAR. Bypasses tmd_constructor() which hardcodes start_year=2021. 
""" - return tc.Records( + return taxcalc.Records( data=pd.read_csv(data_path), start_year=TAXYEAR, - gfactors=tc.GrowFactors(growfactors_filename=str(growfactors_path)), + gfactors=taxcalc.GrowFactors( + growfactors_filename=str(growfactors_path) + ), weights=pd.read_csv(weights_path), adjust_ratios=None, exact_calculations=exact_calculations, diff --git a/tests/test_imputed_variables.py b/tests/test_imputed_variables.py index 6bd97471..2a4dfca3 100644 --- a/tests/test_imputed_variables.py +++ b/tests/test_imputed_variables.py @@ -6,7 +6,7 @@ import numpy as np import pytest -import taxcalc as tc +import taxcalc from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING from tests.conftest import create_tmd_records @@ -123,9 +123,9 @@ def actual_results(rdf, bdf): growfactors_path=tmd_growfactors_path, ) # create baseline_sim Calculator object for simyear and get its output - pol = tc.Policy() + pol = taxcalc.Policy() pol.implement_reform(CREDIT_CLAIMING) - baseline_sim = tc.Calculator(policy=pol, records=recs) + baseline_sim = taxcalc.Calculator(policy=pol, records=recs) baseline_sim.advance_to_year(simyear) baseline_sim.calc_all() bdf = baseline_sim.dataframe(output_variables) @@ -140,10 +140,10 @@ def actual_results(rdf, bdf): tolerance_scale = 1.0 for ded, info in deductions.items(): # create reform Calculator object for simyear - reform_policy = tc.Policy() + reform_policy = taxcalc.Policy() reform_policy.implement_reform(CREDIT_CLAIMING) reform_policy.implement_reform(info["reform_dict"]) - reform_sim = tc.Calculator(policy=reform_policy, records=recs) + reform_sim = taxcalc.Calculator(policy=reform_policy, records=recs) reform_sim.advance_to_year(simyear) # get reform Calculator object's output reform_sim.calc_all() diff --git a/tests/test_misc.py b/tests/test_misc.py index 847f2bcd..6ccb82b3 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -3,7 +3,7 @@ """ import pytest -import taxcalc as tc +import taxcalc from tmd.storage import 
STORAGE_FOLDER from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING from tests.conftest import create_tmd_records @@ -52,14 +52,14 @@ def compare(name, act, exp, tol): ), f"{name}:act,exp,tol= {act} {exp} {tol}" # use national tmd files to compute various TAXYEAR income tax statistics - pol = tc.Policy() + pol = taxcalc.Policy() pol.implement_reform(CREDIT_CLAIMING) rec = create_tmd_records( data_path=STORAGE_FOLDER / "output" / "tmd.csv.gz", weights_path=STORAGE_FOLDER / "output" / "tmd_weights.csv.gz", growfactors_path=STORAGE_FOLDER / "output" / "tmd_growfactors.csv", ) - sim = tc.Calculator(policy=pol, records=rec) + sim = taxcalc.Calculator(policy=pol, records=rec) sim.advance_to_year(TAXYEAR) sim.calc_all() wght = sim.array("s006") diff --git a/tests/test_tax_revenue.py b/tests/test_tax_revenue.py index 608ad4e0..7134d951 100644 --- a/tests/test_tax_revenue.py +++ b/tests/test_tax_revenue.py @@ -5,7 +5,7 @@ import yaml import numpy as np import pytest -import taxcalc as tc +import taxcalc from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING FIRST_CYR = 2023 @@ -45,11 +45,11 @@ def test_tax_revenue( exp_itax[year] = round(fy2cy(fy_itax[year], fy_itax[year + 1]), 3) exp_ptax[year] = round(fy2cy(fy_ptax[year], fy_ptax[year + 1]), 3) # calculate actual tax revenues for each calendar year - pol = tc.Policy() + pol = taxcalc.Policy() pol.implement_reform(CREDIT_CLAIMING) wghts = str(tmd_weights_path) - growf = tc.GrowFactors(growfactors_filename=str(tmd_growfactors_path)) - input_data = tc.Records( + growf = taxcalc.GrowFactors(growfactors_filename=str(tmd_growfactors_path)) + input_data = taxcalc.Records( data=tmd_variables, start_year=TAXYEAR, weights=wghts, @@ -58,7 +58,7 @@ def test_tax_revenue( exact_calculations=True, weights_scale=1.0, ) - sim = tc.Calculator(policy=pol, records=input_data) + sim = taxcalc.Calculator(policy=pol, records=input_data) act_itax = {} act_ptax = {} for year in range(FIRST_CYR, LAST_CYR + 1): diff --git 
a/tmd/create_taxcalc_cached_files.py b/tmd/create_taxcalc_cached_files.py index efecd2ca..2f23d648 100644 --- a/tmd/create_taxcalc_cached_files.py +++ b/tmd/create_taxcalc_cached_files.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -import taxcalc as tc +import taxcalc from tmd.storage import STORAGE_FOLDER, CACHED_TAXCALC_VARIABLES from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING @@ -21,18 +21,18 @@ def create_cached_files(): # calculate all Tax-Calculator variables for TAXYEAR # Construct Records directly (bypassing tmd_constructor which # hardcodes start_year=2021 in the taxcalc library). - pol = tc.Policy() + pol = taxcalc.Policy() pol.implement_reform(CREDIT_CLAIMING) - rec = tc.Records( + rec = taxcalc.Records( data=pd.read_csv(INFILE_PATH), start_year=TAXYEAR, - gfactors=tc.GrowFactors(growfactors_filename=str(GFFILE_PATH)), + gfactors=taxcalc.GrowFactors(growfactors_filename=str(GFFILE_PATH)), weights=pd.read_csv(WTFILE_PATH), adjust_ratios=None, exact_calculations=True, weights_scale=1.0, ) - calc = tc.Calculator(policy=pol, records=rec) + calc = taxcalc.Calculator(policy=pol, records=rec) calc.advance_to_year(TAXYEAR) calc.calc_all() diff --git a/tmd/create_taxcalc_input_variables.py b/tmd/create_taxcalc_input_variables.py index e9451616..e2777844 100644 --- a/tmd/create_taxcalc_input_variables.py +++ b/tmd/create_taxcalc_input_variables.py @@ -2,7 +2,7 @@ Construct tmd.csv, a Tax-Calculator-style input variable file for TAXYEAR. 
""" -import taxcalc as tc +import taxcalc from tmd.datasets.tmd import create_tmd_dataframe from tmd.imputation_assumptions import ( TAXYEAR, @@ -41,7 +41,7 @@ def create_variable_file(write_file=True): vdf.to_csv(fname, index=False) # streamline dataframe so that it includes only input variables print("Removing output variables from PUF+CPS dataframe...") - rec = tc.Records( + rec = taxcalc.Records( data=vdf, start_year=TAXYEAR, gfactors=None, diff --git a/tmd/datasets/cps.py b/tmd/datasets/cps.py index f70424e6..b5965629 100644 --- a/tmd/datasets/cps.py +++ b/tmd/datasets/cps.py @@ -138,7 +138,7 @@ "WKSWORK", ] -TC_CPS_AGED_RNG = np.random.default_rng(seed=374651932) +TAXCALC_CPS_AGED_RNG = np.random.default_rng(seed=374651932) CPS_URL_BY_YEAR = { 2018: ( @@ -305,7 +305,7 @@ def _derive_age(person: pd.DataFrame) -> np.ndarray: """ return np.where( person.A_AGE == 80, - TC_CPS_AGED_RNG.integers( + TAXCALC_CPS_AGED_RNG.integers( low=80, high=85, endpoint=False, size=len(person) ), person.A_AGE.values, @@ -373,7 +373,7 @@ def _is_tax_filer(tcdf: pd.DataFrame, taxyear: int) -> pd.Series: return filer -def create_tc_cps(taxyear: int) -> (pd.DataFrame, pd.Series): +def create_taxcalc_cps(taxyear: int) -> (pd.DataFrame, pd.Series): """ Create a Tax-Calculator-compatible CPS DataFrame for the given taxyear directly from the Census raw CPS data. 
@@ -583,7 +583,7 @@ def map_spouse(values): var["f2441"] = sum_all((age < 13) * dep_flag).astype(int) # variables with no CPS source are set to zero - zero_tc_names = [ + zero_taxcalc_names = [ "a_lineno", "agi_bin", "h_seq", @@ -646,7 +646,7 @@ def map_spouse(values): "e09700", # recapture of investment credit "e09800", # unreported payroll tax ] - for tcname in zero_tc_names: + for tcname in zero_taxcalc_names: if tcname not in var: var[tcname] = zeros diff --git a/tmd/datasets/puf.py b/tmd/datasets/puf.py index 0a79443f..939e8f23 100644 --- a/tmd/datasets/puf.py +++ b/tmd/datasets/puf.py @@ -170,7 +170,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: return puf -def create_tc_puf(taxyear: int) -> pd.DataFrame: +def create_taxcalc_puf(taxyear: int) -> pd.DataFrame: """ Create a Tax-Calculator-compatible PUF DataFrame for the given taxyear directly from raw PUF data. @@ -309,7 +309,7 @@ def _decode_dep_ages(agerange_vals, rng): # mapping from TC variable name to PE-named column in pre-processed PUF: # for person-level variables, the tax-unit total is scaled by # person_scale (= head_frac + (1-head_frac)*is_joint) - tc_to_pe = { + taxcalc_to_policyengine = { "RECID": "household_id", "S006": "household_weight", "E03500": "alimony_expense", @@ -413,7 +413,7 @@ def _decode_dep_ages(agerange_vals, rng): # create dictionary that will be used to create Tax-Calculator DataFrame NO_SCALE = {"RECID", "S006", "XTOT"} var = {} - for tcname, pename in tc_to_pe.items(): + for tcname, pename in taxcalc_to_policyengine.items(): if tcname in NO_SCALE: var[tcname] = puf[pename].values elif pename in PERSON_LEVEL_VARS: diff --git a/tmd/datasets/tmd.py b/tmd/datasets/tmd.py index 02a7b3d8..fc3a6b86 100644 --- a/tmd/datasets/tmd.py +++ b/tmd/datasets/tmd.py @@ -1,8 +1,8 @@ import numpy as np import pandas as pd from tmd.imputation_assumptions import TAXYEAR, CPS_WEIGHTS_SCALE -from tmd.datasets.puf import create_tc_puf -from tmd.datasets.cps import create_tc_cps +from 
tmd.datasets.puf import create_taxcalc_puf +from tmd.datasets.cps import create_taxcalc_cps from tmd.utils.taxcalc_utils import add_taxcalc_outputs from tmd.utils.reweight import reweight @@ -11,17 +11,17 @@ def create_tmd_dataframe(taxyear: int) -> pd.DataFrame: """ Create DataFrame for given taxyear containing PUF filers and CPS nonfilers. """ - # always call create_tc_puf and create_tc_cps + # always call create_taxcalc_puf and create_taxcalc_cps # (because imputation assumptions may have changed) - tc_puf = create_tc_puf(taxyear) - tc_cps, nonfiler = create_tc_cps(taxyear) - tc_cps = tc_cps[nonfiler].reset_index(drop=True) + taxcalc_puf = create_taxcalc_puf(taxyear) + taxcalc_cps, nonfiler = create_taxcalc_cps(taxyear) + taxcalc_cps = taxcalc_cps[nonfiler].reset_index(drop=True) # scale CPS weights to get sensible combined population count - tc_cps["s006"] *= CPS_WEIGHTS_SCALE[TAXYEAR] + taxcalc_cps["s006"] *= CPS_WEIGHTS_SCALE[TAXYEAR] print("Combining PUF filers and CPS nonfilers...") - combined = pd.concat([tc_puf, tc_cps], ignore_index=True) + combined = pd.concat([taxcalc_puf, taxcalc_cps], ignore_index=True) # ensure RECID values are unique combined["RECID"] = np.arange(1, len(combined) + 1, dtype=int) diff --git a/tmd/examination/2022/bootstrap_sampling.py b/tmd/examination/2022/bootstrap_sampling.py index 61cffac5..8d57ae2d 100644 --- a/tmd/examination/2022/bootstrap_sampling.py +++ b/tmd/examination/2022/bootstrap_sampling.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -USAGE = "USAGE: python bootstrap_sampling.py tc_dump_output_csv_file_name\n" +USAGE = "USAGE: python bootstrap_sampling.py taxcalc_dump_output_csv_fname\n" SS_FRAC = 1.00 # 0.1 SS_RNSEED = 902345678 diff --git a/tmd/utils/reweight.py b/tmd/utils/reweight.py index 9515ac28..518b710d 100644 --- a/tmd/utils/reweight.py +++ b/tmd/utils/reweight.py @@ -41,7 +41,7 @@ ) import clarabel from tmd.storage import STORAGE_FOLDER -from tmd.utils.soi_replication import tc_to_soi +from 
tmd.utils.soi_replication import taxcalc_to_soi from tmd.imputation_assumptions import ( TAXYEAR, REWEIGHT_MULTIPLIER_MIN, @@ -77,7 +77,7 @@ def build_loss_matrix(df, targets, time_period): corresponding SOI target values. """ columns = {} - df = tc_to_soi(df, time_period) + df = taxcalc_to_soi(df, time_period) agi = df["adjusted_gross_income"].values filer = df["is_tax_filer"].values targets_array = [] @@ -100,13 +100,13 @@ def build_loss_matrix(df, targets, time_period): "business_net_losses", "capital_gains_distributions", "capital_gains_losses", - # "estate_income", # all zeros in tc_to_soi (not in Tax-Calculator) - # "estate_losses", # all zeros in tc_to_soi (not in Tax-Calculator) + # "estate_income", # all zeros in Tax-Calculator + # "estate_losses", # all zeros in Tax-Calculator "exempt_interest", "ira_distributions", "partnership_and_s_corp_losses", - # "rent_and_royalty_net_income", # all zeros in tc_to_soi (not in TC) - # "rent_and_royalty_net_losses", # all zeros in tc_to_soi (not in TC) + # "rent_and_royalty_net_income", # all zeros in Tax-Calculator + # "rent_and_royalty_net_losses", # all zeros in Tax-Calculator "taxable_pension_income", "taxable_social_security", "unemployment_compensation", @@ -561,7 +561,7 @@ def reweight( ] if len(soi_filer_total_row) == 1: target_filer_total = soi_filer_total_row["Value"].values[0] - soi_df = tc_to_soi(flat_file.copy(), time_period) + soi_df = taxcalc_to_soi(flat_file.copy(), time_period) filer_mask = soi_df["is_tax_filer"].values.astype(bool) current_filer_total = (flat_file.s006.values * filer_mask).sum() prescale = target_filer_total / current_filer_total diff --git a/tmd/utils/soi_replication.py b/tmd/utils/soi_replication.py index 5fa39e54..5ab7b8f9 100644 --- a/tmd/utils/soi_replication.py +++ b/tmd/utils/soi_replication.py @@ -1,12 +1,12 @@ import pandas as pd -import taxcalc as tc +import taxcalc from tmd.imputation_assumptions import CREDIT_CLAIMING -def tc_to_soi(puf: pd.DataFrame, year: int) -> 
pd.DataFrame: - pol = tc.Policy() +def taxcalc_to_soi(puf: pd.DataFrame, year: int) -> pd.DataFrame: + pol = taxcalc.Policy() pol.implement_reform(CREDIT_CLAIMING) - rec = tc.Records( + rec = taxcalc.Records( data=puf, start_year=year, gfactors=None, @@ -14,7 +14,7 @@ def tc_to_soi(puf: pd.DataFrame, year: int) -> pd.DataFrame: adjust_ratios=None, exact_calculations=True, ) - calculator = tc.Calculator(policy=pol, records=rec) + calculator = taxcalc.Calculator(policy=pol, records=rec) calculator.advance_to_year(year) calculator.calc_all() pdf = calculator.dataframe(None, all_vars=True) diff --git a/tmd/utils/taxcalc_utils.py b/tmd/utils/taxcalc_utils.py index ba92dba9..64560c12 100644 --- a/tmd/utils/taxcalc_utils.py +++ b/tmd/utils/taxcalc_utils.py @@ -5,7 +5,7 @@ import pathlib import numpy as np import pandas as pd -import taxcalc as tc +import taxcalc from tmd.storage import STORAGE_FOLDER from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING @@ -34,10 +34,10 @@ def add_taxcalc_outputs( else: wghts = weights if isinstance(growfactors, pathlib.PosixPath): - growf = tc.GrowFactors(growfactors_filename=str(growfactors)) + growf = taxcalc.GrowFactors(growfactors_filename=str(growfactors)) else: growf = growfactors - rec = tc.Records( + rec = taxcalc.Records( data=flat_file, start_year=input_data_year, gfactors=growf, @@ -46,11 +46,11 @@ def add_taxcalc_outputs( exact_calculations=True, weights_scale=1.0, ) - pol = tc.Policy() + pol = taxcalc.Policy() pol.implement_reform(CREDIT_CLAIMING) if reform: pol.implement_reform(reform) - simulation = tc.Calculator(records=rec, policy=pol) + simulation = taxcalc.Calculator(records=rec, policy=pol) simulation.advance_to_year(simulation_year) simulation.calc_all() output = simulation.dataframe(None, all_vars=True) From c674d609f0684304839308bad13eb1087e78b2e2 Mon Sep 17 00:00:00 2001 From: martinholmer Date: Fri, 20 Mar 2026 11:24:31 -0400 Subject: [PATCH 3/6] Split tmd/utils/taxcalc_utils.py into two separate files
--- tests/test_tax_expenditures.py | 4 +- tmd/datasets/tmd.py | 2 +- .../{taxcalc_utils.py => tax_expenditures.py} | 78 ++++--------------- tmd/utils/taxcalc_output.py | 59 ++++++++++++++ 4 files changed, 78 insertions(+), 65 deletions(-) rename tmd/utils/{taxcalc_utils.py => tax_expenditures.py} (51%) create mode 100644 tmd/utils/taxcalc_output.py diff --git a/tests/test_tax_expenditures.py b/tests/test_tax_expenditures.py index 833e1de7..c7a457de 100644 --- a/tests/test_tax_expenditures.py +++ b/tests/test_tax_expenditures.py @@ -1,5 +1,5 @@ """ -Test 2023 tax expenditures calculated using tmd files +Test 2023 tax expenditures calculated using TMD files against expected tax expenditure values in the tests folder. """ @@ -7,7 +7,7 @@ import numpy as np from tmd.storage import STORAGE_FOLDER from tmd.imputation_assumptions import TAXYEAR -from tmd.utils.taxcalc_utils import get_tax_expenditure_results +from tmd.utils.tax_expenditures import get_tax_expenditure_results @pytest.mark.taxexp diff --git a/tmd/datasets/tmd.py b/tmd/datasets/tmd.py index fc3a6b86..91298176 100644 --- a/tmd/datasets/tmd.py +++ b/tmd/datasets/tmd.py @@ -3,7 +3,7 @@ from tmd.imputation_assumptions import TAXYEAR, CPS_WEIGHTS_SCALE from tmd.datasets.puf import create_taxcalc_puf from tmd.datasets.cps import create_taxcalc_cps -from tmd.utils.taxcalc_utils import add_taxcalc_outputs +from tmd.utils.taxcalc_output import add_taxcalc_outputs from tmd.utils.reweight import reweight diff --git a/tmd/utils/taxcalc_utils.py b/tmd/utils/tax_expenditures.py similarity index 51% rename from tmd/utils/taxcalc_utils.py rename to tmd/utils/tax_expenditures.py index 64560c12..2529da4c 100644 --- a/tmd/utils/taxcalc_utils.py +++ b/tmd/utils/tax_expenditures.py @@ -1,65 +1,15 @@ """ -This module provides utilities for working with Tax-Calculator. +This module provides a utility function that calculates +selected 2023 tax expenditure estimates using Tax-Calculator.
""" import pathlib -import numpy as np import pandas as pd -import taxcalc from tmd.storage import STORAGE_FOLDER -from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING +from tmd.imputation_assumptions import TAXYEAR +from tmd.utils.taxcalc_output import add_taxcalc_outputs - -def add_taxcalc_outputs( - flat_file: pd.DataFrame, - input_data_year: int, - simulation_year: int, - reform: dict = None, - weights=None, - growfactors=None, -) -> pd.DataFrame: - """ - Run a flat file through Tax-Calculator. - - Args: - flat_file (pd.DataFrame): The flat file to run through Tax-Calculator. - time_period (int): The year to run the simulation for. - reform (dict, optional): The reform to apply. Defaults to None. - - Returns: - pd.DataFrame: The Tax-Calculator output. - """ - if isinstance(weights, pathlib.PosixPath): - wghts = str(weights) - else: - wghts = weights - if isinstance(growfactors, pathlib.PosixPath): - growf = taxcalc.GrowFactors(growfactors_filename=str(growfactors)) - else: - growf = growfactors - rec = taxcalc.Records( - data=flat_file, - start_year=input_data_year, - gfactors=growf, - weights=wghts, - adjust_ratios=None, - exact_calculations=True, - weights_scale=1.0, - ) - pol = taxcalc.Policy() - pol.implement_reform(CREDIT_CLAIMING) - if reform: - pol.implement_reform(reform) - simulation = taxcalc.Calculator(records=rec, policy=pol) - simulation.advance_to_year(simulation_year) - simulation.calc_all() - output = simulation.dataframe(None, all_vars=True) - if weights is None and growfactors is None: - assert np.allclose(output.s006, flat_file.s006) - return output - - -te_reforms = { +TAX_EXPENDITURE_REFORMS = { "ctc": {"CTC_c": {"2023": 0}, "ODC_c": {"2023": 0}, "ACTC_c": {"2023": 0}}, "eitc": {"EITC_c": {"2023": [0, 0, 0, 0]}}, "social_security_partial_taxability": {"SS_all_in_agi": {"2023": True}}, @@ -68,6 +18,7 @@ def add_taxcalc_outputs( "qbid": {"PT_qbid_rt": {"2023": 0}}, "salt": {"ID_AllTaxes_hc": {"2023": 1}}, } +TAX_EXPENDITURE_PATH = 
STORAGE_FOLDER / "output" / "tax_expenditures" def get_tax_expenditure_results( @@ -77,6 +28,10 @@ def get_tax_expenditure_results( weights_file_path: pathlib.Path, growfactors_file_path: pathlib.Path, ) -> dict: + """ + Returns a dictionary containing tax expenditure estimates and + writes estimates in the TAX_EXPENDITURE_PATH file. + """ assert input_data_year == TAXYEAR assert simulation_year in [2023, 2026] baseline = add_taxcalc_outputs( @@ -91,8 +46,8 @@ def get_tax_expenditure_results( itax_baseline = (baseline.iitax * baseline.s006).sum() / 1e9 itax_baseline_refcredits = (baseline.refund * baseline.s006).sum() / 1e9 - te_results = {} - for reform_name, reform in te_reforms.items(): + taxexp_results = {} + for reform_name, reform in TAX_EXPENDITURE_REFORMS.items(): reform_results = add_taxcalc_outputs( flat_file, input_data_year, @@ -105,22 +60,21 @@ def get_tax_expenditure_results( reform_results.iitax * reform_results.s006 ).sum() / 1e9 revenue_effect = itax_baseline - tax_revenue_reform - te_results[reform_name] = round(-revenue_effect, 1) + taxexp_results[reform_name] = round(-revenue_effect, 1) - taxexp_path = STORAGE_FOLDER / "output" / "tax_expenditures" if simulation_year == 2023: open_mode = "w" else: open_mode = "a" year = simulation_year - with open(taxexp_path, open_mode, encoding="utf-8") as tefile: + with open(TAX_EXPENDITURE_PATH, open_mode, encoding="utf-8") as tefile: res = f"YR,KIND,EST= {year} paytax {ptax_baseline:.1f}\n" tefile.write(res) omb_itax_revenue = itax_baseline + itax_baseline_refcredits res = f"YR,KIND,EST= {year} iitax {omb_itax_revenue:.1f}\n" tefile.write(res) - for reform, estimate in te_results.items(): + for reform, estimate in taxexp_results.items(): res = f"YR,KIND,EST= {year} {reform} {estimate}\n" tefile.write(res) - return te_results + return taxexp_results diff --git a/tmd/utils/taxcalc_output.py b/tmd/utils/taxcalc_output.py new file mode 100644 index 00000000..636cc995 --- /dev/null +++ 
b/tmd/utils/taxcalc_output.py @@ -0,0 +1,59 @@ +""" +This module provides a function that adds Tax-Calculator output variables to +a Tax-Calculator input DataFrame. +""" + +import pathlib +import numpy as np +import pandas as pd +import taxcalc +from tmd.imputation_assumptions import CREDIT_CLAIMING + + +def add_taxcalc_outputs( + flat_file: pd.DataFrame, + input_data_year: int, + simulation_year: int, + reform: dict = None, + weights=None, + growfactors=None, +) -> pd.DataFrame: + """ + Run a flat file through Tax-Calculator. + + Args: + flat_file (pd.DataFrame): The flat file to run through Tax-Calculator. + simulation_year (int): The year to run the simulation for. + reform (dict, optional): The reform to apply. Defaults to None. + + Returns: + pd.DataFrame: The Tax-Calculator output. + """ + if isinstance(weights, pathlib.PosixPath): + wghts = str(weights) + else: + wghts = weights + if isinstance(growfactors, pathlib.PosixPath): + growf = taxcalc.GrowFactors(growfactors_filename=str(growfactors)) + else: + growf = growfactors + rec = taxcalc.Records( + data=flat_file, + start_year=input_data_year, + gfactors=growf, + weights=wghts, + adjust_ratios=None, + exact_calculations=True, + weights_scale=1.0, + ) + pol = taxcalc.Policy() + pol.implement_reform(CREDIT_CLAIMING) + if reform: + pol.implement_reform(reform) + simulation = taxcalc.Calculator(records=rec, policy=pol) + simulation.advance_to_year(simulation_year) + simulation.calc_all() + output = simulation.dataframe(None, all_vars=True) + if weights is None and growfactors is None: + assert np.allclose(output.s006, flat_file.s006) + return output From 1cb277f8333ce29660f772c9cd46385f4ab8546b Mon Sep 17 00:00:00 2001 From: martinholmer Date: Fri, 20 Mar 2026 12:13:08 -0400 Subject: [PATCH 4/6] Remove unused tmd/storage/output/__init__.py file --- tmd/storage/output/__init__.py | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 tmd/storage/output/__init__.py diff --git a/tmd/storage/output/__init__.py
b/tmd/storage/output/__init__.py deleted file mode 100644 index e98acc5b..00000000 --- a/tmd/storage/output/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from pathlib import Path - -output = Path(__file__).parent From 19bc47f854a79fe8ef00b39ec056282c6ef883b0 Mon Sep 17 00:00:00 2001 From: martinholmer Date: Fri, 20 Mar 2026 14:04:53 -0400 Subject: [PATCH 5/6] Remove several unneeded empty __init__.py files --- tests/__init__.py | 0 tmd/areas/prepare/__init__.py | 10 ---------- tmd/datasets/__init__.py | 4 ---- tmd/national_targets/config/__init__.py | 0 4 files changed, 14 deletions(-) delete mode 100644 tests/__init__.py delete mode 100644 tmd/areas/prepare/__init__.py delete mode 100644 tmd/datasets/__init__.py delete mode 100644 tmd/national_targets/config/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tmd/areas/prepare/__init__.py b/tmd/areas/prepare/__init__.py deleted file mode 100644 index 6d3df776..00000000 --- a/tmd/areas/prepare/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Area data preparation package. - -Converts IRS SOI data into area-specific targets for states and -Congressional Districts, replacing the R/Quarto pipeline. 
-""" - -from pathlib import Path - -PREPARE_FOLDER = Path(__file__).parent diff --git a/tmd/datasets/__init__.py b/tmd/datasets/__init__.py deleted file mode 100644 index 1886edf9..00000000 --- a/tmd/datasets/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .puf import * -from .cps import * -from .tmd import * -from .uprate_puf import * diff --git a/tmd/national_targets/config/__init__.py b/tmd/national_targets/config/__init__.py deleted file mode 100644 index e69de29b..00000000 From 14ff463b251514836a04095b5b240b8b665eecf4 Mon Sep 17 00:00:00 2001 From: martinholmer Date: Fri, 20 Mar 2026 14:12:52 -0400 Subject: [PATCH 6/6] Simplify top-level .gitignore file --- .gitignore | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 5f0263c1..9d0fc5a2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ **/*.pyc **/*.egg-info **/_build/ -**/*tfevents* tmd/storage/output/cached_files tmd/storage/output/tax_expenditures !tmd/storage/input/*.csv @@ -16,15 +15,8 @@ tmd/storage/output/tax_expenditures **demographics_2015.csv **puf_2015.csv *.DS_STORE -.Rproj.user -# Local Netlify folder -.netlify - -# Quarto output directories -**/.quarto/ - -# Claude AI assistant files +# Claude Code AI assistant files claude/ # local environment