Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
**/*.pyc
**/*.egg-info
**/_build/
**/*tfevents*
tmd/storage/output/cached_files
tmd/storage/output/tax_expenditures
!tmd/storage/input/*.csv
Expand All @@ -16,15 +15,8 @@ tmd/storage/output/tax_expenditures
**demographics_2015.csv
**puf_2015.csv
*.DS_STORE
.Rproj.user

# Local Netlify folder
.netlify

# Quarto output directories
**/.quarto/

# Claude AI assistant files
# Claude Code AI assistant files
claude/

# local environment
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"scikit-learn",
"scipy",
"xlrd",
"jax",
"black>=26.1.0",
"pycodestyle>=2.14.0",
"pylint>=3.3.8",
Expand Down
Empty file removed tests/__init__.py
Empty file.
10 changes: 6 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
import numpy as np
import pandas as pd
import taxcalc as tc
import taxcalc
from tmd.storage import STORAGE_FOLDER
from tmd.imputation_assumptions import TAXYEAR

Expand All @@ -18,13 +18,15 @@ def create_tmd_records(
data_path, weights_path, growfactors_path, exact_calculations=True
):
"""
Create tc.Records with start_year=TAXYEAR.
Create taxcalc.Records with start_year=TAXYEAR.
Bypasses tmd_constructor() which hardcodes start_year=2021.
"""
return tc.Records(
return taxcalc.Records(
data=pd.read_csv(data_path),
start_year=TAXYEAR,
gfactors=tc.GrowFactors(growfactors_filename=str(growfactors_path)),
gfactors=taxcalc.GrowFactors(
growfactors_filename=str(growfactors_path)
),
weights=pd.read_csv(weights_path),
adjust_ratios=None,
exact_calculations=exact_calculations,
Expand Down
10 changes: 5 additions & 5 deletions tests/test_imputed_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np
import pytest
import taxcalc as tc
import taxcalc
from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING
from tests.conftest import create_tmd_records

Expand Down Expand Up @@ -123,9 +123,9 @@ def actual_results(rdf, bdf):
growfactors_path=tmd_growfactors_path,
)
# create baseline_sim Calculator object for simyear and get its output
pol = tc.Policy()
pol = taxcalc.Policy()
pol.implement_reform(CREDIT_CLAIMING)
baseline_sim = tc.Calculator(policy=pol, records=recs)
baseline_sim = taxcalc.Calculator(policy=pol, records=recs)
baseline_sim.advance_to_year(simyear)
baseline_sim.calc_all()
bdf = baseline_sim.dataframe(output_variables)
Expand All @@ -140,10 +140,10 @@ def actual_results(rdf, bdf):
tolerance_scale = 1.0
for ded, info in deductions.items():
# create reform Calculator object for simyear
reform_policy = tc.Policy()
reform_policy = taxcalc.Policy()
reform_policy.implement_reform(CREDIT_CLAIMING)
reform_policy.implement_reform(info["reform_dict"])
reform_sim = tc.Calculator(policy=reform_policy, records=recs)
reform_sim = taxcalc.Calculator(policy=reform_policy, records=recs)
reform_sim.advance_to_year(simyear)
# get reform Calculator object's output
reform_sim.calc_all()
Expand Down
6 changes: 3 additions & 3 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import pytest
import taxcalc as tc
import taxcalc
from tmd.storage import STORAGE_FOLDER
from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING
from tests.conftest import create_tmd_records
Expand Down Expand Up @@ -52,14 +52,14 @@ def compare(name, act, exp, tol):
), f"{name}:act,exp,tol= {act} {exp} {tol}"

# use national tmd files to compute various TAXYEAR income tax statistics
pol = tc.Policy()
pol = taxcalc.Policy()
pol.implement_reform(CREDIT_CLAIMING)
rec = create_tmd_records(
data_path=STORAGE_FOLDER / "output" / "tmd.csv.gz",
weights_path=STORAGE_FOLDER / "output" / "tmd_weights.csv.gz",
growfactors_path=STORAGE_FOLDER / "output" / "tmd_growfactors.csv",
)
sim = tc.Calculator(policy=pol, records=rec)
sim = taxcalc.Calculator(policy=pol, records=rec)
sim.advance_to_year(TAXYEAR)
sim.calc_all()
wght = sim.array("s006")
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tax_expenditures.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""
Test 2023 tax expenditures calculated using tmd files
Test 2023 tax expenditures calculated using TMD files
against expected tax expenditure values in the tests folder.
"""

import pytest
import numpy as np
from tmd.storage import STORAGE_FOLDER
from tmd.imputation_assumptions import TAXYEAR
from tmd.utils.taxcalc_utils import get_tax_expenditure_results
from tmd.utils.tax_expenditures import get_tax_expenditure_results


@pytest.mark.taxexp
Expand Down
10 changes: 5 additions & 5 deletions tests/test_tax_revenue.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import yaml
import numpy as np
import pytest
import taxcalc as tc
import taxcalc
from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING

FIRST_CYR = 2023
Expand Down Expand Up @@ -45,11 +45,11 @@ def test_tax_revenue(
exp_itax[year] = round(fy2cy(fy_itax[year], fy_itax[year + 1]), 3)
exp_ptax[year] = round(fy2cy(fy_ptax[year], fy_ptax[year + 1]), 3)
# calculate actual tax revenues for each calendar year
pol = tc.Policy()
pol = taxcalc.Policy()
pol.implement_reform(CREDIT_CLAIMING)
wghts = str(tmd_weights_path)
growf = tc.GrowFactors(growfactors_filename=str(tmd_growfactors_path))
input_data = tc.Records(
growf = taxcalc.GrowFactors(growfactors_filename=str(tmd_growfactors_path))
input_data = taxcalc.Records(
data=tmd_variables,
start_year=TAXYEAR,
weights=wghts,
Expand All @@ -58,7 +58,7 @@ def test_tax_revenue(
exact_calculations=True,
weights_scale=1.0,
)
sim = tc.Calculator(policy=pol, records=input_data)
sim = taxcalc.Calculator(policy=pol, records=input_data)
act_itax = {}
act_ptax = {}
for year in range(FIRST_CYR, LAST_CYR + 1):
Expand Down
10 changes: 0 additions & 10 deletions tmd/areas/prepare/__init__.py

This file was deleted.

10 changes: 5 additions & 5 deletions tmd/create_taxcalc_cached_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np
import pandas as pd
import taxcalc as tc
import taxcalc
from tmd.storage import STORAGE_FOLDER, CACHED_TAXCALC_VARIABLES
from tmd.imputation_assumptions import TAXYEAR, CREDIT_CLAIMING

Expand All @@ -21,18 +21,18 @@ def create_cached_files():
# calculate all Tax-Calculator variables for TAXYEAR
# Construct Records directly (bypassing tmd_constructor which
# hardcodes start_year=2021 in the taxcalc library).
pol = tc.Policy()
pol = taxcalc.Policy()
pol.implement_reform(CREDIT_CLAIMING)
rec = tc.Records(
rec = taxcalc.Records(
data=pd.read_csv(INFILE_PATH),
start_year=TAXYEAR,
gfactors=tc.GrowFactors(growfactors_filename=str(GFFILE_PATH)),
gfactors=taxcalc.GrowFactors(growfactors_filename=str(GFFILE_PATH)),
weights=pd.read_csv(WTFILE_PATH),
adjust_ratios=None,
exact_calculations=True,
weights_scale=1.0,
)
calc = tc.Calculator(policy=pol, records=rec)
calc = taxcalc.Calculator(policy=pol, records=rec)
calc.advance_to_year(TAXYEAR)
calc.calc_all()

Expand Down
4 changes: 2 additions & 2 deletions tmd/create_taxcalc_input_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Construct tmd.csv, a Tax-Calculator-style input variable file for TAXYEAR.
"""

import taxcalc as tc
import taxcalc
from tmd.datasets.tmd import create_tmd_dataframe
from tmd.imputation_assumptions import (
TAXYEAR,
Expand Down Expand Up @@ -41,7 +41,7 @@ def create_variable_file(write_file=True):
vdf.to_csv(fname, index=False)
# streamline dataframe so that it includes only input variables
print("Removing output variables from PUF+CPS dataframe...")
rec = tc.Records(
rec = taxcalc.Records(
data=vdf,
start_year=TAXYEAR,
gfactors=None,
Expand Down
4 changes: 0 additions & 4 deletions tmd/datasets/__init__.py

This file was deleted.

10 changes: 5 additions & 5 deletions tmd/datasets/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
"WKSWORK",
]

TC_CPS_AGED_RNG = np.random.default_rng(seed=374651932)
TAXCALC_CPS_AGED_RNG = np.random.default_rng(seed=374651932)

CPS_URL_BY_YEAR = {
2018: (
Expand Down Expand Up @@ -305,7 +305,7 @@ def _derive_age(person: pd.DataFrame) -> np.ndarray:
"""
return np.where(
person.A_AGE == 80,
TC_CPS_AGED_RNG.integers(
TAXCALC_CPS_AGED_RNG.integers(
low=80, high=85, endpoint=False, size=len(person)
),
person.A_AGE.values,
Expand Down Expand Up @@ -373,7 +373,7 @@ def _is_tax_filer(tcdf: pd.DataFrame, taxyear: int) -> pd.Series:
return filer


def create_tc_cps(taxyear: int) -> (pd.DataFrame, pd.Series):
def create_taxcalc_cps(taxyear: int) -> (pd.DataFrame, pd.Series):
"""
Create a Tax-Calculator-compatible CPS DataFrame for the given taxyear
directly from the Census raw CPS data.
Expand Down Expand Up @@ -583,7 +583,7 @@ def map_spouse(values):
var["f2441"] = sum_all((age < 13) * dep_flag).astype(int)

# variables with no CPS source are set to zero
zero_tc_names = [
zero_taxcalc_names = [
"a_lineno",
"agi_bin",
"h_seq",
Expand Down Expand Up @@ -646,7 +646,7 @@ def map_spouse(values):
"e09700", # recapture of investment credit
"e09800", # unreported payroll tax
]
for tcname in zero_tc_names:
for tcname in zero_taxcalc_names:
if tcname not in var:
var[tcname] = zeros

Expand Down
6 changes: 3 additions & 3 deletions tmd/datasets/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
return puf


def create_tc_puf(taxyear: int) -> pd.DataFrame:
def create_taxcalc_puf(taxyear: int) -> pd.DataFrame:
"""
Create a Tax-Calculator-compatible PUF DataFrame for
the given taxyear directly from raw PUF data.
Expand Down Expand Up @@ -309,7 +309,7 @@ def _decode_dep_ages(agerange_vals, rng):
# mapping from Tax-Calculator (TC) variable name to PolicyEngine-named (PE)
# column in pre-processed PUF:
# for person-level variables, the tax-unit total is scaled by
# person_scale (= head_frac + (1-head_frac)*is_joint)
tc_to_pe = {
taxcalc_to_policyengine = {
"RECID": "household_id",
"S006": "household_weight",
"E03500": "alimony_expense",
Expand Down Expand Up @@ -413,7 +413,7 @@ def _decode_dep_ages(agerange_vals, rng):
# create dictionary that will be used to create Tax-Calculator DataFrame
NO_SCALE = {"RECID", "S006", "XTOT"}
var = {}
for tcname, pename in tc_to_pe.items():
for tcname, pename in taxcalc_to_policyengine.items():
if tcname in NO_SCALE:
var[tcname] = puf[pename].values
elif pename in PERSON_LEVEL_VARS:
Expand Down
18 changes: 9 additions & 9 deletions tmd/datasets/tmd.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
import numpy as np
import pandas as pd
from tmd.imputation_assumptions import TAXYEAR, CPS_WEIGHTS_SCALE
from tmd.datasets.puf import create_tc_puf
from tmd.datasets.cps import create_tc_cps
from tmd.utils.taxcalc_utils import add_taxcalc_outputs
from tmd.datasets.puf import create_taxcalc_puf
from tmd.datasets.cps import create_taxcalc_cps
from tmd.utils.taxcalc_output import add_taxcalc_outputs
from tmd.utils.reweight import reweight


def create_tmd_dataframe(taxyear: int) -> pd.DataFrame:
"""
Create DataFrame for given taxyear containing PUF filers and CPS nonfilers.
"""
# always call create_tc_puf and create_tc_cps
# always call create_taxcalc_puf and create_taxcalc_cps
# (because imputation assumptions may have changed)
tc_puf = create_tc_puf(taxyear)
tc_cps, nonfiler = create_tc_cps(taxyear)
tc_cps = tc_cps[nonfiler].reset_index(drop=True)
taxcalc_puf = create_taxcalc_puf(taxyear)
taxcalc_cps, nonfiler = create_taxcalc_cps(taxyear)
taxcalc_cps = taxcalc_cps[nonfiler].reset_index(drop=True)

# scale CPS weights to get sensible combined population count
tc_cps["s006"] *= CPS_WEIGHTS_SCALE[TAXYEAR]
taxcalc_cps["s006"] *= CPS_WEIGHTS_SCALE[TAXYEAR]

print("Combining PUF filers and CPS nonfilers...")
combined = pd.concat([tc_puf, tc_cps], ignore_index=True)
combined = pd.concat([taxcalc_puf, taxcalc_cps], ignore_index=True)

# ensure RECID values are unique
combined["RECID"] = np.arange(1, len(combined) + 1, dtype=int)
Expand Down
2 changes: 1 addition & 1 deletion tmd/examination/2022/bootstrap_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import pandas as pd

USAGE = "USAGE: python bootstrap_sampling.py tc_dump_output_csv_file_name\n"
USAGE = "USAGE: python bootstrap_sampling.py taxcalc_dump_output_csv_fname\n"

SS_FRAC = 1.00 # 0.1
SS_RNSEED = 902345678
Expand Down
Empty file.
3 changes: 0 additions & 3 deletions tmd/storage/output/__init__.py

This file was deleted.

Loading
Loading