diff --git a/.gitignore b/.gitignore index 5739c0a9..c4acb966 100644 --- a/.gitignore +++ b/.gitignore @@ -22,5 +22,10 @@ cpsmar*.sas cpsmar*.csv *.dat +<<<<<<< HEAD +# pickle files +cpsmar*.pkl +======= # pickle cps*.pkl +>>>>>>> master diff --git a/Makefile b/Makefile index 57e73a7d..aabea0a3 100644 --- a/Makefile +++ b/Makefile @@ -157,4 +157,4 @@ cps_stage2/cps_weights.csv.gz: cps_stage2/stage2.py \ gunzip cps_weights.csv.gz && gzip -n cps_weights.csv .PHONY=all -all: puf-files cps-files +all: puf-files cps-files \ No newline at end of file diff --git a/README.md b/README.md index 47121964..38484a9c 100644 --- a/README.md +++ b/README.md @@ -66,18 +66,8 @@ running the `make help` command. If you want more background on the make utility and makefiles, search for Internet links with the keywords `makefile` and `automate`. -Note that the stage2 linear program that generates the weights file for the PUF -is very long-running, taking five or more hours depending on your -computer's CPU speed. We are considering options for speeding up this -stage2 work, but for the time being you can execute `make puf-files` -and `make cps-files` in separate terminal windows to have the two -stage2 linear programs run in parallel. (If you try this parallel -execution approach, be sure to wait for the `make puf-files` job to -begin stage2 work before executing the `make cps-files` command in -the other terminal window. This is necessary because the CPS stage1 -work depends on output from PUF stage1.) If you are generating the -taxdata made files in an overnight run, then simply execute the `make -all` command. +The stage 2 linear programs that generate weights for the PUF and CPS files take about 20 minutes each to run, but if you'd like to slightly speed up the process, you can execute `make puf-files` and `make cps-files` in separate terminal windows to have both stage 2 linear programs run in parallel. 
In order for this parallel execution to run smoothly, please wait for `make puf-files` to begin stage 2 work before executing the `make cps-files` command in +the other terminal window. This is necessary because the CPS stage1 work depends on output from PUF stage1. If you are generating the taxdata made files in an overnight run, then simply execute the `make all` command. You can copy the made files to your local Tax-Calculator directory tree using the [`csvcopy.sh`](csvcopy.sh) bash script. Use the `dryrun` diff --git a/cps_stage2/cps_weights.csv.gz b/cps_stage2/cps_weights.csv.gz index 81775e30..d5ccef03 100644 Binary files a/cps_stage2/cps_weights.csv.gz and b/cps_stage2/cps_weights.csv.gz differ diff --git a/environment.yml b/environment.yml index 0ed33d9d..202ded06 100644 --- a/environment.yml +++ b/environment.yml @@ -9,11 +9,9 @@ dependencies: - bokeh>=0.12.3 - statsmodels - pytest -- pulp - tqdm - requests - lxml - xlrd -- pulp - paramtools -- cvxopt +- cvxopt \ No newline at end of file diff --git a/puf_stage1/growfactors.csv b/puf_stage1/growfactors.csv index e52ea4f9..6ec79f7e 100644 --- a/puf_stage1/growfactors.csv +++ b/puf_stage1/growfactors.csv @@ -5,6 +5,22 @@ YEAR,ATXPY,ASCHF,ABOOK,ACPIU,ACPIM,AWAGE,ASCHCI,ASCHCL,ASCHEI,ASCHEL,AINTS,ADIVS 2014,1.029476,0.931683,0.976566,1.015927,1.023917,1.039999,1.040616,1.030349,1.075978,0.991321,0.925886,1.17606,1.387522,1.004801,0.641103,0.970506,0.99257,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 2015,1.043858,0.508206,0.999544,1.001235,1.026485,1.042272,1.045643,1.045687,0.999528,0.999533,0.992874,1.157209,1.004704,1.015868,0.910908,1.052061,1.053858,1.023325,1.041528,1.019361,1.102667,1.007792,1.026748,1.132657,1.04693 2016,1.021978,1.071198,0.984833,1.012621,1.037807,1.020894,0.96353,0.96354,0.984898,0.984792,1.077538,1.003713,0.876884,1.005343,0.782145,1.030307,1.097065,1.011695,1.010367,0.993375,0.989722,1.002577,1.01625,0.828168,1.105413 +<<<<<<< HEAD 
+2017,1.051182,0.87479,0.95974,1.021273,1.025035,1.038246,1.130556,1.130608,0.959669,0.959698,1.087469,1.164603,1.347953,1.004845,0.996884,1.05975,1.011911,1.030968,1.014601,0.981621,1.0,0.998715,1.063959,1.0,1.0 +2018,1.047195,0.708285,1.025818,1.0245,1.019707,1.041942,1.046585,1.04658,1.025915,1.025969,1.088529,1.077556,1.097881,1.018763,0.922626,1.055622,1.103035,1.045097,1.045897,1.005738,1.0,1.002574,1.034828,1.0,1.0 +2019,1.037023,1.112441,0.995933,1.018271,1.028328,1.040677,1.03418,1.034223,0.995906,0.995858,1.004144,1.029629,1.066206,1.030112,0.940703,1.045296,1.054052,1.052158,1.045866,1.000751,1.0,1.002567,1.034809,1.0,1.0 +2020,1.030993,0.891236,1.06591,1.024277,1.026424,1.037765,1.03462,1.034545,1.065947,1.065908,0.989296,1.029928,0.973026,1.024743,1.011256,1.039108,0.996727,1.050763,1.046106,1.00255,1.0,1.003841,1.034974,1.0,1.0 +2021,1.034089,1.108265,1.039782,1.025247,0.918936,1.037075,1.027832,1.027853,1.039739,1.03973,1.001943,1.031554,0.988412,1.03027,1.069457,1.04208,1.03007,1.047248,1.047927,1.001796,1.0,1.002551,1.034869,1.0,1.0 +2022,1.033243,1.161086,1.030448,1.02563,1.014646,1.031631,1.028019,1.028038,1.030507,1.030534,1.017629,1.028054,0.987452,1.031659,1.147377,1.041198,1.030159,1.048769,1.047573,0.999851,1.0,1.002545,1.034942,1.0,1.0 +2023,1.033371,1.092271,1.037915,1.024745,1.014267,1.029323,1.027154,1.027122,1.037807,1.037878,1.041403,1.027787,0.974711,1.033024,1.250726,1.041224,1.030193,1.050822,1.048715,1.000448,1.0,1.003807,1.034968,1.0,1.0 +2024,1.033221,1.073036,1.037776,1.024068,1.014066,1.028635,1.025411,1.025408,1.037808,1.03774,1.054751,1.030749,0.991602,1.034781,1.105019,1.040931,1.030334,1.048426,1.051767,0.99776,1.0,1.002528,1.034951,1.0,1.0 +2025,1.033823,1.058105,1.027107,1.022879,1.013463,1.027846,1.027453,1.02747,1.027138,1.027167,1.062337,1.031274,1.001858,1.03308,1.017327,1.041332,1.030635,1.046248,1.052213,1.002245,1.0,1.003783,1.034897,1.0,1.0 
+2026,1.03402,1.047526,1.029945,1.022672,1.013445,1.02811,1.029094,1.02913,1.029958,1.029917,1.055739,1.028999,1.011804,1.034513,1.05729,1.04139,1.030633,1.072236,1.0,0.999552,1.0,1.002513,1.034808,1.0,1.0 +2027,1.034226,1.049046,1.032796,1.022467,1.013505,1.030248,1.030839,1.030845,1.032764,1.032815,1.049357,1.028774,1.019382,1.036611,1.032219,1.041525,1.030788,1.0,1.0,1.0,1.0,1.002506,1.034863,1.0,1.0 +2028,1.034968,1.051975,1.027639,1.022337,1.013482,1.030979,1.030428,1.03042,1.027638,1.027638,1.045504,1.030823,1.022654,1.039477,1.032868,1.042147,1.030942,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 +2029,1.034878,1.056474,1.033694,1.022418,1.013766,1.029667,1.030366,1.030306,1.033722,1.033671,1.041334,1.033131,1.024828,1.041059,1.037775,1.041809,1.031131,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 +2030,1.03551,1.056131,1.038349,1.022344,1.013732,1.028376,1.030533,1.030545,1.038318,1.038387,1.038443,1.037577,1.027551,1.043582,1.02912,1.042295,1.03133,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 +======= 2017,1.051182,0.87479,0.95974,1.021273,1.025035,1.038246,1.130556,1.130608,0.959669,0.959698,1.087469,1.164603,1.348396,1.004845,0.996884,1.05975,1.011911,1.030968,1.014601,0.981621,1.0,0.998715,1.063959,1.0,1.0 2018,1.047195,0.708285,1.025818,1.0245,1.019707,1.041942,1.046585,1.04658,1.025915,1.025969,1.088529,1.077556,1.097521,1.018763,0.922626,1.055622,1.103035,1.045097,1.045897,1.005738,1.0,1.002574,1.034828,1.0,1.0 2019,1.036067,1.166962,0.992197,1.017913,1.028328,1.038517,1.033164,1.033156,0.992188,0.99211,1.002592,1.02832,1.065996,1.030112,0.940703,1.044371,1.054052,1.052158,1.045866,1.000751,1.0,1.002567,1.034809,1.0,1.0 @@ -19,3 +35,4 @@ YEAR,ATXPY,ASCHF,ABOOK,ACPIU,ACPIM,AWAGE,ASCHCI,ASCHCL,ASCHEI,ASCHEL,AINTS,ADIVS 2028,1.042697,1.049305,1.020963,1.022187,1.013231,1.035014,1.035433,1.035424,1.02094,1.020997,1.099193,1.050983,1.022666,1.039477,1.032388,1.049924,1.030942,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 
2029,1.041089,1.054253,1.015683,1.022446,1.013374,1.031071,1.035495,1.035509,1.015646,1.015698,1.098882,1.047456,1.025483,1.041059,1.038699,1.048055,1.031131,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 2030,1.040419,1.054653,1.019563,1.022171,1.01351,1.03003,1.035259,1.035284,1.019587,1.019514,1.098934,1.04469,1.027213,1.043582,1.027557,1.047229,1.03133,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 +>>>>>>> master diff --git a/puf_stage2/puf_weights.csv.gz b/puf_stage2/puf_weights.csv.gz index 3daee86d..21a88326 100644 Binary files a/puf_stage2/puf_weights.csv.gz and b/puf_stage2/puf_weights.csv.gz differ diff --git a/puf_stage2/solve_lp_for_year.py b/puf_stage2/solve_lp_for_year.py index f58c3fd5..15b0a3a9 100644 --- a/puf_stage2/solve_lp_for_year.py +++ b/puf_stage2/solve_lp_for_year.py @@ -1,5 +1,6 @@ import numpy as np -import pulp +import pandas as pd +import cvxopt def solve_lp_for_year(puf, Stage_I_factors, Stage_II_targets, year, tol): @@ -175,26 +176,32 @@ def solve_lp_for_year(puf, Stage_I_factors, Stage_II_targets, year, tol): b.append(m) print('Constructing LP Model') - LP = pulp.LpProblem('PUF Stage 2', pulp.LpMinimize) - r = pulp.LpVariable.dicts('r', puf.index, lowBound=0) - s = pulp.LpVariable.dicts('s', puf.index, lowBound=0) - # add objective functoin - LP += pulp.lpSum([r[i] + s[i] for i in puf.index]) - # add constraints - for i in puf.index: - LP += r[i] + s[i] <= tol - for i in range(len(b)): - LP += pulp.lpSum([(A1[i][j] * r[j] + A2[i][j] * s[j]) - for j in puf.index]) == b[i] - - print('Solving Model...') - pulp.LpSolverDefault.msg = 1 # ensure there is a command line output - LP.solve() - print(pulp.LpStatus[LP.status]) + + N = len(puf.index) + c = cvxopt.matrix(np.ones(2 * N).tolist()) + + # tolerance and non-negativity constraints + G_values = np.append(np.ones(2 * N), -np.ones(2 * N)).tolist() + G_row = np.concatenate((list(range(N)), list(range(N)), + [i + N for i in list(range(2 * N))])).tolist() + G_row = [int(i) for i in G_row] + G_col = 
np.concatenate((list(range(2 * N)), list(range(2 * N)))).tolist() + G_col = [int(i) for i in G_col] + + G = cvxopt.spmatrix(G_values, G_row, G_col) + h = cvxopt.matrix(np.append(tol * np.ones(N), np.zeros(2 * N)).tolist()) + + # targets + A = cvxopt.matrix(np.hstack([A1, A2])) + b = cvxopt.matrix(b) + + print("Solving model") + sol_cvxopt = cvxopt.solvers.lp(A=A, b=b, G=G, h=h, c=c, solver=None) # apply r and s values to the weights - r_val = np.array([r[i].varValue for i in r]) - s_val = np.array([s[i].varValue for i in s]) - z = (1. + r_val - s_val) * s006 * 100 + rs_val_cvxopt = np.array(sol_cvxopt["x"]).reshape((2 * N,)) + r_val_cvxopt = rs_val_cvxopt[:N] + s_val_cvxopt = rs_val_cvxopt[N:] + z = r_val_cvxopt - s_val_cvxopt - return z + return (1 + z) * s006 * 100 diff --git a/puf_stage2/stage2.py b/puf_stage2/stage2.py index afc35e1e..118692fc 100644 --- a/puf_stage2/stage2.py +++ b/puf_stage2/stage2.py @@ -28,7 +28,7 @@ year=2012, tol=0.40) z['WT2013'] = solve_lp_for_year(puf, Stage_I_factors, Stage_II_targets, year=2013, tol=0.38) -z['WT2014'] = solve_lp_for_year(puf, Stage_I_factors, Stage_II_targets, +z["WT2014"] = solve_lp_for_year(puf, Stage_I_factors, Stage_II_targets, year=2014, tol=0.35) z["WT2015"] = solve_lp_for_year(puf, Stage_I_factors, Stage_II_targets, year=2015, tol=0.33) diff --git a/puf_stage3/puf_ratios.csv b/puf_stage3/puf_ratios.csv index d10cfaf6..3d26ae74 100644 --- a/puf_stage3/puf_ratios.csv +++ b/puf_stage3/puf_ratios.csv @@ -2,11 +2,25 @@ agi_bin,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 INT2011,1.0458,0.2514,0.9630,0.9868,0.9917,0.9648,0.8552,0.9171,0.9901,0.9417,0.9523,0.9595,0.9681,0.9694,0.9881,0.9889,0.9754,1.0319,1.7343 INT2012,0.7625,0.9203,0.8327,0.8123,0.6999,0.7927,0.7820,0.7830,0.8309,0.9046,0.9071,0.9411,1.0580,1.0261,0.9956,1.1221,1.0833,1.2296,1.3831 INT2013,1.1322,0.7703,0.7712,0.7713,0.8973,0.8754,0.9499,0.9019,0.8378,0.8660,0.9841,1.0330,1.0363,1.0533,1.0870,0.9633,1.0496,0.9578,1.1518 
-INT2014,0.9134,0.8914,0.8577,0.7782,0.8494,0.8787,1.0042,0.8782,0.8477,1.0043,0.8412,1.0565,1.0196,0.9350,1.0651,1.1568,1.0693,1.1008,1.0531 -INT2015,1.0020,0.9996,0.9766,0.9759,0.9881,0.9844,0.9817,0.9863,0.9894,0.9948,0.9988,1.0147,1.0038,0.9925,0.9936,0.9878,1.0011,1.0187,1.0168 +INT2014,0.9134,0.8914,0.8577,0.7782,0.8488,0.8794,1.0042,0.8782,0.8477,1.0043,0.8412,1.0565,1.0196,0.9350,1.0651,1.1568,1.0693,1.1008,1.0531 +INT2015,1.0020,0.9996,0.9766,0.9759,0.9888,0.9837,0.9817,0.9863,0.9894,0.9948,0.9988,1.0147,1.0038,0.9925,0.9936,0.9878,1.0011,1.0187,1.0168 INT2016,0.9974,1.0135,0.9762,0.9757,0.9749,0.9698,0.9777,0.9917,0.9927,0.9910,1.0061,1.0013,1.0020,1.0032,1.0151,0.9923,1.0088,1.0341,1.0090 INT2017,1.0004,0.9750,0.9604,0.9677,0.9748,0.9785,0.9763,0.9880,0.9693,0.9868,1.0011,1.0226,1.0088,0.9999,0.9898,0.9853,0.9941,1.0106,1.0538 INT2018,0.9884,0.9976,0.9722,0.9719,0.9746,0.9780,0.9795,0.9811,0.9904,0.9882,1.0096,1.0087,1.0029,1.0017,1.0046,0.9873,1.0066,1.0232,1.0417 +<<<<<<< HEAD +INT2019,0.9914,1.0032,0.9806,0.9758,0.9806,0.9826,0.9815,0.9839,0.9919,1.0053,1.0055,1.0010,1.0031,0.9992,1.0020,0.9933,1.0090,1.0198,1.0228 +INT2020,1.0014,0.9944,0.9757,0.9774,0.9860,0.9812,0.9885,1.0031,0.9924,1.0052,1.0047,1.0043,0.9986,1.0008,0.9924,0.9898,1.0000,1.0080,1.0257 +INT2021,0.9933,0.9983,0.9795,0.9761,0.9830,0.9884,1.0126,1.0116,0.9873,1.0031,1.0077,0.9966,1.0014,1.0051,0.9975,0.9970,1.0117,1.0117,1.0013 +INT2022,1.0097,0.9912,0.9781,0.9895,0.9927,0.9785,0.9838,1.0030,1.0070,1.0191,0.9958,0.9882,1.0010,1.0058,0.9959,1.0044,1.0176,1.0067,0.9941 +INT2023,0.9974,0.9898,0.9827,0.9834,0.9773,0.9871,1.0134,0.9952,0.9921,0.9963,1.0038,1.0063,1.0044,0.9978,0.9929,0.9908,1.0028,1.0105,1.0229 +INT2024,0.9964,1.0070,0.9737,0.9861,0.9934,0.9878,0.9874,1.0006,1.0070,1.0086,0.9963,0.9936,1.0015,1.0101,0.9980,1.0036,1.0201,1.0091,0.9887 +INT2025,1.0072,0.9898,0.9813,0.9792,0.9794,0.9917,0.9853,0.9916,0.9948,0.9979,1.0066,1.0069,1.0040,0.9965,0.9945,0.9907,0.9997,1.0143,1.0219 
+INT2026,0.9987,0.9963,0.9724,0.9765,0.9910,0.9931,1.0040,1.0088,1.0060,1.0010,0.9970,0.9947,1.0046,1.0057,1.0102,1.0075,1.0087,1.0081,0.9962 +INT2027,1.0051,1.0015,0.9775,0.9821,0.9895,0.9908,0.9937,0.9924,1.0058,0.9979,0.9971,1.0011,1.0023,0.9983,1.0084,1.0006,1.0029,1.0101,1.0138 +INT2028,1.0109,0.9912,0.9769,0.9789,0.9858,0.9932,1.0016,0.9999,1.0102,0.9942,0.9973,0.9970,1.0029,1.0093,1.0099,1.0093,1.0099,1.0058,0.9952 +INT2029,1.0148,0.9934,0.9736,0.9811,0.9789,0.9973,0.9950,1.0024,1.0060,0.9983,0.9937,0.9945,1.0052,1.0115,1.0071,1.0099,1.0110,1.0041,0.9949 +INT2030,1.0010,0.9908,0.9863,0.9852,0.9914,0.9965,1.0064,0.9982,1.0006,0.9971,0.9921,0.9969,1.0018,1.0092,1.0114,1.0138,1.0094,1.0119,1.0022 +======= INT2019,0.9913,1.0031,0.9805,0.9758,0.9806,0.9825,0.9814,0.9843,0.9918,1.0057,1.0054,1.0010,1.0030,0.9991,1.0019,0.9932,1.0089,1.0197,1.0228 INT2020,1.0015,0.9945,0.9756,0.9775,0.9861,0.9813,0.9886,1.0027,0.9925,1.0048,1.0048,1.0043,0.9985,1.0009,0.9924,0.9899,1.0004,1.0080,1.0256 INT2021,0.9934,0.9984,0.9797,0.9756,0.9830,0.9885,1.0127,1.0116,0.9874,1.0031,1.0077,0.9966,1.0015,1.0052,0.9977,0.9970,1.0112,1.0118,1.0014 @@ -19,3 +33,4 @@ INT2027,1.0048,1.0024,0.9775,0.9821,0.9895,0.9908,0.9937,0.9923,1.0049,0.9979,0. 
INT2028,1.0109,0.9904,0.9769,0.9787,0.9859,0.9925,1.0017,1.0001,1.0107,0.9943,0.9972,0.9971,1.0029,1.0094,1.0100,1.0094,1.0099,1.0059,0.9953 INT2029,1.0149,0.9935,0.9736,0.9811,0.9789,0.9982,0.9948,1.0023,1.0060,0.9983,0.9935,0.9946,1.0053,1.0115,1.0069,1.0099,1.0110,1.0041,0.9949 INT2030,1.0006,0.9908,0.9862,0.9851,0.9914,0.9983,1.0066,0.9981,1.0005,0.9970,0.9923,0.9969,1.0017,1.0091,1.0116,1.0137,1.0093,1.0118,1.0022 +>>>>>>> master diff --git a/tests/puf_agg_expected.txt b/tests/puf_agg_expected.txt index e099d781..9e46bfca 100644 --- a/tests/puf_agg_expected.txt +++ b/tests/puf_agg_expected.txt @@ -56,7 +56,7 @@ e11200 16313284 0 28800 e17500 275218715 0 772900 e18400 6056026500 0 15160000 e18500 1232988392 0 578400 -e19200 1951888780 0 5127000 +e19200 1951888779 0 5127000 e19800 2461075094 0 30100000 e20100 1093183833 0 29580000 e20400 1486854771 0 10850000 @@ -87,4 +87,4 @@ p22250 -603210228 -124900000 39410000 p23250 23317338986 -28160000 91220000 pencon_p 442334767 0 16500 pencon_s 273078831 0 16500 -s006 16355017483 1 1043269 +s006 16355017483 1 1043269 \ No newline at end of file