diff --git a/taxcalc.egg-info/PKG-INFO b/taxcalc.egg-info/PKG-INFO index 8667eb489..b93fe2de9 100644 --- a/taxcalc.egg-info/PKG-INFO +++ b/taxcalc.egg-info/PKG-INFO @@ -18,6 +18,13 @@ Classifier: Programming Language :: Python :: 3.12 Classifier: Topic :: Software Development :: Libraries :: Python Modules Description-Content-Type: text/markdown License-File: LICENSE +Requires-Dist: setuptools +Requires-Dist: numpy +Requires-Dist: pandas +Requires-Dist: bokeh +Requires-Dist: numba +Requires-Dist: requests +Requires-Dist: paramtools>=0.18.3 | | | | --- | --- | diff --git a/taxcalc/__init__.py b/taxcalc/__init__.py index 2d4875ac2..01a5efa20 100644 --- a/taxcalc/__init__.py +++ b/taxcalc/__init__.py @@ -14,6 +14,6 @@ from taxcalc.utils import * from taxcalc.cli import * -__version__ = '4.3.0' +__version__ = '4.3.0e' __min_python3_version__ = 10 __max_python3_version__ = 12 diff --git a/taxcalc/data.py b/taxcalc/data.py index 0b861ffea..3f2ccf920 100644 --- a/taxcalc/data.py +++ b/taxcalc/data.py @@ -42,6 +42,13 @@ class Data(): NOTE: when using custom weights, set this argument to a DataFrame. NOTE: assumes weights are integers that are 100 times the real weights. + weights_scale: float + specifies the weights scaling factor used to convert contents + of weights file into the s006 variable. PUF and CPS input data + generated in the taxdata repository use a weights_scale of 0.01, + while TMD input data generated in the tax-microdata repository + use a 1.0 weights_scale value. + Raises ------ ValueError: @@ -66,7 +73,8 @@ class instance: Data VARINFO_FILE_NAME = None VARINFO_FILE_PATH = None - def __init__(self, data, start_year, gfactors=None, weights=None): + def __init__(self, data, start_year, gfactors=None, + weights=None, weights_scale=0.01): # initialize data variable info sets and read variable information self.INTEGER_READ_VARS = set() self.MUST_READ_VARS = set() @@ -97,6 +105,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None): self.gfactors = gfactors # read sample weights self.WT = None + self.weights_scale = weights_scale if self.__aging_data: self._read_weights(weights) # ... weights must be same size as data @@ -114,7 +123,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None): assert wt_colname in self.WT.columns, ( f'no weights for start year {self.current_year}' ) - self.s006 = self.WT[wt_colname] * 0.01 + self.s006 = self.WT[wt_colname] * self.weights_scale @property def data_year(self): @@ -152,7 +161,7 @@ def increment_year(self): assert wt_colname in self.WT.columns, ( f'no weights for new year {self.current_year}' ) - self.s006 = self.WT[wt_colname] * 0.01 + self.s006 = self.WT[wt_colname] * self.weights_scale # ----- begin private methods of Data class ----- @@ -260,7 +269,6 @@ def _read_weights(self, weights): Read sample weights from file or use specified DataFrame as weights or create empty DataFrame if None. - NOTE: assumes weights are integers equal to 100 times the real weight. """ if weights is None: return @@ -276,7 +284,7 @@ def _read_weights(self, weights): msg = 'weights is not None or a string or a Pandas DataFrame' raise ValueError(msg) assert isinstance(WT, pd.DataFrame) - setattr(self, 'WT', WT.astype(np.int32)) + setattr(self, 'WT', WT.astype(np.float64)) del WT def _extrapolate(self, year): diff --git a/taxcalc/records.py b/taxcalc/records.py index dcebf687f..b67987bf1 100644 --- a/taxcalc/records.py +++ b/taxcalc/records.py @@ -53,7 +53,7 @@ class Records(Data): None creates empty sample-weights DataFrame; default value is filename of the PUF weights. NOTE: when using custom weights, set this argument to a DataFrame. - NOTE: assumes weights are integers that are 100 times the real weights. + NOTE: see weights_scale documentation below. adjust_ratios: string or Pandas DataFrame or None string describes CSV file in which adjustment ratios reside; @@ -69,6 +69,13 @@ class Records(Data): any smoothing of stair-step provisions in income tax law; default value is false. + weights_scale: float + specifies the weights scaling factor used to convert contents + of weights file into the s006 variable. PUF and CPS input data + generated in the taxdata repository use a weights_scale of 0.01, + while TMD input data generated in the tax-microdata repository + use a 1.0 weights_scale value. + Raises ------ ValueError: @@ -127,11 +134,12 @@ def __init__(self, gfactors=GrowFactors(), weights=PUF_WEIGHTS_FILENAME, adjust_ratios=PUF_RATIOS_FILENAME, - exact_calculations=False): + exact_calculations=False, + weights_scale=0.01): # pylint: disable=no-member,too-many-branches if isinstance(weights, str): weights = os.path.join(Records.CODE_PATH, weights) - super().__init__(data, start_year, gfactors, weights) + super().__init__(data, start_year, gfactors, weights, weights_scale) if data is None: return # because there are no data # read adjustment ratios @@ -228,7 +236,7 @@ def tmd_constructor( data_path: Path, weights_path: Path, growfactors_path: Path, - exact_calculations=False + exact_calculations=False, ): # pragma: no cover """ Static method returns a Records object instantiated with TMD @@ -250,6 +258,7 @@ def tmd_constructor( gfactors=GrowFactors(growfactors_filename=str(growfactors_path)), adjust_ratios=None, exact_calculations=exact_calculations, + weights_scale=1.0, ) def increment_year(self): diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py index 708bb6184..d459f3686 100644 --- a/taxcalc/taxcalcio.py +++ b/taxcalc/taxcalcio.py @@ -355,7 +355,8 @@ def init(self, input_data, tax_year, baseline, reform, assump, weights=wghts, gfactors=gfactors_ref, adjust_ratios=None, - exact_calculations=exact_calculations + exact_calculations=exact_calculations, + weights_scale=1.0, ) recs_base = Records( data=pd.read_csv(input_data), @@ -363,7 +364,8 @@ def init(self, input_data, tax_year, baseline, reform, assump, weights=wghts, gfactors=gfactors_base, adjust_ratios=None, - exact_calculations=exact_calculations + exact_calculations=exact_calculations, + weights_scale=1.0, ) else: # if not {cps|tmd}_input_data but aging_input_data: puf recs = Records( @@ -548,8 +550,17 @@ def write_output_file(self, output_dump, dump_varset, outdf = self.minimal_output() column_order = outdf.columns assert len(outdf.index) == self.calc.array_len - outdf.to_csv(self._output_filename, columns=column_order, - index=False, float_format='%.2f') + if self.tmd_input_data: # pragma: no cover + if "s006" in outdf: + weights = outdf["s006"].round(5) + outdf = outdf.round(2) + if "s006" in outdf: + outdf["s006"] = weights + outdf.to_csv(self._output_filename, columns=column_order, + index=False) + else: + outdf.to_csv(self._output_filename, columns=column_order, + index=False, float_format='%.2f') del outdf gc.collect() @@ -786,8 +797,8 @@ def dump_output(self, calcx, dump_varset, mtr_inctax, mtr_paytax): vardata = calcx.array(varname) if varname in recs_vinfo.INTEGER_VARS: odf[varname] = vardata - else: - odf[varname] = vardata.round(2) # rounded to nearest cent + else: # specify precision that can handle small TMD area weights + odf[varname] = vardata.round(5) odf = odf.copy() # specify mtr values in percentage terms if 'mtr_inctax' in varset: diff --git a/taxcalc/tests/test_benefits.py b/taxcalc/tests/test_benefits.py index e53d07424..be0dbf7d8 100644 --- a/taxcalc/tests/test_benefits.py +++ b/taxcalc/tests/test_benefits.py @@ -77,9 +77,9 @@ def test_benefits(tests_path, cps_fullsample): if diffs: msg = 'CPS BENEFITS RESULTS DIFFER\n' msg += '-------------------------------------------------\n' - msg += '--- NEW RESULTS IN benefits_actual.txt FILE ---\n' - msg += '--- if new OK, copy benefits_actual.txt to ---\n' - msg += '--- benefits_expect.txt ---\n' + msg += '--- NEW RESULTS IN benefits_actual.csv FILE ---\n' + msg += '--- if new OK, copy benefits_actual.csv to ---\n' + msg += '--- benefits_expect.csv ---\n' msg += '--- and rerun test. ---\n' msg += '-------------------------------------------------\n' raise ValueError(msg)