From b20bf77b60f8bfd73dc5fb5a3aaa3aa011b2e385 Mon Sep 17 00:00:00 2001
From: "martin.holmer@gmail.com" <martin.holmer@gmail.com>
Date: Fri, 25 Oct 2024 13:48:49 -0400
Subject: [PATCH 1/7] Add weights_scale attribute to Records and Data classes

---
 taxcalc.egg-info/PKG-INFO |  7 +++++++
 taxcalc/data.py           |  8 +++++---
 taxcalc/records.py        | 11 ++++++++++-
 taxcalc/taxcalcio.py      |  6 ++++--
 4 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/taxcalc.egg-info/PKG-INFO b/taxcalc.egg-info/PKG-INFO
index 8667eb489..b93fe2de9 100644
--- a/taxcalc.egg-info/PKG-INFO
+++ b/taxcalc.egg-info/PKG-INFO
@@ -18,6 +18,13 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: setuptools
+Requires-Dist: numpy
+Requires-Dist: pandas
+Requires-Dist: bokeh
+Requires-Dist: numba
+Requires-Dist: requests
+Requires-Dist: paramtools>=0.18.3
 
 | | |
 | --- | --- |
diff --git a/taxcalc/data.py b/taxcalc/data.py
index 0b861ffea..c7a6743ec 100644
--- a/taxcalc/data.py
+++ b/taxcalc/data.py
@@ -66,7 +66,8 @@ class instance: Data
     VARINFO_FILE_NAME = None
     VARINFO_FILE_PATH = None
 
-    def __init__(self, data, start_year, gfactors=None, weights=None):
+    def __init__(self, data, start_year, gfactors=None,
+                 weights=None, weights_scale=0.01):
         # initialize data variable info sets and read variable information
         self.INTEGER_READ_VARS = set()
         self.MUST_READ_VARS = set()
@@ -97,6 +98,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None):
             self.gfactors = gfactors
             # read sample weights
             self.WT = None
+            self.weights_scale = weights_scale
             if self.__aging_data:
                 self._read_weights(weights)
                 # ... weights must be same size as data
@@ -114,7 +116,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None):
                 assert wt_colname in self.WT.columns, (
                     f'no weights for start year {self.current_year}'
                 )
-                self.s006 = self.WT[wt_colname] * 0.01
+                self.s006 = self.WT[wt_colname] * self.weights_scale
 
     @property
     def data_year(self):
@@ -152,7 +154,7 @@ def increment_year(self):
             assert wt_colname in self.WT.columns, (
                 f'no weights for new year {self.current_year}'
             )
-            self.s006 = self.WT[wt_colname] * 0.01
+            self.s006 = self.WT[wt_colname] * self.weights_scale
 
     # ----- begin private methods of Data class -----
 
diff --git a/taxcalc/records.py b/taxcalc/records.py
index dcebf687f..37149d02e 100644
--- a/taxcalc/records.py
+++ b/taxcalc/records.py
@@ -69,6 +69,13 @@ class Records(Data):
         any smoothing of stair-step provisions in income tax law;
         default value is false.
 
+    weights_scale: float
+        specifies the factor by which each value in the weights file
+        is multiplied to produce the s006 variable.  PUF and CPS input
+        data generated in the taxdata repository use 0.01, while TMD
+        input data generated in the tax-microdata repository use 1.0;
+        default value is 0.01.
+
     Raises
     ------
     ValueError:
@@ -127,7 +134,8 @@ def __init__(self,
                  gfactors=GrowFactors(),
                  weights=PUF_WEIGHTS_FILENAME,
                  adjust_ratios=PUF_RATIOS_FILENAME,
-                 exact_calculations=False):
+                 exact_calculations=False,
+                 weights_scale=0.01):
         # pylint: disable=no-member,too-many-branches
         if isinstance(weights, str):
             weights = os.path.join(Records.CODE_PATH, weights)
@@ -250,6 +258,7 @@ def tmd_constructor(
             gfactors=GrowFactors(growfactors_filename=str(growfactors_path)),
             adjust_ratios=None,
             exact_calculations=exact_calculations,
+            weights_scale=1.0,
         )
 
     def increment_year(self):
diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
index 708bb6184..19326cc26 100644
--- a/taxcalc/taxcalcio.py
+++ b/taxcalc/taxcalcio.py
@@ -355,7 +355,8 @@ def init(self, input_data, tax_year, baseline, reform, assump,
                     weights=wghts,
                     gfactors=gfactors_ref,
                     adjust_ratios=None,
-                    exact_calculations=exact_calculations
+                    exact_calculations=exact_calculations,
+                    weights_scale=1.0,
                 )
                 recs_base = Records(
                     data=pd.read_csv(input_data),
@@ -363,7 +364,8 @@ def init(self, input_data, tax_year, baseline, reform, assump,
                     weights=wghts,
                     gfactors=gfactors_base,
                     adjust_ratios=None,
-                    exact_calculations=exact_calculations
+                    exact_calculations=exact_calculations,
+                    weights_scale=1.0,
                 )
             else:  # if not {cps|tmd}_input_data but aging_input_data: puf
                 recs = Records(

From 27c62bd7545a3310e4a6d1b5893a3b2ebd8c479a Mon Sep 17 00:00:00 2001
From: "martin.holmer@gmail.com" <martin.holmer@gmail.com>
Date: Fri, 25 Oct 2024 14:27:27 -0400
Subject: [PATCH 2/7] Add weights_scale documentation

---
 taxcalc/__init__.py | 2 +-
 taxcalc/data.py     | 7 +++++++
 taxcalc/records.py  | 4 ++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/taxcalc/__init__.py b/taxcalc/__init__.py
index 2d4875ac2..e75e859d2 100644
--- a/taxcalc/__init__.py
+++ b/taxcalc/__init__.py
@@ -14,6 +14,6 @@
 from taxcalc.utils import *
 from taxcalc.cli import *
 
-__version__ = '4.3.0'
+__version__ = '4.3.0a'
 __min_python3_version__ = 10
 __max_python3_version__ = 12
diff --git a/taxcalc/data.py b/taxcalc/data.py
index c7a6743ec..ed356099b 100644
--- a/taxcalc/data.py
+++ b/taxcalc/data.py
@@ -42,6 +42,13 @@ class Data():
         NOTE: when using custom weights, set this argument to a DataFrame.
         NOTE: assumes weights are integers that are 100 times the real weights.
 
+    weights_scale: float
+        specifies the factor by which each value in the weights file
+        is multiplied to produce the s006 variable.  PUF and CPS input
+        data generated in the taxdata repository use 0.01, while TMD
+        input data generated in the tax-microdata repository use 1.0;
+        default value is 0.01.
+
     Raises
     ------
     ValueError:
diff --git a/taxcalc/records.py b/taxcalc/records.py
index 37149d02e..fde70df96 100644
--- a/taxcalc/records.py
+++ b/taxcalc/records.py
@@ -53,7 +53,7 @@ class Records(Data):
         None creates empty sample-weights DataFrame;
         default value is filename of the PUF weights.
         NOTE: when using custom weights, set this argument to a DataFrame.
-        NOTE: assumes weights are integers that are 100 times the real weights.
+        NOTE: see weights_scale documentation below.
 
     adjust_ratios: string or Pandas DataFrame or None
         string describes CSV file in which adjustment ratios reside;
@@ -139,7 +139,7 @@ def __init__(self,
         # pylint: disable=no-member,too-many-branches
         if isinstance(weights, str):
             weights = os.path.join(Records.CODE_PATH, weights)
-        super().__init__(data, start_year, gfactors, weights)
+        super().__init__(data, start_year, gfactors, weights, weights_scale)
         if data is None:
             return  # because there are no data
         # read adjustment ratios

From ccaeefd88f9e4cc0724cbb8e3f11778873f73def Mon Sep 17 00:00:00 2001
From: "martin.holmer@gmail.com" <martin.holmer@gmail.com>
Date: Fri, 25 Oct 2024 15:29:33 -0400
Subject: [PATCH 3/7] Change version to 4.3.0b

---
 taxcalc/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taxcalc/__init__.py b/taxcalc/__init__.py
index e75e859d2..f346b94bd 100644
--- a/taxcalc/__init__.py
+++ b/taxcalc/__init__.py
@@ -14,6 +14,6 @@
 from taxcalc.utils import *
 from taxcalc.cli import *
 
-__version__ = '4.3.0a'
+__version__ = '4.3.0b'
 __min_python3_version__ = 10
 __max_python3_version__ = 12

From 533124b6e04aead609582031a366cbf055c34fa9 Mon Sep 17 00:00:00 2001
From: "martin.holmer@gmail.com" <martin.holmer@gmail.com>
Date: Fri, 25 Oct 2024 19:04:47 -0400
Subject: [PATCH 4/7] Non-substantive formatting change

---
 taxcalc/records.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taxcalc/records.py b/taxcalc/records.py
index fde70df96..b67987bf1 100644
--- a/taxcalc/records.py
+++ b/taxcalc/records.py
@@ -236,7 +236,7 @@ def tmd_constructor(
             data_path: Path,
             weights_path: Path,
             growfactors_path: Path,
-            exact_calculations=False
+            exact_calculations=False,
     ):  # pragma: no cover
         """
         Static method returns a Records object instantiated with TMD

From eaffd02b2092b49869fa7cb9fb80d6b9ed113350 Mon Sep 17 00:00:00 2001
From: "martin.holmer@gmail.com" <martin.holmer@gmail.com>
Date: Sun, 27 Oct 2024 11:26:50 -0400
Subject: [PATCH 5/7] Store weights in Data class as floats (not integers)

---
 taxcalc/__init__.py                          | 2 +-
 taxcalc/data.py                              | 3 +--
 taxcalc/tests/benefits_expect.csv            | 2 +-
 taxcalc/tests/puf_var_wght_means_by_year.csv | 2 +-
 taxcalc/tests/test_benefits.py               | 6 +++---
 5 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/taxcalc/__init__.py b/taxcalc/__init__.py
index f346b94bd..01a5efa20 100644
--- a/taxcalc/__init__.py
+++ b/taxcalc/__init__.py
@@ -14,6 +14,6 @@
 from taxcalc.utils import *
 from taxcalc.cli import *
 
-__version__ = '4.3.0b'
+__version__ = '4.3.0e'
 __min_python3_version__ = 10
 __max_python3_version__ = 12
diff --git a/taxcalc/data.py b/taxcalc/data.py
index ed356099b..79523db93 100644
--- a/taxcalc/data.py
+++ b/taxcalc/data.py
@@ -269,7 +269,6 @@ def _read_weights(self, weights):
         Read sample weights from file or
         use specified DataFrame as weights or
         create empty DataFrame if None.
-        NOTE: assumes weights are integers equal to 100 times the real weight.
         """
         if weights is None:
             return
@@ -285,7 +284,7 @@ def _read_weights(self, weights):
             msg = 'weights is not None or a string or a Pandas DataFrame'
             raise ValueError(msg)
         assert isinstance(WT, pd.DataFrame)
-        setattr(self, 'WT', WT.astype(np.int32))
+        setattr(self, 'WT', WT.astype(np.float32))
         del WT
 
     def _extrapolate(self, year):
diff --git a/taxcalc/tests/benefits_expect.csv b/taxcalc/tests/benefits_expect.csv
index 1da97e273..357a470b0 100644
--- a/taxcalc/tests/benefits_expect.csv
+++ b/taxcalc/tests/benefits_expect.csv
@@ -141,7 +141,7 @@ year,bname,benamt,bencnt,benavg
 2031,snap,158.784,81.873,1.9
 2031,wic,5.313,23.047,0.2
 2031,tanf,36.49,9.639,3.8
-2031,vet,247.791,12.699,19.5
+2031,vet,247.79,12.699,19.5
 2031,housing,83.299,14.327,5.8
 2032,ssi,79.862,17.902,4.5
 2032,mcare,1553.523,95.617,16.2
diff --git a/taxcalc/tests/puf_var_wght_means_by_year.csv b/taxcalc/tests/puf_var_wght_means_by_year.csv
index 4f4d61834..e89c64d49 100644
--- a/taxcalc/tests/puf_var_wght_means_by_year.csv
+++ b/taxcalc/tests/puf_var_wght_means_by_year.csv
@@ -3,7 +3,7 @@ c00100,Federal AGI,   50840,   54457,   54942,   55071,   58024,   60318,   6159
 c02500,OASDI benefits in AGI,    1295,    1394,    1416,    1479,    1501,    1587,    1690,    1832,    2005,    2129,    2428,    2659,    2844,    3028,    3214,    3413,    3619,    3833,    4055,    4308,    4459,    4698
 c04470,Post-phase-out itemized deduction,    5908,    6037,    6150,    6383,    6563,    6803,    7011,    7493,    7881,    8018,    8324,    8672,    8950,    9288,    9614,    9940,   10266,   10611,   10970,   11450,   11641,   12004
 c04600,Post-phase-out personal exemption,    7105,    7131,    7163,    7217,    7149,    7247,    7380,    7471,    7481,    7806,    8373,    8834,    9054,    9248,    9434,    9612,    9797,    9988,   10186,   10398,   10579,   10791
-c04800,Federal regular taxable income,   35753,   39297,   39663,   39591,   42568,   44523,   45416,   46476,   55959,   54603,   57321,   58385,   60072,   61575,   63147,   64911,   66831,   68828,   70916,   73686,   74835,   76970
+c04800,Federal regular taxable income,   35753,   39297,   39663,   39591,   42568,   44523,   45416,   46475,   55959,   54603,   57321,   58385,   60072,   61575,   63147,   64911,   66831,   68828,   70916,   73686,   74835,   76970
 c05200,Regular tax on taxable income,    7671,    8731,    8725,    8584,    9499,   10001,   10138,   10475,   13361,   12576,   13165,   13230,   13561,   13847,   14165,   14544,   14965,   15398,   15840,   16404,   16816,   17278
 c07180,Child care credit,      17,      17,      17,      17,      17,      17,      17,      17,       0,      17,      17,      17,      17,      17,      17,      17,      17,      17,      17,      17,      16,      16
 c07220,Child tax credit (adjusted),     158,     155,     147,     143,     139,     134,     129,     124,     359,     116,     107,     101,      97,      94,      91,      88,      85,      82,      80,      77,      74,      72
diff --git a/taxcalc/tests/test_benefits.py b/taxcalc/tests/test_benefits.py
index e53d07424..be0dbf7d8 100644
--- a/taxcalc/tests/test_benefits.py
+++ b/taxcalc/tests/test_benefits.py
@@ -77,9 +77,9 @@ def test_benefits(tests_path, cps_fullsample):
     if diffs:
         msg = 'CPS BENEFITS RESULTS DIFFER\n'
         msg += '-------------------------------------------------\n'
-        msg += '--- NEW RESULTS IN benefits_actual.txt FILE   ---\n'
-        msg += '--- if new OK, copy benefits_actual.txt to    ---\n'
-        msg += '---                 benefits_expect.txt       ---\n'
+        msg += '--- NEW RESULTS IN benefits_actual.csv FILE   ---\n'
+        msg += '--- if new OK, copy benefits_actual.csv to    ---\n'
+        msg += '---                 benefits_expect.csv       ---\n'
         msg += '---            and rerun test.                ---\n'
         msg += '-------------------------------------------------\n'
         raise ValueError(msg)

From 9518205deb8ef7ec1e6dfb87dafe0071e57fac28 Mon Sep 17 00:00:00 2001
From: "martin.holmer@gmail.com" <martin.holmer@gmail.com>
Date: Sun, 27 Oct 2024 15:15:49 -0400
Subject: [PATCH 6/7] Make CLI dump output contain high-precision weights when
 using TMD input data

---
 taxcalc/data.py                              |  2 +-
 taxcalc/taxcalcio.py                         | 17 +++++++++++++----
 taxcalc/tests/benefits_expect.csv            |  2 +-
 taxcalc/tests/puf_var_wght_means_by_year.csv |  2 +-
 4 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/taxcalc/data.py b/taxcalc/data.py
index 79523db93..3f2ccf920 100644
--- a/taxcalc/data.py
+++ b/taxcalc/data.py
@@ -284,7 +284,7 @@ def _read_weights(self, weights):
             msg = 'weights is not None or a string or a Pandas DataFrame'
             raise ValueError(msg)
         assert isinstance(WT, pd.DataFrame)
-        setattr(self, 'WT', WT.astype(np.float32))
+        setattr(self, 'WT', WT.astype(np.float64))
         del WT
 
     def _extrapolate(self, year):
diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
index 19326cc26..a070f1be4 100644
--- a/taxcalc/taxcalcio.py
+++ b/taxcalc/taxcalcio.py
@@ -550,8 +550,17 @@ def write_output_file(self, output_dump, dump_varset,
             outdf = self.minimal_output()
             column_order = outdf.columns
         assert len(outdf.index) == self.calc.array_len
-        outdf.to_csv(self._output_filename, columns=column_order,
-                     index=False, float_format='%.2f')
+        if self.tmd_input_data:
+            if "s006" in outdf:
+                weights = outdf["s006"].round(5)
+            outdf = outdf.round(2)
+            if "s006" in outdf:
+                outdf["s006"] = weights
+            outdf.to_csv(self._output_filename, columns=column_order,
+                         index=False)
+        else:
+            outdf.to_csv(self._output_filename, columns=column_order,
+                         index=False, float_format='%.2f')
         del outdf
         gc.collect()
 
@@ -788,8 +797,8 @@ def dump_output(self, calcx, dump_varset, mtr_inctax, mtr_paytax):
             vardata = calcx.array(varname)
             if varname in recs_vinfo.INTEGER_VARS:
                 odf[varname] = vardata
-            else:
-                odf[varname] = vardata.round(2)  # rounded to nearest cent
+            else:  # specify precision that can handle small TMD area weights
+                odf[varname] = vardata.round(5)
             odf = odf.copy()
         # specify mtr values in percentage terms
         if 'mtr_inctax' in varset:
diff --git a/taxcalc/tests/benefits_expect.csv b/taxcalc/tests/benefits_expect.csv
index 357a470b0..1da97e273 100644
--- a/taxcalc/tests/benefits_expect.csv
+++ b/taxcalc/tests/benefits_expect.csv
@@ -141,7 +141,7 @@ year,bname,benamt,bencnt,benavg
 2031,snap,158.784,81.873,1.9
 2031,wic,5.313,23.047,0.2
 2031,tanf,36.49,9.639,3.8
-2031,vet,247.79,12.699,19.5
+2031,vet,247.791,12.699,19.5
 2031,housing,83.299,14.327,5.8
 2032,ssi,79.862,17.902,4.5
 2032,mcare,1553.523,95.617,16.2
diff --git a/taxcalc/tests/puf_var_wght_means_by_year.csv b/taxcalc/tests/puf_var_wght_means_by_year.csv
index e89c64d49..4f4d61834 100644
--- a/taxcalc/tests/puf_var_wght_means_by_year.csv
+++ b/taxcalc/tests/puf_var_wght_means_by_year.csv
@@ -3,7 +3,7 @@ c00100,Federal AGI,   50840,   54457,   54942,   55071,   58024,   60318,   6159
 c02500,OASDI benefits in AGI,    1295,    1394,    1416,    1479,    1501,    1587,    1690,    1832,    2005,    2129,    2428,    2659,    2844,    3028,    3214,    3413,    3619,    3833,    4055,    4308,    4459,    4698
 c04470,Post-phase-out itemized deduction,    5908,    6037,    6150,    6383,    6563,    6803,    7011,    7493,    7881,    8018,    8324,    8672,    8950,    9288,    9614,    9940,   10266,   10611,   10970,   11450,   11641,   12004
 c04600,Post-phase-out personal exemption,    7105,    7131,    7163,    7217,    7149,    7247,    7380,    7471,    7481,    7806,    8373,    8834,    9054,    9248,    9434,    9612,    9797,    9988,   10186,   10398,   10579,   10791
-c04800,Federal regular taxable income,   35753,   39297,   39663,   39591,   42568,   44523,   45416,   46475,   55959,   54603,   57321,   58385,   60072,   61575,   63147,   64911,   66831,   68828,   70916,   73686,   74835,   76970
+c04800,Federal regular taxable income,   35753,   39297,   39663,   39591,   42568,   44523,   45416,   46476,   55959,   54603,   57321,   58385,   60072,   61575,   63147,   64911,   66831,   68828,   70916,   73686,   74835,   76970
 c05200,Regular tax on taxable income,    7671,    8731,    8725,    8584,    9499,   10001,   10138,   10475,   13361,   12576,   13165,   13230,   13561,   13847,   14165,   14544,   14965,   15398,   15840,   16404,   16816,   17278
 c07180,Child care credit,      17,      17,      17,      17,      17,      17,      17,      17,       0,      17,      17,      17,      17,      17,      17,      17,      17,      17,      17,      17,      16,      16
 c07220,Child tax credit (adjusted),     158,     155,     147,     143,     139,     134,     129,     124,     359,     116,     107,     101,      97,      94,      91,      88,      85,      82,      80,      77,      74,      72

From 163d0408b758811f7270d9bd81ba0ebcf25d0dac Mon Sep 17 00:00:00 2001
From: "martin.holmer@gmail.com" <martin.holmer@gmail.com>
Date: Sun, 27 Oct 2024 15:45:39 -0400
Subject: [PATCH 7/7] Add no cover pragma to TMD-specific code

---
 taxcalc/taxcalcio.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
index a070f1be4..d459f3686 100644
--- a/taxcalc/taxcalcio.py
+++ b/taxcalc/taxcalcio.py
@@ -550,7 +550,7 @@ def write_output_file(self, output_dump, dump_varset,
             outdf = self.minimal_output()
             column_order = outdf.columns
         assert len(outdf.index) == self.calc.array_len
-        if self.tmd_input_data:
+        if self.tmd_input_data:  # pragma: no cover
             if "s006" in outdf:
                 weights = outdf["s006"].round(5)
             outdf = outdf.round(2)