From 9961a22c09078d044ba7872843f27a00e2144990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Fri, 29 Sep 2023 17:09:52 +0200 Subject: [PATCH 01/22] basic iterative and not-quite tree reduction hadd functions started --- src/proteus/operations/add_histograms.py | 115 +++++++++++++++ src/proteus/operations/parquet_to_root.py | 20 +++ src/proteus/to_feather.py | 65 +++++++++ src/proteus/to_parquet.py | 167 ++++++++++++++++++++++ 4 files changed, 367 insertions(+) create mode 100644 src/proteus/operations/add_histograms.py create mode 100644 src/proteus/operations/parquet_to_root.py create mode 100644 src/proteus/to_feather.py create mode 100644 src/proteus/to_parquet.py diff --git a/src/proteus/operations/add_histograms.py b/src/proteus/operations/add_histograms.py new file mode 100644 index 0000000..163550c --- /dev/null +++ b/src/proteus/operations/add_histograms.py @@ -0,0 +1,115 @@ +import uproot + +# Only combines one histogram per file +def hadd_like(files, destination, *, hist_name=None, hist_paths=None): + if hist_name == None: + # ? search through them nicely? Assume there are multiple? + array = hist.classnames() + try: + hist = uproot.open(files[0])[hist_name] + except: + # error: name not the same! + error = 5 + + # Base case + bins = hist.member('fN') + values = hist.values(flow=True) + fEntries = hist.member("fEntries") + fTsumw = hist.member("fTsumw") + if hist.member("fTsumw2") != None: + fTsumw2 = hist.member("fTsumw2") + else: + fTsumw2 = 0 + fTsumwx = hist.member("fTsumwx") + fTsumwx2 = hist.member("fTsumwx2") + variances = hist.variances("flow=True") + + # Iteratively / Sequentially: + for path in files[1:]: + with uproot.open(path) as file: + hist = file[hist_name] # histogram = uproot.open("file.root:path/to/histogram") + if bins != hist.member('fN'): + raise ValueError( + "Bins must be the same, not " + {bins} + " and " + {hist.member('fN')} + ) + + values += hist.values(flow=True) + fEntries += hist.member("fEntries") + fTsumw += hist.member("fTsumw") + if hist.member("fTsumw2") != None: + fTsumw2 += hist.member("fTsumw2") + fTsumwx += hist.member("fTsumwx") + fTsumwx2 += hist.member("fTsumwx2") + variances += hist.variances("flow=True") + + h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, + fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances, hist.member("fXaxis")) + + file_out = uproot.recreate(destination) + file_out[h_sum.member("fName")] = h_sum + + +# If we can do things in parallel +def hadd_like_tree_reduction(files, destination, *, hist_name=None, threads=1): + import numpy as np + hist = uproot.open(files[0]) + try: + hist = uproot.open(files[0])[hist_name] + except: + # error: name not the same! 
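+        # Placeholder error handling: a real implementation would re-raise with
+        # a helpful message, for example
+        #     raise ValueError(f"histogram {hist_name!r} not found in {files[0]}") from None
+        # (hypothetical wording), instead of setting the `error = 5` stand-in below.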
+        error = 5
+
+    iterations = len(files)
+
+    if (iterations%2) != 0:
+        hist = files[-1]
+        values = hist.values(flow=True)
+        fEntries = hist.member("fEntries")
+        fTsumw = hist.member("fTsumw")
+        if hist.member("fTsumw2") != None:
+            fTsumw2 = hist.member("fTsumw2")
+        fTsumwx = hist.member("fTsumwx")
+        fTsumwx2 = hist.member("fTsumwx2")
+        variances = hist.variances("flow=True")
+    else:
+        values = 0
+        fEntries = 0
+        fTsumw = 0
+        fTsumw2 = 0
+        fTsumwx = 0
+        fTsumwx2 = 0
+        variances = 0
+
+    for i in range(iterations/2):
+        print(i)
+        values, fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances += sum_hists(files[i], files[-i], hist_name)
+
+    h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values,
+        fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances, hist.member("fXaxis"))
+
+    file_out = uproot.recreate(destination)
+    file_out[h_sum.member("fName")] = h_sum
+
+hadd_like_tree_reduction(["/Users/zobil/Documents/Proteus/file1.root", "/Users/zobil/Documents/Proteus/file2.root"], "place.root", hist_name="name")
+
+def sum_hists(hist1, hist2):
+    # Check bins
+    hist1 = uproot.open(hist1)
+    hist2 = uproot.open(hist2)
+    if hist1.member("fN") != hist2.member("fN"):
+        raise ValueError(
+            "Bins must be the same, not " + {hist1.member("fN")} + " and " + {hist2.member("fN")} # Get file names
+        )
+    values = hist1.values(flow=True) + hist2.values(flow=True)
+    fEntries = hist1.member("fEntries") + hist2.member("fEntries")
+    fTsumw = hist1.member("fTsumw") + hist2.member("fTsumw")
+    if hist1.member("fTsumw2") != None:
+        fTsumw2 = hist1.member("fTsumw2")
+    else:
+        fTsumw2 = 0
+    if hist2.member("fTsumw2") != None:
+        fTsumw2 += hist2.member("fTsumw2")
+    fTsumwx = hist1.member("fTsumwx") + hist2.member("fTsumwx")
+    fTsumwx2 = hist1.member("fTsumwx2") + hist2.member("fTsumwx")
+    variances = hist1.variances("flow=True") + hist2.variances("flow=True")
+    return hist1.member("fN"), values, fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances
diff --git a/src/proteus/operations/parquet_to_root.py b/src/proteus/operations/parquet_to_root.py
new file mode 100644
index 0000000..2acccc0
--- /dev/null
+++ b/src/proteus/operations/parquet_to_root.py
@@ -0,0 +1,24 @@
+import dask_awkward as dak
+import uproot
+def parquet_to_root(read_path,
+                    write_path,
+                    *,
+                    columns=None,
+                    storage_options=None,
+                    max_gap=None,
+                    max_block=None,
+                    footer_sample_size=None,
+                    generate_bitmasks=None,
+                    highlevel=None,
+                    behavior=None,
+                    ): # keyword options are accepted but not yet wired through
+    arrays = dak.from_parquet(read_path, split_row_groups=True)
+    out_file = uproot.recreate(write_path)
+    # Materialize one partition at a time so the whole dataset never has to be
+    # in memory: the first partition creates the TTree, the rest extend it.
+    first = arrays.partitions[0].compute()
+    out_file["tree"] = {name: first[name] for name in first.fields} #name? But root files aren't just TTrees...
+    for i in range(1, arrays.npartitions):
+        part = arrays.partitions[i].compute()
+        out_file["tree"].extend({name: part[name] for name in part.fields})
+ 
\ No newline at end of file
diff --git a/src/proteus/to_feather.py b/src/proteus/to_feather.py
new file mode 100644
index 0000000..8d7b065
--- /dev/null
+++ b/src/proteus/to_feather.py
@@ -0,0 +1,65 @@
+import pyarrow as pa
+import pyarrow.parquet as pq
+import awkward as ak
+from fsspec import AbstractFileSystem
+
+def parquet_to_feather(
+    path,
+    new_path, # ?
+    *,
+    columns=None,
+    row_groups=None,
+    storage_options=None,
+    max_gap=64_000,
+    max_block=256_000_000,
+    footer_sample_size=1_000_000,
+    generate_bitmasks=False,
+    highlevel=True,
+    behavior=None,
+):
+    """
+    Args:
+        path (str): Local filename or remote URL, passed to fsspec for resolution.
+            May contain glob patterns.
+        columns (None, str, or list of str): Glob pattern(s) with bash-like curly
+            brackets for matching column names. Nested records are separated by dots.
+ If a list of patterns, the logical-or is matched. If None, all columns + are read. + row_groups (None or set of int): Row groups to read; must be non-negative. + Order is ignored: the output array is presented in the order specified by + Parquet metadata. If None, all row groups/all rows are read. + storage_options: Passed to `fsspec.parquet.open_parquet_file`. + max_gap (int): Passed to `fsspec.parquet.open_parquet_file`. + max_block (int): Passed to `fsspec.parquet.open_parquet_file`. + footer_sample_size (int): Passed to `fsspec.parquet.open_parquet_file`. + generate_bitmasks (bool): If enabled and Arrow/Parquet does not have Awkward + metadata, `generate_bitmasks=True` creates empty bitmasks for nullable + types that don't have bitmasks in the Arrow/Parquet data, so that the + Form (BitMaskedForm vs UnmaskedForm) is predictable. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. + + Reads data from a local or remote Parquet file a feather file (or a collection of feather files?). + + Different from ak.to_parquet etc. because... + """ + #create feather file? + + # read one page of parquet file + parquet_file = pq.ParquetFile(path) # does this put the whole thing in memory? + metadata = ak.metadata_from_parquet(path) + # parquet_metadata + # read_row_group or with metadata? + # batch vs page? what size? + # with metadata['fs'].open as fp: #why would this be necessary? + for batch in parquet_file.iter_batches(): + pa.concat(new_path, ak.to_feather(new_path, batch)) #but this shouldn't be something that sets a var to a bigger file?? + + + + + # write to feather file - find concat without + # feather_file = pa.concat([ak.from_parquet(file, page) for page in pages],ignore_index=True) + diff --git a/src/proteus/to_parquet.py b/src/proteus/to_parquet.py new file mode 100644 index 0000000..a596f70 --- /dev/null +++ b/src/proteus/to_parquet.py @@ -0,0 +1,167 @@ +import dask_awkward as da +import _collections_abc +import awkward as ak +import pyarrow.parquet + +# Feather to parquet first? +def feather_to_parquet( + # array, + path, + *, + list_to32=False, + string_to32=True, + bytestring_to32=True, + emptyarray_to=None, + categorical_as_dictionary=False, + extensionarray=True, + count_nulls=True, + compression="zstd", + compression_level=None, + row_group_size=64 * 1024 * 1024, + data_page_size=None, + parquet_flavor=None, + parquet_version="2.4", + parquet_page_version="1.0", + parquet_metadata_statistics=True, + parquet_dictionary_encoding=False, + parquet_byte_stream_split=False, + parquet_coerce_timestamps=None, + parquet_old_int96_timestamps=None, + parquet_compliant_nested=False, # https://issues.apache.org/jira/browse/ARROW-16348 + parquet_extra_options=None, + storage_options=None, + # Potentially need: + # expressions=None, + # cut=None, + # filter_name = no_filter, + # filter_typename = no_filter, + # aliases=None, + # language=uproot.language.python.python_language, + # entry_start=None, + # entry_stop=None, + # step_size="100 MB", + # library="ak", + # how=None, +): + + # Do in steps. can use argument "columns" to select amount - have it be the same as the + # size of a page is in a parquet? Or step_size like in uproot's iterate? + # much to read, can choose bytes, tuple, str, list (not sure what most of those mean here) + # Data page size! 
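+    # A minimal sketch of the batched read/write loop (assumes
+    # `import pyarrow as pa`, `import pyarrow.parquet as pq`, a `destination`
+    # parameter, and a row chunk size; none of these names are settled API here):
+    #
+    #     import pyarrow.feather as feather
+    #     table = feather.read_table(path)  # memory-maps uncompressed files
+    #     writer = pq.ParquetWriter(destination, table.schema)
+    #     for batch in table.to_batches(max_chunksize=100_000):
+    #         writer.write_table(pa.Table.from_batches([batch]))
+    #     writer.close()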
+ # First read ak.feather() + # Read feather also has columns...best to read a bit at a time and keep track? + + """ + Args: + array: Array-like data (anything #ak.to_layout recognizes). + destination (path-like): Name of the output file, file path, or + remote URL passed to [fsspec.core.url_to_fs](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.core.url_to_fs) + for remote writing. + list_to32 (bool): If True, convert Awkward lists into 32-bit Arrow lists + if they're small enough, even if it means an extra conversion. Otherwise, + signed 32-bit #ak.types.ListType maps to Arrow `ListType`, + signed 64-bit #ak.types.ListType maps to Arrow `LargeListType`, + and unsigned 32-bit #ak.types.ListType picks whichever Arrow type its + values fit into. + string_to32 (bool): Same as the above for Arrow `string` and `large_string`. + bytestring_to32 (bool): Same as the above for Arrow `binary` and `large_binary`. + emptyarray_to (None or dtype): If None, #ak.types.UnknownType maps to Arrow's + null type; otherwise, it is converted a given numeric dtype. + categorical_as_dictionary (bool): If True, #ak.contents.IndexedArray and + #ak.contents.IndexedOptionArray labeled with `__array__ = "categorical"` + are mapped to Arrow `DictionaryArray`; otherwise, the projection is + evaluated before conversion (always the case without + `__array__ = "categorical"`). + extensionarray (bool): If True, this function returns extended Arrow arrays + (at all levels of nesting), which preserve metadata so that Awkward \u2192 + Arrow \u2192 Awkward preserves the array's #ak.types.Type (though not + the #ak.forms.Form). If False, this function returns generic Arrow arrays + that might be needed for third-party tools that don't recognize Arrow's + extensions. Even with `extensionarray=False`, the values produced by + Arrow's `to_pylist` method are the same as the values produced by Awkward's + #ak.to_list. + count_nulls (bool): If True, count the number of missing values at each level + and include these in the resulting Arrow array, which makes some downstream + applications faster. If False, skip the up-front cost of counting them. + compression (None, str, or dict): Compression algorithm name, passed to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + Parquet supports `{"NONE", "SNAPPY", "GZIP", "BROTLI", "LZ4", "ZSTD"}` + (where `"GZIP"` is also known as "zlib" or "deflate"). If a dict, the keys + are column names (the same column names that #ak.forms.Form.columns returns + and #ak.forms.Form.select_columns accepts) and the values are compression + algorithm names, to compress each column differently. + compression_level (None, int, or dict None): Compression level, passed to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + Compression levels have different meanings for different compression + algorithms: GZIP ranges from 1 to 9, but ZSTD ranges from -7 to 22, for + example. Generally, higher numbers provide slower but smaller compression. + row_group_size (int or None): Number of entries in each row group (except the last), + passed to [pyarrow.parquet.ParquetWriter.write_table](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter.write_table). + If None, the Parquet default of 64 MiB is used. 
+ data_page_size (None or int): Number of bytes in each data page, passed to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + If None, the Parquet default of 1 MiB is used. + parquet_flavor (None or `"spark"`): If None, the output Parquet file will follow + Arrow conventions; if `"spark"`, it will follow Spark conventions. Some + systems, such as Spark and Google BigQuery, might need Spark conventions, + while others might need Arrow conventions. Passed to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `flavor`. + parquet_version (`"1.0"`, `"2.4"`, or `"2.6"`): Parquet file format version. + Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `version`. + parquet_page_version (`"1.0"` or `"2.0"`): Parquet page format version. + Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `data_page_version`. + parquet_metadata_statistics (bool or dict): If True, include summary + statistics for each data page in the Parquet metadata, which lets some + applications search for data more quickly (by skipping pages). If a dict + mapping column names to bool, include summary statistics on only the + specified columns. Passed to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `write_statistics`. + parquet_dictionary_encoding (bool or dict): If True, allow Parquet to pre-compress + with dictionary encoding. If a dict mapping column names to bool, only + use dictionary encoding on the specified columns. Passed to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `use_dictionary`. + parquet_byte_stream_split (bool or dict): If True, pre-compress floating + point fields (`float32` or `float64`) with byte stream splitting, which + collects all mantissas in one part of the stream and exponents in another. + Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `use_byte_stream_split`. + parquet_coerce_timestamps (None, `"ms"`, or `"us"`): If None, any timestamps + (`datetime64` data) are coerced to a given resolution depending on + `parquet_version`: version `"1.0"` and `"2.4"` are coerced to microseconds, + but later versions use the `datetime64`'s own units. If `"ms"` is explicitly + specified, timestamps are coerced to milliseconds; if `"us"`, microseconds. + Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `coerce_timestamps`. + parquet_old_int96_timestamps (None or bool): If True, use Parquet's INT96 format + for any timestamps (`datetime64` data), taking priority over `parquet_coerce_timestamps`. + If None, let the `parquet_flavor` decide. Passed to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `use_deprecated_int96_timestamps`. 
+ parquet_compliant_nested (bool): If True, use the Spark/BigQuery/Parquet + [convention for nested lists](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types), + in which each list is a one-field record with field name "`element`"; + otherwise, use the Arrow convention, in which the field name is "`item`". + Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + as `use_compliant_nested_type`. + parquet_extra_options (None or dict): Any additional options to pass to + [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). + storage_options (None or dict): Any additional options to pass to + [fsspec.core.url_to_fs](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.core.url_to_fs) + to open a remote file for writing. + """ + + + + # Some kind of for-structure where it checks if there is more of the file before + parquet_writer = pq.ParquetWriter(path, ak.to_parquet(first_batch)) + for i in hasNextPage: + parquet_writer.write_table(i) + + # class pyarrow.parquet.ParquetWriter(where, schema, filesystem=None, flavor=None, version='2.6', use_dictionary=True, compression='snappy', write_statistics=True, use_deprecated_int96_timestamps=None, compression_level=None, use_byte_stream_split=False, column_encoding=None, writer_engine_version=None, data_page_version='1.0', use_compliant_nested_type=True, encryption_properties=None, write_batch_size=None, dictionary_pagesize_limit=None, store_schema=True, write_page_index=False, **options) + parquet_writer.close() + \ No newline at end of file From 35738e44ae9ad1d7bb663b55433b2c6fdb405dbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Tue, 3 Oct 2023 10:57:57 +0200 Subject: [PATCH 02/22] Variables now stored in np array, some flexibility added (directories, finding histogram names etc.) removed attempt at tree reduction --- src/proteus/operations/add_histograms.py | 115 --------------- src/proteus/operations/hadd_like.py | 169 +++++++++++++++++++++++ 2 files changed, 169 insertions(+), 115 deletions(-) delete mode 100644 src/proteus/operations/add_histograms.py create mode 100644 src/proteus/operations/hadd_like.py diff --git a/src/proteus/operations/add_histograms.py b/src/proteus/operations/add_histograms.py deleted file mode 100644 index 163550c..0000000 --- a/src/proteus/operations/add_histograms.py +++ /dev/null @@ -1,115 +0,0 @@ -import uproot - -# Only combines one histogram per file -def hadd_like(files, destination, *, hist_name=None, hist_paths=None): - if hist_name == None: - # ? search through them nicely? Assume there are multiple? - array = hist.classnames() - try: - hist = uproot.open(files[0])[hist_name] - except: - # error: name not the same! 
- error = 5 - - # Base case - bins = hist.member('fN') - values = hist.values(flow=True) - fEntries = hist.member("fEntries") - fTsumw = hist.member("fTsumw") - if hist.member("fTsumw2") != None: - fTsumw2 = hist.member("fTsumw2") - else: - fTsumw2 = 0 - fTsumwx = hist.member("fTsumwx") - fTsumwx2 = hist.member("fTsumwx2") - variances = hist.variances("flow=True") - - # Iteratively / Sequentially: - for path in files[1:]: - with uproot.open(path) as file: - hist = file[hist_name] # histogram = uproot.open("file.root:path/to/histogram") - if bins != hist.member('fN'): - raise ValueError( - "Bins must be the same, not " + {bins} + " and " + {hist.member('fN')} - ) - - values += hist.values(flow=True) - fEntries += hist.member("fEntries") - fTsumw += hist.member("fTsumw") - if hist.member("fTsumw2") != None: - fTsumw2 += hist.member("fTsumw2") - fTsumwx += hist.member("fTsumwx") - fTsumwx2 += hist.member("fTsumwx2") - variances += hist.variances("flow=True") - - h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, - fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances, hist.member("fXaxis")) - - file_out = uproot.recreate(destination) - file_out[h_sum.member("fName")] = h_sum - - -# If we can do things in parallel -def hadd_like_tree_reduction(files, destination, *, hist_name=None, threads=1): - import numpy as np - hist = uproot.open(files[0]) - try: - hist = uproot.open(files[0])[hist_name] - except: - # error: name not the same! - error = 5 - - iterations = len(files) - - if (iterations%2) != 0: - hist = files[-1] - values = hist.values(flow=True) - fEntries = hist.member("fEntries") - fTsumw = hist.member("fTsumw") - if hist.member("fTsumw2") != None: - fTsumw2 = hist.member("fTsumw2") - fTsumwx = hist.member("fTsumwx") - fTsumwx2 = hist.member("fTsumwx2") - variances = hist.variances("flow=True") - else: - values = 0 - fEntries = 0 - fTsumw = 0 - fTsumw2 = 0 - fTsumwx = 0 - fTsumwx2 = 0 - variances = 0 - - for i in range(iterations/2): - print(i) - values, fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances += sum_hists(files[i], files[-i], hist_name) - - h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, - fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances, hist.member("fXaxis")) - - file_out = uproot.recreate(destination) - file_out[h_sum.member("fName")] = h_sum - -hadd_like_tree_reduction(["/Users/zobil/Documents/Proteus/file1.root", "/Users/zobil/Documents/Proteus/file2.root"], "place.root", hist_name="name") - -def sum_hists(hist1, hist2): - # Check bins - hist1 = uproot.open(hist1) - hist2 = uproot.open(hist2) - if hist1.member("fN") != hist2.member("fN"): - raise ValueError( - "Bins must be the same, not " + {hist1.member("fN")} + " and " + {hist2.member("fN")} # Get file names - ) - values = hist1.values(flow=True) + hist2.values(flow=True) - fEntries = hist1.member("fEntries") + hist2.member("fEntries") - fTsumw = hist1.member("fTsumw") + hist2.member("fTsumw") - if hist1.member("fTsumw2") != None: - fTsumw2 = hist1.member("fTsumw2") - else: - fTsumw2 = 0 - if hist2.member("fTsumw2") != None: - fTsumw2 += hist2.member("fTsumw2") - fTsumwx = hist1.member("fTsumwx") + hist2.member("fTsumwx") - fTsumwx2 = hist1.member("fTsumwx2") + hist2.member("fTsumwx") - variances = hist1.variances("flow=True") + hist2.variances("flow=True") - return hist1.member("fN"), values, fEntries, fTsumw, fTsumw2, fTsumwx, fTsumwx2, variances diff --git a/src/proteus/operations/hadd_like.py 
b/src/proteus/operations/hadd_like.py new file mode 100644 index 0000000..ee98dd6 --- /dev/null +++ b/src/proteus/operations/hadd_like.py @@ -0,0 +1,169 @@ +import uproot +import numpy as np + + +def add_1D_hists(files, hist_name, members, values, bins): + for path in files[1:]: + with uproot.open(path) as file: + hist = file[hist_name] + if bins != hist.member('fN'): + raise ValueError( + "Bins must be equal, not " + bins + " and " + hist.member('fN') + ) + if hist.member('fName') != hist_name: + raise ValueError( + "Names must be the same, not " + hist_name + " and " + hist.member('fName') + ) + + temp_members = [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.variances(flow=True) + ] + + values += hist.values(flow=True) + members += temp_members + return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, + *members[0:6], hist.member("fXaxis")) + +def add_2D_hists(files, hist_name, values, members, bins): + for path in files[1:]: + with uproot.open(path) as file: + hist = file[hist_name] + if bins != hist.member('fN'): + raise ValueError( + "Bins must be equal, not "+ {members['fN']} + " and " + {hist.member('fN')} + ) + if hist.member('fName') != hist_name: + raise ValueError( + "Names must be the same, not "+ {members['fN']} + " and " + {hist.member('fName')} + ) + + temp_members = [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumxy'), + hist.variances(flow=True) + ] + members += temp_members + values += hist.values(flow=True) + return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, + *members[0:9], + hist.member("fXaxis"), hist.member("fYaxis")) + +def add_3D_hists(files, hist_names, values, members): + for path in files[1:]: + with uproot.open(path) as file: + hist = file[hist_names] + if members['fN'] != hist.member('fN'): + raise ValueError( + "Bins must be equal, not "+ {members['fN']} + " and " + {hist.member('fN')} + ) + if hist.member('fName') != hist_names: + raise ValueError( + "Names must be the same, not "+ {members['fN']} + " and " + {hist.member('fN')} + ) + temp_members = [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumxy'), + hist.member('fTsumwz'), + hist.member('fTsumwz2'), + hist.member('fTsumwxz'), + hist.member('fTsumwyz'), + hist.variances(flow=True) + ] + members += temp_members + values += hist.values(flow=True) + return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, + *members[0:14], hist.member("fXaxis")) + +def find_histograms(file): + # for i in filenames: + with uproot.open(file) as i: + array = i.classnames() + list = np.array([i for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) + return list + +def hadd_like(destination, filenames=None, directory=None, hist_names=None): + """ + Args: + destination (path-like): Name of the output file or file path. + filenames (None, or list of str): + directory (None, str): Local path, may contain glob patterns + hist_names (None, str, or list of str): Names of histograms to be added together. 
+ + Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to one ROOT file. + """ + if directory!=None: + import glob + filenames = sorted( + glob.glob(directory + f"/**/*{'.root'}", recursive=True) + ) + + if hist_names == None: # if histogram names are not provided + hist_names = find_histograms(filenames[0]) + hist_names = hist_names[0] + + file = uproot.open(filenames[0]) # This file may never close until the end... + hist_name = hist_names + hist = file[hist_name] + bins = hist.member('fN') + if len(hist.axes) == 1: + members = [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.variances(flow=True) + ] + values = hist.values(flow=True) + h_sum = add_1D_hists(filenames, hist.member('fName'), members, values, bins) + elif len(hist.axes) == 2: + members = [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumxy'), + hist.variances(flow=True) + ] + values = hist.values(flow=True) + h_sum = add_2D_hists(filenames, hist.member('fName'), members, values, bins) + elif len(hist.axes) == 3: + members = [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumxy'), + hist.member('fTsumwz'), + hist.member('fTsumwz2'), + hist.member('fTsumwxz'), + hist.member('fTsumwyz'), + hist.variances(flow=True) + ] + values = hist.values(flow=True) + h_sum = add_3D_hists(filenames, hist.member('fName'), members, values, bins) + file_out = uproot.recreate(destination) # What compression level? 
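+    # Answering the question above: a compression setting can be passed
+    # explicitly, e.g. uproot.recreate(destination, compression=uproot.ZLIB(1));
+    # left at the uproot default here.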
+ file_out[h_sum.member("fName")] = h_sum From 1bcac6ad1c813ec0f72251defc1e55f203bc1de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Wed, 4 Oct 2023 10:34:45 +0200 Subject: [PATCH 03/22] Changed to adding numpy arrays, added to docstrings, changed structure for multiple histograms per file (still untested though) --- src/proteus/operations/add_histograms.py | 186 +++++++++++++++++++++++ src/proteus/operations/hadd_like.py | 53 ++++--- 2 files changed, 217 insertions(+), 22 deletions(-) create mode 100644 src/proteus/operations/add_histograms.py diff --git a/src/proteus/operations/add_histograms.py b/src/proteus/operations/add_histograms.py new file mode 100644 index 0000000..1ee603d --- /dev/null +++ b/src/proteus/operations/add_histograms.py @@ -0,0 +1,186 @@ +import uproot +import numpy as np +import awkward as ak + +def add_1D_hists(files, hist_name): + bins = -1 + for path in files: + with uproot.open(path) as file: + hist = file[hist_name] + if bins == -1: + bins = hist.member('fN') + member_data = np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + ]) + variances = np.array(hist.variances(flow=True)) + values = np.array(hist.values(flow=True)) + elif bins != hist.member('fN'): + raise ValueError( + "Bins must be equal, not ", bins, " and ", hist.member('fN') + ) + if hist.member('fName') != hist_name: + raise ValueError( + "Names must be the same, not " + hist_name + " and " + hist.member('fName') + ) + + member_data += [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2') + ] + variances += hist.variances(flow=True) + values += hist.values(flow=True) + return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), + values, *member_data, variances, hist.member("fXaxis")) + +def add_2D_hists(files, hist_name): + bins = -1 + for path in files: + with uproot.open(path) as file: + hist = file[hist_name] + if bins == -1: + bins = hist.member('fN') + member_data = np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumwxy'), + ]) + variances = np.array(hist.variances(flow=True)) + values = np.array(hist.values(flow=True)) + elif bins != hist.member('fN'): + raise ValueError( + "Bins must be equal, not ", bins, " and ", hist.member('fN') + ) + if hist.member('fName') != hist_name: + raise ValueError( + "Names must be the same, not " + hist_name + " and " + hist.member('fName') + ) + + member_data += [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumwxy') + ] + variances += hist.variances(flow=True) + values += hist.values(flow=True) + return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), + values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis")) + +def add_3D_hists(files, hist_name): + bins = -1 + for path in files: + with uproot.open(path) as file: + hist = file[hist_name] + if bins == -1: + bins = hist.member('fN') + member_data = np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + 
hist.member('fTsumwy2'),
+                    hist.member('fTsumwxy'),
+                    hist.member('fTsumwz'),
+                    hist.member('fTsumwz2'),
+                    hist.member('fTsumwxz'),
+                    hist.member('fTsumwyz')
+                ])
+                variances = np.array(hist.variances(flow=True))
+                values = np.array(hist.values(flow=True))
+            elif bins != hist.member('fN'):
+                raise ValueError(
+                    "Bins must be equal, not ", bins, " and ", hist.member('fN')
+                )
+            if hist.member('fName') != hist_name:
+                raise ValueError(
+                    "Names must be the same, not " + hist_name + " and " + hist.member('fName')
+                )
+
+            member_data += [
+                hist.member('fEntries'),
+                hist.member('fTsumw'),
+                hist.member('fTsumw2'),
+                hist.member('fTsumwx'),
+                hist.member('fTsumwx2'),
+                hist.member('fTsumwy'),
+                hist.member('fTsumwy2'),
+                hist.member('fTsumwxy'),
+                hist.member('fTsumwz'),
+                hist.member('fTsumwz2'),
+                hist.member('fTsumwxz'),
+                hist.member('fTsumwyz')
+            ]
+            variances += hist.variances(flow=True)
+            values += hist.values(flow=True)
+    return uproot.writing.identify.to_TH3x(hist.member("fName"), hist.member("fTitle"),
+        values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis"))
+
+def find_histograms(file):
+    # for i in filenames:
+    with uproot.open(file) as i:
+        array = i.classnames()
+        list = np.array([i for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))])
+        return list
+
+def hadd_like(destination, files, hist_names=None):
+    """
+    Args:
+        destination (path-like): Name of the output file or file path.
+        filenames (None, or list of str): List of local ROOT files to read histograms from.
+        directory (None, str): Local path, may contain glob patterns
+        hist_names (None, str, or list of str): Names of histograms to be added together, must be specified if files contain different numbers of histograms.
+
+    Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to one ROOT file.
+    """
+    if type(files) != list: # Will this always work?
+        import glob
+        files = sorted(
+            glob.glob(files + f"/**/*{'.root'}", recursive=True)
+        )
+
+    if hist_names == None: # if histogram names are not provided
+        hist_names = find_histograms(files[0])
+
+    with uproot.open(files[0]) as file: # This file may never close until the end...
+        hist = file[hist_names]
+        num_axes = len(hist.axes)
+
+    if type(hist_names) == str:
+        if num_axes == 1:
+            h_sum = add_1D_hists(files, hist_names)
+        elif num_axes == 2:
+            h_sum = add_2D_hists(files, hist_names)
+        elif num_axes == 3:
+            h_sum = add_3D_hists(files, hist_names)
+        file_out = uproot.recreate(destination) # What compression level?
+        file_out[h_sum.member("fName")] = h_sum
+    else:
+        file_out = uproot.recreate(destination) # What compression level? Would it still be recreate?
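+        # Recreate is still the right call here: the file is created once, and
+        # every summed histogram is then assigned into it in the loop below;
+        # uproot.update would only be needed to append to an existing file.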
+ for name in hist_names: + if num_axes == 1: + h_sum = add_1D_hists(files, name) + elif num_axes == 2: + h_sum = add_2D_hists(files, name) + elif num_axes == 3: + h_sum = add_3D_hists(files, name) + file_out[h_sum.member("fName")] = h_sum + + diff --git a/src/proteus/operations/hadd_like.py b/src/proteus/operations/hadd_like.py index ee98dd6..354a2fc 100644 --- a/src/proteus/operations/hadd_like.py +++ b/src/proteus/operations/hadd_like.py @@ -8,7 +8,7 @@ def add_1D_hists(files, hist_name, members, values, bins): hist = file[hist_name] if bins != hist.member('fN'): raise ValueError( - "Bins must be equal, not " + bins + " and " + hist.member('fN') + "Bins must be equal, not ", bins, " and ", hist.member('fN') ) if hist.member('fName') != hist_name: raise ValueError( @@ -29,17 +29,20 @@ def add_1D_hists(files, hist_name, members, values, bins): return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, *members[0:6], hist.member("fXaxis")) -def add_2D_hists(files, hist_name, values, members, bins): +def add_2D_hists(files, hist_name, members, values, bins): for path in files[1:]: with uproot.open(path) as file: + if hist_names == None: # if histogram names are not provided + hist_names = find_histograms(path) hist = file[hist_name] + print(path) if bins != hist.member('fN'): raise ValueError( - "Bins must be equal, not "+ {members['fN']} + " and " + {hist.member('fN')} + "Bins must be equal, not ", bins, " and ", hist.member('fN') ) if hist.member('fName') != hist_name: raise ValueError( - "Names must be the same, not "+ {members['fN']} + " and " + {hist.member('fName')} + "Names must be the same, not " + hist_name + " and " + hist.member('fName') ) temp_members = [ @@ -50,26 +53,27 @@ def add_2D_hists(files, hist_name, values, members, bins): hist.member('fTsumwx2'), hist.member('fTsumwy'), hist.member('fTsumwy2'), - hist.member('fTsumxy'), + hist.member('fTsumwxy'), hist.variances(flow=True) ] members += temp_members - values += hist.values(flow=True) - return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, - *members[0:9], + values += np.array(hist.values(flow=True)) + print(type(values)) + return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), values, + *members, hist.member("fXaxis"), hist.member("fYaxis")) -def add_3D_hists(files, hist_names, values, members): +def add_3D_hists(files, hist_names, values, members, bins): for path in files[1:]: with uproot.open(path) as file: hist = file[hist_names] if members['fN'] != hist.member('fN'): raise ValueError( - "Bins must be equal, not "+ {members['fN']} + " and " + {hist.member('fN')} + "Bins must be equal, not ", bins, " and ", hist.member('fN') ) if hist.member('fName') != hist_names: raise ValueError( - "Names must be the same, not "+ {members['fN']} + " and " + {hist.member('fN')} + "Names must be the same, not " + hist.member('fName') + " and " + hist.member('fName') ) temp_members = [ hist.member('fEntries'), @@ -79,7 +83,7 @@ def add_3D_hists(files, hist_names, values, members): hist.member('fTsumwx2'), hist.member('fTsumwy'), hist.member('fTsumwy2'), - hist.member('fTsumxy'), + hist.member('fTsumwxy'), hist.member('fTsumwz'), hist.member('fTsumwz2'), hist.member('fTsumwxz'), @@ -88,8 +92,8 @@ def add_3D_hists(files, hist_names, values, members): ] members += temp_members values += hist.values(flow=True) - return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, - *members[0:14], 
hist.member("fXaxis")) + return uproot.writing.identify.to_TH3x(hist.member("fName"), hist.member("fTitle"), values, + *members[0:14], hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) def find_histograms(file): # for i in filenames: @@ -102,13 +106,13 @@ def hadd_like(destination, filenames=None, directory=None, hist_names=None): """ Args: destination (path-like): Name of the output file or file path. - filenames (None, or list of str): + filenames (None, or list of str): List of local ROOT files to read histograms from. directory (None, str): Local path, may contain glob patterns hist_names (None, str, or list of str): Names of histograms to be added together. Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to one ROOT file. """ - if directory!=None: + if directory!=None: # Merge directory and filenames arguments? import glob filenames = sorted( glob.glob(directory + f"/**/*{'.root'}", recursive=True) @@ -116,7 +120,7 @@ def hadd_like(destination, filenames=None, directory=None, hist_names=None): if hist_names == None: # if histogram names are not provided hist_names = find_histograms(filenames[0]) - hist_names = hist_names[0] + file = uproot.open(filenames[0]) # This file may never close until the end... hist_name = hist_names @@ -142,10 +146,11 @@ def hadd_like(destination, filenames=None, directory=None, hist_names=None): hist.member('fTsumwx2'), hist.member('fTsumwy'), hist.member('fTsumwy2'), - hist.member('fTsumxy'), + hist.member('fTsumwxy'), hist.variances(flow=True) ] - values = hist.values(flow=True) + values = np.array(hist.values(flow=True)) + print(type(values)) h_sum = add_2D_hists(filenames, hist.member('fName'), members, values, bins) elif len(hist.axes) == 3: members = [ @@ -156,14 +161,18 @@ def hadd_like(destination, filenames=None, directory=None, hist_names=None): hist.member('fTsumwx2'), hist.member('fTsumwy'), hist.member('fTsumwy2'), - hist.member('fTsumxy'), + hist.member('fTsumwxy'), hist.member('fTsumwz'), hist.member('fTsumwz2'), hist.member('fTsumwxz'), hist.member('fTsumwyz'), hist.variances(flow=True) - ] - values = hist.values(flow=True) + ] + values += np.array(hist.values(flow=True)) h_sum = add_3D_hists(filenames, hist.member('fName'), members, values, bins) file_out = uproot.recreate(destination) # What compression level? 
file_out[h_sum.member("fName")] = h_sum + + +hadd_like("place.root", filenames=["/Users/zobil/Documents/Proteus/tests/file1.root", "/Users/zobil/Documents/Proteus/tests/file2.root"], hist_names="name") +# hadd_like("place.root", directory="/Users/zobil/Documents/Proteus/tests/") From 2bb8f4b922a8a88f2513541dd58b25844d656300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Wed, 4 Oct 2023 11:56:31 +0200 Subject: [PATCH 04/22] Setting up tests and module --- src/proteus/__init__.py | 9 +- src/proteus/operations/__init__.py | 1 + src/proteus/operations/add_histograms.py | 15 +- src/proteus/to_feather.py | 65 --------- src/proteus/to_parquet.py | 167 ----------------------- tests/generate_hists_root.py | 99 ++++++++++++++ tests/test_add_histograms.py | 47 +++++++ 7 files changed, 160 insertions(+), 243 deletions(-) create mode 100644 src/proteus/operations/__init__.py delete mode 100644 src/proteus/to_feather.py delete mode 100644 src/proteus/to_parquet.py create mode 100644 tests/generate_hists_root.py create mode 100644 tests/test_add_histograms.py diff --git a/src/proteus/__init__.py b/src/proteus/__init__.py index 6ef7cae..bf99a9b 100644 --- a/src/proteus/__init__.py +++ b/src/proteus/__init__.py @@ -7,6 +7,11 @@ from __future__ import annotations -from ._version import version as __version__ +from proteus._version import version as __version__ -__all__ = ("__version__",) +from proteus.operations import add_histograms + +__all__ = [x for x in globals() if not x.startswith("_")] + +def __dir__(): + return __all__ \ No newline at end of file diff --git a/src/proteus/operations/__init__.py b/src/proteus/operations/__init__.py new file mode 100644 index 0000000..1308244 --- /dev/null +++ b/src/proteus/operations/__init__.py @@ -0,0 +1 @@ +from proteus.operations.add_histograms import * \ No newline at end of file diff --git a/src/proteus/operations/add_histograms.py b/src/proteus/operations/add_histograms.py index 1ee603d..595ea9e 100644 --- a/src/proteus/operations/add_histograms.py +++ b/src/proteus/operations/add_histograms.py @@ -134,13 +134,12 @@ def add_3D_hists(files, hist_name): values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) def find_histograms(file): - # for i in filenames: - with uproot.open(file) as i: - array = i.classnames() - list = np.array([i for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) - return list + with uproot.open(file) as i: + array = i.classnames() + list = np.array([i for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) + return list -def hadd_like(destination, files, hist_names=None): +def add_hists(destination, files, hist_names=None): """ Args: destination (path-like): Name of the output file or file path. @@ -181,6 +180,4 @@ def hadd_like(destination, files, hist_names=None): h_sum = add_2D_hists(files, name) elif num_axes == 3: h_sum = add_3D_hists(files, name) - file_out[h_sum.member("fName")] = h_sum - - + file_out[h_sum.member("fName")] = h_sum \ No newline at end of file diff --git a/src/proteus/to_feather.py b/src/proteus/to_feather.py deleted file mode 100644 index 8d7b065..0000000 --- a/src/proteus/to_feather.py +++ /dev/null @@ -1,65 +0,0 @@ -import pyarrow as pa -import pyarrow.parquet as pq -import awkward as ak -from fsspec import AbstractFileSystem - -def parquet_to_feather( - path, - new_path, # ? 
- *, - columns=None, - row_groups=None, - storage_options=None, - max_gap=64_000, - max_block=256_000_000, - footer_sample_size=1_000_000, - generate_bitmasks=False, - highlevel=True, - behavior=None, -): - """ - Args: - path (str): Local filename or remote URL, passed to fsspec for resolution. - May contain glob patterns. - columns (None, str, or list of str): Glob pattern(s) with bash-like curly - brackets for matching column names. Nested records are separated by dots. - If a list of patterns, the logical-or is matched. If None, all columns - are read. - row_groups (None or set of int): Row groups to read; must be non-negative. - Order is ignored: the output array is presented in the order specified by - Parquet metadata. If None, all row groups/all rows are read. - storage_options: Passed to `fsspec.parquet.open_parquet_file`. - max_gap (int): Passed to `fsspec.parquet.open_parquet_file`. - max_block (int): Passed to `fsspec.parquet.open_parquet_file`. - footer_sample_size (int): Passed to `fsspec.parquet.open_parquet_file`. - generate_bitmasks (bool): If enabled and Arrow/Parquet does not have Awkward - metadata, `generate_bitmasks=True` creates empty bitmasks for nullable - types that don't have bitmasks in the Arrow/Parquet data, so that the - Form (BitMaskedForm vs UnmaskedForm) is predictable. - highlevel (bool): If True, return an #ak.Array; otherwise, return - a low-level #ak.contents.Content subclass. - behavior (None or dict): Custom #ak.behavior for the output array, if - high-level. - - Reads data from a local or remote Parquet file a feather file (or a collection of feather files?). - - Different from ak.to_parquet etc. because... - """ - #create feather file? - - # read one page of parquet file - parquet_file = pq.ParquetFile(path) # does this put the whole thing in memory? - metadata = ak.metadata_from_parquet(path) - # parquet_metadata - # read_row_group or with metadata? - # batch vs page? what size? - # with metadata['fs'].open as fp: #why would this be necessary? - for batch in parquet_file.iter_batches(): - pa.concat(new_path, ak.to_feather(new_path, batch)) #but this shouldn't be something that sets a var to a bigger file?? - - - - - # write to feather file - find concat without - # feather_file = pa.concat([ak.from_parquet(file, page) for page in pages],ignore_index=True) - diff --git a/src/proteus/to_parquet.py b/src/proteus/to_parquet.py deleted file mode 100644 index a596f70..0000000 --- a/src/proteus/to_parquet.py +++ /dev/null @@ -1,167 +0,0 @@ -import dask_awkward as da -import _collections_abc -import awkward as ak -import pyarrow.parquet - -# Feather to parquet first? 
-def feather_to_parquet( - # array, - path, - *, - list_to32=False, - string_to32=True, - bytestring_to32=True, - emptyarray_to=None, - categorical_as_dictionary=False, - extensionarray=True, - count_nulls=True, - compression="zstd", - compression_level=None, - row_group_size=64 * 1024 * 1024, - data_page_size=None, - parquet_flavor=None, - parquet_version="2.4", - parquet_page_version="1.0", - parquet_metadata_statistics=True, - parquet_dictionary_encoding=False, - parquet_byte_stream_split=False, - parquet_coerce_timestamps=None, - parquet_old_int96_timestamps=None, - parquet_compliant_nested=False, # https://issues.apache.org/jira/browse/ARROW-16348 - parquet_extra_options=None, - storage_options=None, - # Potentially need: - # expressions=None, - # cut=None, - # filter_name = no_filter, - # filter_typename = no_filter, - # aliases=None, - # language=uproot.language.python.python_language, - # entry_start=None, - # entry_stop=None, - # step_size="100 MB", - # library="ak", - # how=None, -): - - # Do in steps. can use argument "columns" to select amount - have it be the same as the - # size of a page is in a parquet? Or step_size like in uproot's iterate? - # much to read, can choose bytes, tuple, str, list (not sure what most of those mean here) - # Data page size! - # First read ak.feather() - # Read feather also has columns...best to read a bit at a time and keep track? - - """ - Args: - array: Array-like data (anything #ak.to_layout recognizes). - destination (path-like): Name of the output file, file path, or - remote URL passed to [fsspec.core.url_to_fs](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.core.url_to_fs) - for remote writing. - list_to32 (bool): If True, convert Awkward lists into 32-bit Arrow lists - if they're small enough, even if it means an extra conversion. Otherwise, - signed 32-bit #ak.types.ListType maps to Arrow `ListType`, - signed 64-bit #ak.types.ListType maps to Arrow `LargeListType`, - and unsigned 32-bit #ak.types.ListType picks whichever Arrow type its - values fit into. - string_to32 (bool): Same as the above for Arrow `string` and `large_string`. - bytestring_to32 (bool): Same as the above for Arrow `binary` and `large_binary`. - emptyarray_to (None or dtype): If None, #ak.types.UnknownType maps to Arrow's - null type; otherwise, it is converted a given numeric dtype. - categorical_as_dictionary (bool): If True, #ak.contents.IndexedArray and - #ak.contents.IndexedOptionArray labeled with `__array__ = "categorical"` - are mapped to Arrow `DictionaryArray`; otherwise, the projection is - evaluated before conversion (always the case without - `__array__ = "categorical"`). - extensionarray (bool): If True, this function returns extended Arrow arrays - (at all levels of nesting), which preserve metadata so that Awkward \u2192 - Arrow \u2192 Awkward preserves the array's #ak.types.Type (though not - the #ak.forms.Form). If False, this function returns generic Arrow arrays - that might be needed for third-party tools that don't recognize Arrow's - extensions. Even with `extensionarray=False`, the values produced by - Arrow's `to_pylist` method are the same as the values produced by Awkward's - #ak.to_list. - count_nulls (bool): If True, count the number of missing values at each level - and include these in the resulting Arrow array, which makes some downstream - applications faster. If False, skip the up-front cost of counting them. 
- compression (None, str, or dict): Compression algorithm name, passed to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - Parquet supports `{"NONE", "SNAPPY", "GZIP", "BROTLI", "LZ4", "ZSTD"}` - (where `"GZIP"` is also known as "zlib" or "deflate"). If a dict, the keys - are column names (the same column names that #ak.forms.Form.columns returns - and #ak.forms.Form.select_columns accepts) and the values are compression - algorithm names, to compress each column differently. - compression_level (None, int, or dict None): Compression level, passed to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - Compression levels have different meanings for different compression - algorithms: GZIP ranges from 1 to 9, but ZSTD ranges from -7 to 22, for - example. Generally, higher numbers provide slower but smaller compression. - row_group_size (int or None): Number of entries in each row group (except the last), - passed to [pyarrow.parquet.ParquetWriter.write_table](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html#pyarrow.parquet.ParquetWriter.write_table). - If None, the Parquet default of 64 MiB is used. - data_page_size (None or int): Number of bytes in each data page, passed to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - If None, the Parquet default of 1 MiB is used. - parquet_flavor (None or `"spark"`): If None, the output Parquet file will follow - Arrow conventions; if `"spark"`, it will follow Spark conventions. Some - systems, such as Spark and Google BigQuery, might need Spark conventions, - while others might need Arrow conventions. Passed to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `flavor`. - parquet_version (`"1.0"`, `"2.4"`, or `"2.6"`): Parquet file format version. - Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `version`. - parquet_page_version (`"1.0"` or `"2.0"`): Parquet page format version. - Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `data_page_version`. - parquet_metadata_statistics (bool or dict): If True, include summary - statistics for each data page in the Parquet metadata, which lets some - applications search for data more quickly (by skipping pages). If a dict - mapping column names to bool, include summary statistics on only the - specified columns. Passed to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `write_statistics`. - parquet_dictionary_encoding (bool or dict): If True, allow Parquet to pre-compress - with dictionary encoding. If a dict mapping column names to bool, only - use dictionary encoding on the specified columns. Passed to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `use_dictionary`. - parquet_byte_stream_split (bool or dict): If True, pre-compress floating - point fields (`float32` or `float64`) with byte stream splitting, which - collects all mantissas in one part of the stream and exponents in another. 
- Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `use_byte_stream_split`. - parquet_coerce_timestamps (None, `"ms"`, or `"us"`): If None, any timestamps - (`datetime64` data) are coerced to a given resolution depending on - `parquet_version`: version `"1.0"` and `"2.4"` are coerced to microseconds, - but later versions use the `datetime64`'s own units. If `"ms"` is explicitly - specified, timestamps are coerced to milliseconds; if `"us"`, microseconds. - Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `coerce_timestamps`. - parquet_old_int96_timestamps (None or bool): If True, use Parquet's INT96 format - for any timestamps (`datetime64` data), taking priority over `parquet_coerce_timestamps`. - If None, let the `parquet_flavor` decide. Passed to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `use_deprecated_int96_timestamps`. - parquet_compliant_nested (bool): If True, use the Spark/BigQuery/Parquet - [convention for nested lists](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types), - in which each list is a one-field record with field name "`element`"; - otherwise, use the Arrow convention, in which the field name is "`item`". - Passed to [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - as `use_compliant_nested_type`. - parquet_extra_options (None or dict): Any additional options to pass to - [pyarrow.parquet.ParquetWriter](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html). - storage_options (None or dict): Any additional options to pass to - [fsspec.core.url_to_fs](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.core.url_to_fs) - to open a remote file for writing. 
- """ - - - - # Some kind of for-structure where it checks if there is more of the file before - parquet_writer = pq.ParquetWriter(path, ak.to_parquet(first_batch)) - for i in hasNextPage: - parquet_writer.write_table(i) - - # class pyarrow.parquet.ParquetWriter(where, schema, filesystem=None, flavor=None, version='2.6', use_dictionary=True, compression='snappy', write_statistics=True, use_deprecated_int96_timestamps=None, compression_level=None, use_byte_stream_split=False, column_encoding=None, writer_engine_version=None, data_page_version='1.0', use_compliant_nested_type=True, encryption_properties=None, write_batch_size=None, dictionary_pagesize_limit=None, store_schema=True, write_page_index=False, **options) - parquet_writer.close() - \ No newline at end of file diff --git a/tests/generate_hists_root.py b/tests/generate_hists_root.py new file mode 100644 index 0000000..a2499e8 --- /dev/null +++ b/tests/generate_hists_root.py @@ -0,0 +1,99 @@ +import ROOT +import uproot + +# h1 = ROOT.TH1I("name", "title", 10, -4, 4) +# h1.FillRandom("gaus") +# h2 = ROOT.TH1I("name", "title", 10, -4, 4) +# h2.FillRandom("gaus") + +def gen_gause_hists_uproot(): + file_out = uproot.recreate("file1.root") + h1 = uproot.from_pyroot(h1) + h_1 = uproot.writing.identify.to_TH1x(h1.member("fName"), + h1.member("fTitle"), + h1.values(flow=True), + h1.member("fEntries"), + h1.member("fTsumw"), + h1.member("fTsumw2"), + h1.member("fTsumwx"), + h1.member("fTsumwx2"), + h1.variances(flow=True), + h1.member("fXaxis"), + ) + print(h_1) + file_out[h_1.member("fName")] = h_1 + + file_out = uproot.recreate("file2.root") + h2 = uproot.from_pyroot(h2) + h_2 = uproot.writing.identify.to_TH1x(h2.member("fName"), + h2.member("fTitle"), + h2.values(flow=True), + h2.member("fEntries"), + h2.member("fTsumw"), + h2.member("fTsumw2"), + h2.member("fTsumwx"), + h2.member("fTsumwx2"), + h2.variances(flow=True), + h2.member("fXaxis"), + ) + + file_out[h_2.member("fName")] = h_2 + +def gen_gaus_hists_pyroot(names, file_names): + # Will create histograms with same names and bins for files in file_names + for file in file_names: + for name in names: + h = ROOT.TH1I(name, name, 10, -4, 4) + h.FillRandom("gaus") + h.Sumw2() + h.SetDirectory(0) + outHistFile = ROOT.TFile.Open(file, "RECREATE") + outHistFile.cd() + h.Write() + outHistFile.Close() + +def gen_gaus_hists_pyroot(): + h1 = ROOT.TH1I("name", "title", 10, -4, 4) + h1.FillRandom("gaus") + h1.Sumw2() + h1.SetDirectory(0) + outHistFile = ROOT.TFile.Open("file1.root", "RECREATE") + outHistFile.cd() + h1.Write() + outHistFile.Close() + + h2 = ROOT.TH1I("name", "title", 10, -4, 4) + h2.FillRandom("gaus") + h2.Sumw2() + h2.SetDirectory(0) + outHistFile = ROOT.TFile.Open("file2.root", "RECREATE") + outHistFile.cd() + h2.Write() + outHistFile.Close() + +def gen_2dim_hists_pyroot(num_hists, num_files, names): + import numpy as np + xedges = [0, 1, 3, 5] + yedges = [0, 2, 3, 4, 6] + x = np.random.normal(2, 1, 100) + y = np.random.normal(1, 1, 100) + H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges)) + + h1 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0) + h1.Sumw2() + h1.Fill(0,0) + h1.SetDirectory(0) + outHistFile = ROOT.TFile.Open("file2dim1.root", "RECREATE") + outHistFile.cd() + h1.Write() + outHistFile.Close() + + + h2 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0) + h2.Sumw2() + h2.Fill(0,0) + h2.SetDirectory(0) + outHistFile = ROOT.TFile.Open("file2dim2.root", "RECREATE") + outHistFile.cd() + h2.Write() + 
outHistFile.Close() diff --git a/tests/test_add_histograms.py b/tests/test_add_histograms.py new file mode 100644 index 0000000..18f63cd --- /dev/null +++ b/tests/test_add_histograms.py @@ -0,0 +1,47 @@ +import os + +import pytest + +import proteus + +import ROOT + +def make_hists(): + h1 = ROOT.TH1I("name", "title", 10, -4, 4) + h1.FillRandom("gaus") + h1.Sumw2() + h1.SetDirectory(0) + outHistFile = ROOT.TFile.Open("file1.root", "RECREATE") + outHistFile.cd() + h1.Write() + outHistFile.Close() + + h2 = ROOT.TH1I("name", "title", 10, -4, 4) + h2.FillRandom("gaus") + h2.Sumw2() + h2.SetDirectory(0) + outHistFile = ROOT.TFile.Open("file2.root", "RECREATE") + outHistFile.cd() + h2.Write() + outHistFile.Close() + + h3 = ROOT.TH1I("name", "title", 10, -4, 4) + h3.FillRandom("gaus") + h3.Sumw2() + h3.SetDirectory(0) + outHistFile = ROOT.TFile.Open("file3.root", "RECREATE") + outHistFile.cd() + h3.Write() + outHistFile.Close() + +def test_simple(tmp_path): + # 1-Dimensional Histograms, list of files, one histogram per file + destination = os.path.join(tmp_path, "destination.root") + make_hists() + proteus.operations.add_hists("place.root", filenames=["file1.root", "file2.root", "file3.root"], hist_names="name") + + # assert get hists from destination file and compare? + +# hadd_like("place.root", directory="/Users/zobil/Documents/Proteus/tests/") + +test_simple("/") \ No newline at end of file From a913db2f5f0ac47af0b3d1d6a137d2adcf03937a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 5 Oct 2023 11:08:50 +0200 Subject: [PATCH 05/22] small fixes for different cases, added tests --- src/proteus/__init__.py | 5 +- src/proteus/operations/add_histograms.py | 77 +++++++++++----------- tests/test_add_histograms.py | 84 +++++++++++++++++++----- 3 files changed, 106 insertions(+), 60 deletions(-) diff --git a/src/proteus/__init__.py b/src/proteus/__init__.py index bf99a9b..9ece269 100644 --- a/src/proteus/__init__.py +++ b/src/proteus/__init__.py @@ -11,7 +11,4 @@ from proteus.operations import add_histograms -__all__ = [x for x in globals() if not x.startswith("_")] - -def __dir__(): - return __all__ \ No newline at end of file +__all__ = ["add_histograms"] \ No newline at end of file diff --git a/src/proteus/operations/add_histograms.py b/src/proteus/operations/add_histograms.py index 595ea9e..bff67bb 100644 --- a/src/proteus/operations/add_histograms.py +++ b/src/proteus/operations/add_histograms.py @@ -16,26 +16,26 @@ def add_1D_hists(files, hist_name): hist.member('fTsumwx'), hist.member('fTsumwx2'), ]) - variances = np.array(hist.variances(flow=True)) - values = np.array(hist.values(flow=True)) + variances = hist.variances(flow=True) + values = hist.values(flow=True) elif bins != hist.member('fN'): raise ValueError( "Bins must be equal, not ", bins, " and ", hist.member('fN') ) - if hist.member('fName') != hist_name: + elif hist.member('fName') != hist_name: raise ValueError( "Names must be the same, not " + hist_name + " and " + hist.member('fName') ) - - member_data += [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2') - ] - variances += hist.variances(flow=True) - values += hist.values(flow=True) + else: + member_data += [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2') + ] + variances += hist.variances(flow=True) + values += hist.values(flow=True) return uproot.writing.identify.to_TH1x(hist.member("fName"), 
hist.member("fTitle"), values, *member_data, variances, hist.member("fXaxis")) @@ -62,23 +62,23 @@ def add_2D_hists(files, hist_name): raise ValueError( "Bins must be equal, not ", bins, " and ", hist.member('fN') ) - if hist.member('fName') != hist_name: + elif hist.member('fName') != hist_name: raise ValueError( "Names must be the same, not " + hist_name + " and " + hist.member('fName') ) - - member_data += [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy') - ] - variances += hist.variances(flow=True) - values += hist.values(flow=True) + else: + member_data += [ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumwxy') + ] + variances += hist.variances(flow=True) + values += hist.values(flow=True) return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis")) @@ -109,12 +109,13 @@ def add_3D_hists(files, hist_name): raise ValueError( "Bins must be equal, not ", bins, " and ", hist.member('fN') ) - if hist.member('fName') != hist_name: + elif hist.member('fName') != hist_name: raise ValueError( "Names must be the same, not " + hist_name + " and " + hist.member('fName') ) - member_data += [ + else: + member_data += [ hist.member('fEntries'), hist.member('fTsumw'), hist.member('fTsumw2'), @@ -127,24 +128,23 @@ def add_3D_hists(files, hist_name): hist.member('fTsumwz2'), hist.member('fTsumwxz'), hist.member('fTsumwyz') - ] - variances += hist.variances(flow=True) - values += hist.values(flow=True) + ] + variances += hist.variances(flow=True) + values += hist.values(flow=True) return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) def find_histograms(file): - with uproot.open(file) as i: - array = i.classnames() - list = np.array([i for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) + with uproot.open(file) as h: + array = h.classnames() + list = np.array([h[i].member("fName") for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) return list def add_hists(destination, files, hist_names=None): """ Args: destination (path-like): Name of the output file or file path. - filenames (None, or list of str): List of local ROOT files to read histograms from. - directory (None, str): Local path, may contain glob patterns + files (Str or list of str): List of local ROOT files to read histograms from. hist_names (None, str, or list of str): Names of histograms to be added together, must be specified if files contain different numbers of histograms. Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to one ROOT file. @@ -157,9 +157,10 @@ def add_hists(destination, files, hist_names=None): if hist_names == None: # if histogram names are not provided hist_names = find_histograms(files[0]) + # print(hist_names[0].member("fName")) with uproot.open(files[0]) as file: # This file may never close until the end... 
- hist = file[hist_names] + hist = file[hist_names[0]] num_axes = len(hist.axes) if type(hist_names) == str: diff --git a/tests/test_add_histograms.py b/tests/test_add_histograms.py index 18f63cd..8791a68 100644 --- a/tests/test_add_histograms.py +++ b/tests/test_add_histograms.py @@ -1,47 +1,95 @@ import os - -import pytest - +import uproot +import sys +sys.path.append("/Users/zobil/Documents/Proteus/src/") import proteus - import ROOT +import numpy as np -def make_hists(): - h1 = ROOT.TH1I("name", "title", 10, -4, 4) +def gen_1d_root(file_paths): + h1 = ROOT.TH1I("name", "title", 5, -4, 4) h1.FillRandom("gaus") h1.Sumw2() h1.SetDirectory(0) - outHistFile = ROOT.TFile.Open("file1.root", "RECREATE") + outHistFile = ROOT.TFile.Open(file_paths[0], "RECREATE") outHistFile.cd() h1.Write() outHistFile.Close() + h1 = uproot.from_pyroot(h1) - h2 = ROOT.TH1I("name", "title", 10, -4, 4) + h2 = ROOT.TH1I("name", "title", 5, -4, 4) h2.FillRandom("gaus") h2.Sumw2() h2.SetDirectory(0) - outHistFile = ROOT.TFile.Open("file2.root", "RECREATE") + outHistFile = ROOT.TFile.Open(file_paths[1], "RECREATE") outHistFile.cd() h2.Write() outHistFile.Close() + h2 = uproot.from_pyroot(h2) - h3 = ROOT.TH1I("name", "title", 10, -4, 4) + h3 = ROOT.TH1I("name", "title", 5, -4, 4) h3.FillRandom("gaus") h3.Sumw2() h3.SetDirectory(0) - outHistFile = ROOT.TFile.Open("file3.root", "RECREATE") + outHistFile = ROOT.TFile.Open(file_paths[2], "RECREATE") outHistFile.cd() h3.Write() outHistFile.Close() + h3 = uproot.from_pyroot(h3) + return h1, h2, h3 + +def test_simple(tmp_path, file_paths): + h1, h2, h3 = gen_1d_root(file_paths) -def test_simple(tmp_path): - # 1-Dimensional Histograms, list of files, one histogram per file destination = os.path.join(tmp_path, "destination.root") - make_hists() - proteus.operations.add_hists("place.root", filenames=["file1.root", "file2.root", "file3.root"], hist_names="name") + proteus.operations.add_hists(destination, ["tests/directory/file1.root", "tests/directory/file2.root"], hist_names="name") + + with uproot.open(destination) as file: + assert file["name"].member("fN") == h1.member("fN") + assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + h3.member("fTsumw") + assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True))).all + +def test_3_glob(tmp_path, file_paths): + h1, h2, h3 = gen_1d_root(file_paths) + + # destination = os.path.join(tmp_path, "destination.root") + proteus.operations.add_hists(os.path.join(tmp_path, "place.root"), "tests/directory") + + with uproot.open("tests/place.root") as file: + assert file["name"].member("fN") == h1.member("fN") + assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + h3.member("fTsumw") + assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True) + h3.values(flow=True))).all + +def test_2dim(tmp_path): + xedges = [0, 1, 3, 5] + yedges = [0, 2, 3, 4, 6] + x = np.random.normal(2, 1, 100) + y = np.random.normal(1, 1, 100) + H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges)) + + h1 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0) + h1.Sumw2() + h1.Fill(0,0) + h1.SetDirectory(0) + outHistFile = ROOT.TFile.Open("tests/file2dim1.root", "RECREATE") + outHistFile.cd() + h1.Write() + outHistFile.Close() + + h2 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0) + h2.Sumw2() + h2.Fill(0,0) + h2.SetDirectory(0) + outHistFile = 
ROOT.TFile.Open("tests/file2dim2.root", "RECREATE") + outHistFile.cd() + h2.Write() + outHistFile.Close() - # assert get hists from destination file and compare? + proteus.operations.add_hists("tests/place2.root", ["file2dim1.root", "file2dim2.root"], hist_names="name") -# hadd_like("place.root", directory="/Users/zobil/Documents/Proteus/tests/") + with uproot.open("tests/place2.root") as file: + assert file["name"].member("fN") == h1.member("fN") + assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True))).all -test_simple("/") \ No newline at end of file +test_3_glob("",["tests/directory/file1.root","tests/directory/file2.root","tests/directory/file3.root"]) From 2edd868e6e4556d5154e91dd93824bc0a2bcfead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 5 Oct 2023 16:57:38 +0200 Subject: [PATCH 06/22] Added a sort of partial tree reduction and tests --- src/proteus/operations/add_histograms.py | 110 ++++++++++++++++++----- tests/test_add_histograms.py | 19 +++- 2 files changed, 107 insertions(+), 22 deletions(-) diff --git a/src/proteus/operations/add_histograms.py b/src/proteus/operations/add_histograms.py index bff67bb..4ba3ff0 100644 --- a/src/proteus/operations/add_histograms.py +++ b/src/proteus/operations/add_histograms.py @@ -1,12 +1,11 @@ import uproot import numpy as np -import awkward as ak def add_1D_hists(files, hist_name): bins = -1 for path in files: with uproot.open(path) as file: - hist = file[hist_name] + hist = file[hist_name] # Try catch? if bins == -1: bins = hist.member('fN') member_data = np.array([ @@ -22,17 +21,17 @@ def add_1D_hists(files, hist_name): raise ValueError( "Bins must be equal, not ", bins, " and ", hist.member('fN') ) - elif hist.member('fName') != hist_name: - raise ValueError( - "Names must be the same, not " + hist_name + " and " + hist.member('fName') - ) + # elif hist.member('fName') != hist_name: + # raise ValueError( + # "Names must be the same, not " + hist_name + " and " + hist.member('fName') + # ) else: member_data += [ hist.member('fEntries'), hist.member('fTsumw'), hist.member('fTsumw2'), hist.member('fTsumwx'), - hist.member('fTsumwx2') + hist.member('fTsumwx2'), ] variances += hist.variances(flow=True) values += hist.values(flow=True) @@ -134,13 +133,7 @@ def add_3D_hists(files, hist_name): return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) -def find_histograms(file): - with uproot.open(file) as h: - array = h.classnames() - list = np.array([h[i].member("fName") for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) - return list - -def add_hists(destination, files, hist_names=None): +def add_hists(destination, files, hist_names=None, tree_reduction=False): """ Args: destination (path-like): Name of the output file or file path. 
@@ -154,16 +147,23 @@ def add_hists(destination, files, hist_names=None): files = sorted( glob.glob(files + f"/**/*{'.root'}", recursive=True) ) - + if hist_names == None: # if histogram names are not provided - hist_names = find_histograms(files[0]) - # print(hist_names[0].member("fName")) + with uproot.open(files[0]) as h: + array = h.classnames() + hist_names = np.array([h[i].member("fName") for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) + - with uproot.open(files[0]) as file: # This file may never close until the end... - hist = file[hist_names[0]] + with uproot.open(files[0]) as file: + hist = file[[str(hist_names)][0]] num_axes = len(hist.axes) + if tree_reduction == True: + h_sum = tree_reduction_add(files, hist_names) + if type(hist_names) == str: + if tree_reduction == True: + h_sum = tree_reduction_add(files, hist_names) if num_axes == 1: h_sum = add_1D_hists(files, hist_names) elif num_axes == 2: @@ -172,13 +172,83 @@ def add_hists(destination, files, hist_names=None): h_sum = add_3D_hists(files, hist_names) file_out = uproot.recreate(destination) # What compression level? file_out[h_sum.member("fName")] = h_sum + else: file_out = uproot.recreate(destination) # What compression level? Would it still be recreate? for name in hist_names: + if tree_reduction == True: + h_sum = tree_reduction_add(files, name) if num_axes == 1: h_sum = add_1D_hists(files, name) elif num_axes == 2: h_sum = add_2D_hists(files, name) elif num_axes == 3: h_sum = add_3D_hists(files, name) - file_out[h_sum.member("fName")] = h_sum \ No newline at end of file + file_out[h_sum.member("fName")] = h_sum + +def tree_reduction_add(files, hist_name): + # *** Partial tree reduction... + + # Get rid of need for all the dtype conversions? + indx = int(0) + i = int(len(files)/2) + member_data = np.ndarray((i,5)) + values, variances = np.ndarray(i), np.ndarray(i) + + if type(files) != list: # Will this always work? + import glob + files = sorted( + glob.glob(files + f"/**/*{'.root'}", recursive=True) + ) + + x_axis = "" + title = "" + while indx+1 <= i: + with uproot.open(files[indx]) as file1: + with uproot.open(files[indx+1]) as file2: + try: + hist1, hist2 = file1[hist_name], file2[hist_name] + except: + raise ValueError("Names of histograms must all be the same.") # How get other hist name? + title = hist1.member("fTitle") + x_axis = hist1.member("fXaxis") + hist1, hist2 = file1[hist_name], file2[hist_name] + i = indx/int(2) + member_data[:] = np.add(np.array([ + hist1.member('fEntries'), + hist1.member('fTsumw'), + hist1.member('fTsumw2'), + hist1.member('fTsumwx'), + hist1.member('fTsumwx2'), + ]), np.array([ + hist2.member('fEntries'), + hist2.member('fTsumw'), + hist2.member('fTsumw2'), + hist2.member('fTsumwx'), + hist2.member('fTsumwx2'), + ])) + variances = np.add(hist1.variances(flow=True), hist2.variances(flow=True)) + values = np.add(hist1.values(flow=True), hist2.values(flow=True)) + indx+=2 + if(len(files)%2==1): + with uproot.open(files[-1]) as file: + try: + hist = file[hist_name] + except: + raise ValueError("Names of histograms must all be the same.") # How get other hist name? + + member_data[-1] += np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + ]) + variances += hist.variances(flow=True) + values += hist.values(flow=True) + try: + return uproot.writing.identify.to_TH1x(hist_name, title, # pass Title? 
It may end up random + values, *np.sum(member_data, axis=0), variances, x_axis) + except: + print("Write failed.") + print("Bins must be the same size.") # Change! \ No newline at end of file diff --git a/tests/test_add_histograms.py b/tests/test_add_histograms.py index 8791a68..84352c8 100644 --- a/tests/test_add_histograms.py +++ b/tests/test_add_histograms.py @@ -42,7 +42,18 @@ def test_simple(tmp_path, file_paths): h1, h2, h3 = gen_1d_root(file_paths) destination = os.path.join(tmp_path, "destination.root") - proteus.operations.add_hists(destination, ["tests/directory/file1.root", "tests/directory/file2.root"], hist_names="name") + proteus.operations.add_hists(destination, file_paths, hist_names="name", tree_reduction=False) + + with uproot.open(destination) as file: + assert file["name"].member("fN") == h1.member("fN") + assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + h3.member("fTsumw") + assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True))).all + +def test_tree_reduction(tmp_path, file_paths): + h1, h2, h3 = gen_1d_root(file_paths) + + destination = os.path.join(tmp_path, "destination.root") + proteus.operations.add_hists(destination, file_paths, hist_names="name", tree_reduction=True) with uproot.open(destination) as file: assert file["name"].member("fN") == h1.member("fN") @@ -90,6 +101,10 @@ def test_2dim(tmp_path): with uproot.open("tests/place2.root") as file: assert file["name"].member("fN") == h1.member("fN") assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + print(file["name"].values(flow=True)) assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True))).all -test_3_glob("",["tests/directory/file1.root","tests/directory/file2.root","tests/directory/file3.root"]) +# def test_partial_tree_reduction(): + +# test_3_glob("",["tests/directory/file1.root","tests/directory/file2.root","tests/directory/file3.root"]) +test_simple("",["tests/directory/file1.root","tests/directory/file2.root","tests/directory/file3.root"]) \ No newline at end of file From 99815e323bd6fb56fb026808cabbe6c3f33dcb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 12 Oct 2023 16:23:08 +0200 Subject: [PATCH 07/22] rewrote to work smoother, added options from ROOT hadd and started commandline arguments --- src/proteus/operations/__init__.py | 3 +- src/proteus/operations/add_histograms.py | 403 +++++++++++------------ src/proteus/operations/hadd_like.py | 178 ---------- 3 files changed, 199 insertions(+), 385 deletions(-) delete mode 100644 src/proteus/operations/hadd_like.py diff --git a/src/proteus/operations/__init__.py b/src/proteus/operations/__init__.py index 1308244..fcafb42 100644 --- a/src/proteus/operations/__init__.py +++ b/src/proteus/operations/__init__.py @@ -1 +1,2 @@ -from proteus.operations.add_histograms import * \ No newline at end of file +from proteus.operations.add_histograms import * +from proteus.operations.temp import * \ No newline at end of file diff --git a/src/proteus/operations/add_histograms.py b/src/proteus/operations/add_histograms.py index 4ba3ff0..91e6044 100644 --- a/src/proteus/operations/add_histograms.py +++ b/src/proteus/operations/add_histograms.py @@ -1,94 +1,115 @@ import uproot import numpy as np +import argparse +import os -def add_1D_hists(files, hist_name): - bins = -1 - for path in files: - with uproot.open(path) as file: - hist = file[hist_name] # Try catch? 
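# The recurring "# Try catch?" question has a narrow answer: uproot raises a
# subclass of KeyError for a missing object, so the lookup can be guarded
# precisely instead of with a bare `except:`. A sketch -- `read_hist` and
# `skip_missing` are hypothetical names, not part of this module:
import uproot

def read_hist(path, name, skip_missing=False):
    with uproot.open(path) as f:
        try:
            return f[name]
        except KeyError:  # uproot's missing-key error subclasses KeyError
            if skip_missing:
                return None
            raise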
- if bins == -1: - bins = hist.member('fN') - member_data = np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - ]) - variances = hist.variances(flow=True) - values = hist.values(flow=True) - elif bins != hist.member('fN'): - raise ValueError( - "Bins must be equal, not ", bins, " and ", hist.member('fN') - ) - # elif hist.member('fName') != hist_name: - # raise ValueError( - # "Names must be the same, not " + hist_name + " and " + hist.member('fName') - # ) - else: - member_data += [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - ] - variances += hist.variances(flow=True) - values += hist.values(flow=True) - return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), - values, *member_data, variances, hist.member("fXaxis")) +def get_1d_data(hist): + return np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + ]) + +def add_1D_hists(destination, file, key, union, first, keys, skip_errors): + outfile = uproot.open(destination) + try: + hist = file[key] # Try catch? + except: + if union: + print('New key') + return keys.append(), None + elif skip_errors: + return keys, None + else: + ValueError("Histogram ", key, " missing from other files") + if first: + member_data = np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + ]) + return keys, uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), + hist.values(flow=True), *member_data, hist.variances(flow=True), hist.member("fXaxis")) + elif hist.member('fN') == outfile[key].member('fN'): + member_data = np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + ]) + h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), outfile[key].values(flow=True) + hist.values(flow=True), + *np.add(np.array([outfile[key].member('fEntries'), outfile[key].member('fTsumw'), outfile[key].member('fTsumw2'), outfile[key].member('fTsumwx'), + outfile[key].member('fTsumwx2')]), member_data), outfile[key].variances(flow=True) + hist.variances(flow=True), hist.member("fXaxis")) + file.close() + return keys, h_sum + + +def add_2D_hists(destination, file, key, union, first, keys, skip_errors): + # bins = -1 + # for path in files: + outfile = uproot.open(destination) + # keys = {keys} + # for key in keys: + try: + hist = file[key] # Try catch? 
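# The union/intersection choice threaded through the functions above can be
# decided once, before the main loop, by collecting key sets from every input.
# A sketch -- `collect_keys` is a hypothetical helper, not this module's API:
import uproot

def collect_keys(files, union=False):
    key_sets = []
    for path in files:
        with uproot.open(path) as f:
            key_sets.append(set(f.keys(cycle=False)))
    # Union keeps every name seen anywhere; intersection (closer to hadd's
    # default behaviour) keeps only names present in all files.
    merged = set.union(*key_sets) if union else set.intersection(*key_sets)
    return sorted(merged)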
+ except: + if union: + print('New key') + keys.append() + elif skip_errors: + return keys, None + else: + ValueError("Histogram ", key, " missing from other files") + if first: + member_data = np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumwxy') + ]) + return keys, uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), + np.ravel(hist.values(flow=True), order='C'), *member_data, np.ravel(hist.variances(flow=True), order='C'), hist.member("fXaxis"), hist.member("fYaxis")) + elif hist.member('fN') == outfile[key].member('fN'): + member_data = np.array([ + hist.member('fEntries'), + hist.member('fTsumw'), + hist.member('fTsumw2'), + hist.member('fTsumwx'), + hist.member('fTsumwx2'), + hist.member('fTsumwy'), + hist.member('fTsumwy2'), + hist.member('fTsumwxy') + ]) + h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), outfile[key].values(flow=True) + hist.values(flow=True), + *np.add(np.array([outfile[key].member('fEntries'), outfile[key].member('fTsumw'), outfile[key].member('fTsumw2'), outfile[key].member('fTsumwx'), + outfile[key].member('fTsumwx2')]), member_data), outfile[key].variances(flow=True) + hist.variances(flow=True), hist.member("fXaxis")) + file.close() + return keys, h_sum -def add_2D_hists(files, hist_name): - bins = -1 - for path in files: - with uproot.open(path) as file: - hist = file[hist_name] - if bins == -1: - bins = hist.member('fN') - member_data = np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy'), - ]) - variances = np.array(hist.variances(flow=True)) - values = np.array(hist.values(flow=True)) - elif bins != hist.member('fN'): - raise ValueError( - "Bins must be equal, not ", bins, " and ", hist.member('fN') - ) - elif hist.member('fName') != hist_name: - raise ValueError( - "Names must be the same, not " + hist_name + " and " + hist.member('fName') - ) - else: - member_data += [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy') - ] - variances += hist.variances(flow=True) - values += hist.values(flow=True) - return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), - values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis")) -def add_3D_hists(files, hist_name): - bins = -1 - for path in files: - with uproot.open(path) as file: - hist = file[hist_name] - if bins == -1: - bins = hist.member('fN') - member_data = np.array([ +def add_3D_hists(destination, file, key, union, first, keys, skip_errors): + outfile = uproot.open(destination) + try: + hist = file[key] # Try catch? 
+ except: + if union: + print('New key') + return keys.append(), None + elif skip_errors: + return keys, None + else: + ValueError("Histogram ", key, " missing from other files") + if first: + member_data = np.array([ hist.member('fEntries'), hist.member('fTsumw'), hist.member('fTsumw2'), @@ -101,20 +122,11 @@ def add_3D_hists(files, hist_name): hist.member('fTsumwz2'), hist.member('fTsumwxz'), hist.member('fTsumwyz') - ]) - variances = np.array(hist.variances(flow=True)) - values = np.array(hist.values(flow=True)) - elif bins != hist.member('fN'): - raise ValueError( - "Bins must be equal, not ", bins, " and ", hist.member('fN') - ) - elif hist.member('fName') != hist_name: - raise ValueError( - "Names must be the same, not " + hist_name + " and " + hist.member('fName') - ) - - else: - member_data += [ + ]) + return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), + np.ravel(hist.values(flow=True), order='C'), *member_data, np.ravel(hist.variances(flow=True), order='C'), hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) + elif hist.member('fN') == outfile[key].member('fN'): + member_data = np.add(np.array([ hist.member('fEntries'), hist.member('fTsumw'), hist.member('fTsumw2'), @@ -127,128 +139,107 @@ def add_3D_hists(files, hist_name): hist.member('fTsumwz2'), hist.member('fTsumwxz'), hist.member('fTsumwyz') - ] - variances += hist.variances(flow=True) - values += hist.values(flow=True) - return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), - values, *member_data, variances, hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) + ]), np.array(hist.member('fEntries'), + outfile[key].member('fTsumw'), + outfile[key].member('fTsumw2'), + outfile[key].member('fTsumwx'), + outfile[key].member('fTsumwx2'), + outfile[key].member('fTsumwy'), + outfile[key].member('fTsumwy2'), + outfile[key].member('fTsumwxy'), + outfile[key].member('fTsumwz'), + outfile[key].member('fTsumwz2'), + outfile[key].member('fTsumwxz'), + outfile[key].member('fTsumwyz'))) + h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), np.ravel(outfile[key].values(flow=True) + hist.values(flow=True), order='C'), + *member_data, np.ravel((outfile[key].variances(flow=True) + hist.variances(flow=True)), order='C'), hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) + file.close() + return keys, h_sum + +def add_hists( + destination, + files, + *, + target_compression=1, + tree_reduction=False, + append=False, + force=False, + no_trees=True, + skip_errors=False, + max_opened_files=0, + union=False, # Union vs intersection + same_name_only=True + ): -def add_hists(destination, files, hist_names=None, tree_reduction=False): """ Args: destination (path-like): Name of the output file or file path. files (Str or list of str): List of local ROOT files to read histograms from. hist_names (None, str, or list of str): Names of histograms to be added together, must be specified if files contain different numbers of histograms. - + force (bool): If True, overwrites destination file if it exists. + append (bool): If True, appends histograms to an existing file. + skip_errors (bool): If True, skips corrupt or non-existant files without exiting. + max_opened_files (int): Limits the number of files to be open at the same time. + skip_extra (bool): If True, ignores histograms that are not in all files. If False, writes all histograms to destination file. 
+ no_extra (bool): If True, throws an error if files do not have the same histograms. + Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to one ROOT file. """ + if os.path.isfile(destination): + if not force and not append: + raise FileExistsError + elif force and append: + raise ValueError("Cannot append to a new file. Either force or append can be true.") + if force: + file_out = uproot.recreate(destination) + if type(files) != list: # Will this always work? + if files.endswith('.txt'): + readfile=readfile import glob files = sorted( glob.glob(files + f"/**/*{'.root'}", recursive=True) ) - - if hist_names == None: # if histogram names are not provided - with uproot.open(files[0]) as h: - array = h.classnames() - hist_names = np.array([h[i].member("fName") for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) - - - with uproot.open(files[0]) as file: - hist = file[[str(hist_names)][0]] - num_axes = len(hist.axes) - - if tree_reduction == True: - h_sum = tree_reduction_add(files, hist_names) - - if type(hist_names) == str: - if tree_reduction == True: - h_sum = tree_reduction_add(files, hist_names) - if num_axes == 1: - h_sum = add_1D_hists(files, hist_names) - elif num_axes == 2: - h_sum = add_2D_hists(files, hist_names) - elif num_axes == 3: - h_sum = add_3D_hists(files, hist_names) - file_out = uproot.recreate(destination) # What compression level? - file_out[h_sum.member("fName")] = h_sum + if no_trees: + with uproot.open(files[0]) as file: + # iterclassnames ? https://uproot.readthedocs.io/en/latest/uproot.reading.ReadOnlyDirectory.html + keys = file.keys(cycle=False) + print(type(keys)) + keys_axes = dict(zip(keys, (len(file[i].axes) for i in keys))) + # print(file.classnames()) else: - file_out = uproot.recreate(destination) # What compression level? Would it still be recreate? - for name in hist_names: - if tree_reduction == True: - h_sum = tree_reduction_add(files, name) - if num_axes == 1: - h_sum = add_1D_hists(files, name) - elif num_axes == 2: - h_sum = add_2D_hists(files, name) - elif num_axes == 3: - h_sum = add_3D_hists(files, name) - file_out[h_sum.member("fName")] = h_sum - -def tree_reduction_add(files, hist_name): - # *** Partial tree reduction... + with uproot.open(files[0]) as file: + #filter for both TTrees and histograms + keys = file.keys(filter_classname='[TH[1|2|3][I|S|F|D|C]|TTREE]', cycle=False) # Actually might account for subdirectories and everything? https://uproot.readthedocs.io/en/latest/basic.html#finding-objects-in-a-file + keys_axes = dict(zip(keys, (len(file[i].axes) for i in keys))) - # Get rid of need for all the dtype conversions? - indx = int(0) - i = int(len(files)/2) - member_data = np.ndarray((i,5)) - values, variances = np.ndarray(i), np.ndarray(i) + first = True + for file in files: + try: + file = uproot.open(file) + except: + Warning("File: " + {file} + " does not exist or is corrupt.") + continue + for key in keys: + if keys_axes[key] == 1: + keys, h_sum = add_1D_hists(destination, file, key, union, first, keys, skip_errors) + elif keys_axes[key] == 2: + keys, h_sum = add_2D_hists(destination, file, key, union, first, keys, skip_errors) + else: + keys, h_sum = add_3D_hists(destination, file, key, union, first, keys, skip_errors) + if h_sum != None: + file_out[key] = h_sum + first = False + file.close() - if type(files) != list: # Will this always work? 
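# The partial reduction being removed here can be completed as a full pairwise
# ("tree") reduction in a few lines. A sketch, assuming a non-empty input list;
# merge_pair(a, b) is a hypothetical callable that merges two inputs:
def tree_reduce(items, merge_pair):
    level = list(items)
    while len(level) > 1:
        nxt = [merge_pair(level[i], level[i + 1])
               for i in range(0, len(level) - 1, 2)]
        if len(level) % 2:        # carry the odd one out to the next round
            nxt.append(level[-1])
        level = nxt
    return level[0]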
- import glob - files = sorted( - glob.glob(files + f"/**/*{'.root'}", recursive=True) - ) +def args(): + argparser = argparse.ArgumentParser(description="Hadd ROOT histograms with Uproot") + argparser.add_argument("destination", type=str, help="path of output file") + argparser.add_argument("input_files", type=str, nargs="+", help="list or directory (glob syntax accepted) of input files") + argparser.add_argument("-f", action="store_true",default=False, help="force overwrite of output file") - x_axis = "" - title = "" - while indx+1 <= i: - with uproot.open(files[indx]) as file1: - with uproot.open(files[indx+1]) as file2: - try: - hist1, hist2 = file1[hist_name], file2[hist_name] - except: - raise ValueError("Names of histograms must all be the same.") # How get other hist name? - title = hist1.member("fTitle") - x_axis = hist1.member("fXaxis") - hist1, hist2 = file1[hist_name], file2[hist_name] - i = indx/int(2) - member_data[:] = np.add(np.array([ - hist1.member('fEntries'), - hist1.member('fTsumw'), - hist1.member('fTsumw2'), - hist1.member('fTsumwx'), - hist1.member('fTsumwx2'), - ]), np.array([ - hist2.member('fEntries'), - hist2.member('fTsumw'), - hist2.member('fTsumw2'), - hist2.member('fTsumwx'), - hist2.member('fTsumwx2'), - ])) - variances = np.add(hist1.variances(flow=True), hist2.variances(flow=True)) - values = np.add(hist1.values(flow=True), hist2.values(flow=True)) - indx+=2 - if(len(files)%2==1): - with uproot.open(files[-1]) as file: - try: - hist = file[hist_name] - except: - raise ValueError("Names of histograms must all be the same.") # How get other hist name? +def tree_reduction(max_opened_files): + # Root checks system max opened files + work = work - member_data[-1] += np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - ]) - variances += hist.variances(flow=True) - values += hist.values(flow=True) - try: - return uproot.writing.identify.to_TH1x(hist_name, title, # pass Title? It may end up random - values, *np.sum(member_data, axis=0), variances, x_axis) - except: - print("Write failed.") - print("Bins must be the same size.") # Change! 
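The `args()` stub above builds a parser but never calls `parse_args()` or dispatches. A minimal complete wiring, mirroring the flags shown (the `main` function and the `--force` long name are assumptions, not part of the patch):

import argparse

def main():
    parser = argparse.ArgumentParser(description="Hadd ROOT histograms with Uproot")
    parser.add_argument("destination", type=str, help="path of output file")
    parser.add_argument("input_files", type=str, nargs="+",
                        help="list or directory (glob syntax accepted) of input files")
    parser.add_argument("-f", "--force", action="store_true", default=False,
                        help="force overwrite of output file")
    opts = parser.parse_args()
    add_hists(opts.destination, opts.input_files, force=opts.force)

if __name__ == "__main__":
    main()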
\ No newline at end of file diff --git a/src/proteus/operations/hadd_like.py b/src/proteus/operations/hadd_like.py deleted file mode 100644 index 354a2fc..0000000 --- a/src/proteus/operations/hadd_like.py +++ /dev/null @@ -1,178 +0,0 @@ -import uproot -import numpy as np - - -def add_1D_hists(files, hist_name, members, values, bins): - for path in files[1:]: - with uproot.open(path) as file: - hist = file[hist_name] - if bins != hist.member('fN'): - raise ValueError( - "Bins must be equal, not ", bins, " and ", hist.member('fN') - ) - if hist.member('fName') != hist_name: - raise ValueError( - "Names must be the same, not " + hist_name + " and " + hist.member('fName') - ) - - temp_members = [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.variances(flow=True) - ] - - values += hist.values(flow=True) - members += temp_members - return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), values, - *members[0:6], hist.member("fXaxis")) - -def add_2D_hists(files, hist_name, members, values, bins): - for path in files[1:]: - with uproot.open(path) as file: - if hist_names == None: # if histogram names are not provided - hist_names = find_histograms(path) - hist = file[hist_name] - print(path) - if bins != hist.member('fN'): - raise ValueError( - "Bins must be equal, not ", bins, " and ", hist.member('fN') - ) - if hist.member('fName') != hist_name: - raise ValueError( - "Names must be the same, not " + hist_name + " and " + hist.member('fName') - ) - - temp_members = [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy'), - hist.variances(flow=True) - ] - members += temp_members - values += np.array(hist.values(flow=True)) - print(type(values)) - return uproot.writing.identify.to_TH2x(hist.member("fName"), hist.member("fTitle"), values, - *members, - hist.member("fXaxis"), hist.member("fYaxis")) - -def add_3D_hists(files, hist_names, values, members, bins): - for path in files[1:]: - with uproot.open(path) as file: - hist = file[hist_names] - if members['fN'] != hist.member('fN'): - raise ValueError( - "Bins must be equal, not ", bins, " and ", hist.member('fN') - ) - if hist.member('fName') != hist_names: - raise ValueError( - "Names must be the same, not " + hist.member('fName') + " and " + hist.member('fName') - ) - temp_members = [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy'), - hist.member('fTsumwz'), - hist.member('fTsumwz2'), - hist.member('fTsumwxz'), - hist.member('fTsumwyz'), - hist.variances(flow=True) - ] - members += temp_members - values += hist.values(flow=True) - return uproot.writing.identify.to_TH3x(hist.member("fName"), hist.member("fTitle"), values, - *members[0:14], hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) - -def find_histograms(file): - # for i in filenames: - with uproot.open(file) as i: - array = i.classnames() - list = np.array([i for i in array if (array.get(i).startswith("TH1") or array.get(i).startswith("TH2") or array.get(i).startswith("TH3"))]) - return list - -def hadd_like(destination, filenames=None, directory=None, hist_names=None): - """ - Args: - destination (path-like): Name of the output file or file 
path. - filenames (None, or list of str): List of local ROOT files to read histograms from. - directory (None, str): Local path, may contain glob patterns - hist_names (None, str, or list of str): Names of histograms to be added together. - - Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to one ROOT file. - """ - if directory!=None: # Merge directory and filenames arguments? - import glob - filenames = sorted( - glob.glob(directory + f"/**/*{'.root'}", recursive=True) - ) - - if hist_names == None: # if histogram names are not provided - hist_names = find_histograms(filenames[0]) - - - file = uproot.open(filenames[0]) # This file may never close until the end... - hist_name = hist_names - hist = file[hist_name] - bins = hist.member('fN') - if len(hist.axes) == 1: - members = [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.variances(flow=True) - ] - values = hist.values(flow=True) - h_sum = add_1D_hists(filenames, hist.member('fName'), members, values, bins) - elif len(hist.axes) == 2: - members = [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy'), - hist.variances(flow=True) - ] - values = np.array(hist.values(flow=True)) - print(type(values)) - h_sum = add_2D_hists(filenames, hist.member('fName'), members, values, bins) - elif len(hist.axes) == 3: - members = [ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy'), - hist.member('fTsumwz'), - hist.member('fTsumwz2'), - hist.member('fTsumwxz'), - hist.member('fTsumwyz'), - hist.variances(flow=True) - ] - values += np.array(hist.values(flow=True)) - h_sum = add_3D_hists(filenames, hist.member('fName'), members, values, bins) - file_out = uproot.recreate(destination) # What compression level? 
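# One answer to the "What compression level?" question above: uproot's writing
# functions accept a compression argument. ZLIB level 1 is assumed here as a
# rough match for ROOT's historical default -- worth verifying, not definitive:
import uproot
file_out = uproot.recreate("destination.root", compression=uproot.ZLIB(1))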
- file_out[h_sum.member("fName")] = h_sum - - -hadd_like("place.root", filenames=["/Users/zobil/Documents/Proteus/tests/file1.root", "/Users/zobil/Documents/Proteus/tests/file2.root"], hist_names="name") -# hadd_like("place.root", directory="/Users/zobil/Documents/Proteus/tests/") From c121ebe782c5b31a2a2aa871c7a1a0e903634561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Mon, 16 Oct 2023 09:47:34 +0200 Subject: [PATCH 08/22] name change --- src/{proteus => odapt}/__init__.py | 5 +++-- src/{proteus => odapt}/_version.pyi | 0 src/odapt/operations/__init__.py | 1 + src/{proteus => odapt}/operations/add_histograms.py | 0 src/{proteus => odapt}/operations/parquet_to_root.py | 0 src/{proteus => odapt}/py.typed | 0 src/proteus/operations/__init__.py | 2 -- tests/test_package.py | 4 ++-- 8 files changed, 6 insertions(+), 6 deletions(-) rename src/{proteus => odapt}/__init__.py (62%) rename src/{proteus => odapt}/_version.pyi (100%) create mode 100644 src/odapt/operations/__init__.py rename src/{proteus => odapt}/operations/add_histograms.py (100%) rename src/{proteus => odapt}/operations/parquet_to_root.py (100%) rename src/{proteus => odapt}/py.typed (100%) delete mode 100644 src/proteus/operations/__init__.py diff --git a/src/proteus/__init__.py b/src/odapt/__init__.py similarity index 62% rename from src/proteus/__init__.py rename to src/odapt/__init__.py index 9ece269..a9308c7 100644 --- a/src/proteus/__init__.py +++ b/src/odapt/__init__.py @@ -5,10 +5,11 @@ """ + from __future__ import annotations -from proteus._version import version as __version__ +from odapt._version import version as __version__ -from proteus.operations import add_histograms +from odapt.operations import add_histograms __all__ = ["add_histograms"] \ No newline at end of file diff --git a/src/proteus/_version.pyi b/src/odapt/_version.pyi similarity index 100% rename from src/proteus/_version.pyi rename to src/odapt/_version.pyi diff --git a/src/odapt/operations/__init__.py b/src/odapt/operations/__init__.py new file mode 100644 index 0000000..0cc8d38 --- /dev/null +++ b/src/odapt/operations/__init__.py @@ -0,0 +1 @@ +from odapt.operations.add_histograms import * \ No newline at end of file diff --git a/src/proteus/operations/add_histograms.py b/src/odapt/operations/add_histograms.py similarity index 100% rename from src/proteus/operations/add_histograms.py rename to src/odapt/operations/add_histograms.py diff --git a/src/proteus/operations/parquet_to_root.py b/src/odapt/operations/parquet_to_root.py similarity index 100% rename from src/proteus/operations/parquet_to_root.py rename to src/odapt/operations/parquet_to_root.py diff --git a/src/proteus/py.typed b/src/odapt/py.typed similarity index 100% rename from src/proteus/py.typed rename to src/odapt/py.typed diff --git a/src/proteus/operations/__init__.py b/src/proteus/operations/__init__.py deleted file mode 100644 index fcafb42..0000000 --- a/src/proteus/operations/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from proteus.operations.add_histograms import * -from proteus.operations.temp import * \ No newline at end of file diff --git a/tests/test_package.py b/tests/test_package.py index 11c9493..7abc07f 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -2,8 +2,8 @@ import importlib.metadata -import proteus as m +import odapt as m def test_version(): - assert importlib.metadata.version("proteus") == m.__version__ + assert importlib.metadata.version("odapt") == m.__version__ From a27ad884466fafd5244bc2162eea2f27bba12ffa Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Mon, 16 Oct 2023 09:54:32 +0200 Subject: [PATCH 09/22] removing extra changes --- src/odapt/operations/__init__.py | 1 - src/odapt/operations/add_histograms.py | 245 ------------------------ src/odapt/operations/parquet_to_root.py | 20 -- tests/generate_hists_root.py | 99 ---------- tests/test_add_histograms.py | 110 ----------- 5 files changed, 475 deletions(-) delete mode 100644 src/odapt/operations/__init__.py delete mode 100644 src/odapt/operations/add_histograms.py delete mode 100644 src/odapt/operations/parquet_to_root.py delete mode 100644 tests/generate_hists_root.py delete mode 100644 tests/test_add_histograms.py diff --git a/src/odapt/operations/__init__.py b/src/odapt/operations/__init__.py deleted file mode 100644 index 0cc8d38..0000000 --- a/src/odapt/operations/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from odapt.operations.add_histograms import * \ No newline at end of file diff --git a/src/odapt/operations/add_histograms.py b/src/odapt/operations/add_histograms.py deleted file mode 100644 index 91e6044..0000000 --- a/src/odapt/operations/add_histograms.py +++ /dev/null @@ -1,245 +0,0 @@ -import uproot -import numpy as np -import argparse -import os - -def get_1d_data(hist): - return np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - ]) - -def add_1D_hists(destination, file, key, union, first, keys, skip_errors): - outfile = uproot.open(destination) - try: - hist = file[key] # Try catch? - except: - if union: - print('New key') - return keys.append(), None - elif skip_errors: - return keys, None - else: - ValueError("Histogram ", key, " missing from other files") - if first: - member_data = np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - ]) - return keys, uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), - hist.values(flow=True), *member_data, hist.variances(flow=True), hist.member("fXaxis")) - elif hist.member('fN') == outfile[key].member('fN'): - member_data = np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - ]) - h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), outfile[key].values(flow=True) + hist.values(flow=True), - *np.add(np.array([outfile[key].member('fEntries'), outfile[key].member('fTsumw'), outfile[key].member('fTsumw2'), outfile[key].member('fTsumwx'), - outfile[key].member('fTsumwx2')]), member_data), outfile[key].variances(flow=True) + hist.variances(flow=True), hist.member("fXaxis")) - file.close() - return keys, h_sum - - -def add_2D_hists(destination, file, key, union, first, keys, skip_errors): - # bins = -1 - # for path in files: - outfile = uproot.open(destination) - # keys = {keys} - # for key in keys: - try: - hist = file[key] # Try catch? 
- except: - if union: - print('New key') - keys.append() - elif skip_errors: - return keys, None - else: - ValueError("Histogram ", key, " missing from other files") - if first: - member_data = np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy') - ]) - return keys, uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), - np.ravel(hist.values(flow=True), order='C'), *member_data, np.ravel(hist.variances(flow=True), order='C'), hist.member("fXaxis"), hist.member("fYaxis")) - elif hist.member('fN') == outfile[key].member('fN'): - member_data = np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy') - ]) - h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), outfile[key].values(flow=True) + hist.values(flow=True), - *np.add(np.array([outfile[key].member('fEntries'), outfile[key].member('fTsumw'), outfile[key].member('fTsumw2'), outfile[key].member('fTsumwx'), - outfile[key].member('fTsumwx2')]), member_data), outfile[key].variances(flow=True) + hist.variances(flow=True), hist.member("fXaxis")) - file.close() - return keys, h_sum - - -def add_3D_hists(destination, file, key, union, first, keys, skip_errors): - outfile = uproot.open(destination) - try: - hist = file[key] # Try catch? - except: - if union: - print('New key') - return keys.append(), None - elif skip_errors: - return keys, None - else: - ValueError("Histogram ", key, " missing from other files") - if first: - member_data = np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy'), - hist.member('fTsumwz'), - hist.member('fTsumwz2'), - hist.member('fTsumwxz'), - hist.member('fTsumwyz') - ]) - return uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), - np.ravel(hist.values(flow=True), order='C'), *member_data, np.ravel(hist.variances(flow=True), order='C'), hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) - elif hist.member('fN') == outfile[key].member('fN'): - member_data = np.add(np.array([ - hist.member('fEntries'), - hist.member('fTsumw'), - hist.member('fTsumw2'), - hist.member('fTsumwx'), - hist.member('fTsumwx2'), - hist.member('fTsumwy'), - hist.member('fTsumwy2'), - hist.member('fTsumwxy'), - hist.member('fTsumwz'), - hist.member('fTsumwz2'), - hist.member('fTsumwxz'), - hist.member('fTsumwyz') - ]), np.array(hist.member('fEntries'), - outfile[key].member('fTsumw'), - outfile[key].member('fTsumw2'), - outfile[key].member('fTsumwx'), - outfile[key].member('fTsumwx2'), - outfile[key].member('fTsumwy'), - outfile[key].member('fTsumwy2'), - outfile[key].member('fTsumwxy'), - outfile[key].member('fTsumwz'), - outfile[key].member('fTsumwz2'), - outfile[key].member('fTsumwxz'), - outfile[key].member('fTsumwyz'))) - h_sum = uproot.writing.identify.to_TH1x(hist.member("fName"), hist.member("fTitle"), np.ravel(outfile[key].values(flow=True) + hist.values(flow=True), order='C'), - *member_data, np.ravel((outfile[key].variances(flow=True) + hist.variances(flow=True)), order='C'), hist.member("fXaxis"), hist.member("fYaxis"), hist.member("fZaxis")) - 
file.close() - return keys, h_sum - -def add_hists( - destination, - files, - *, - target_compression=1, - tree_reduction=False, - append=False, - force=False, - no_trees=True, - skip_errors=False, - max_opened_files=0, - union=False, # Union vs intersection - same_name_only=True - ): - - """ - Args: - destination (path-like): Name of the output file or file path. - files (Str or list of str): List of local ROOT files to read histograms from. - hist_names (None, str, or list of str): Names of histograms to be added together, must be specified if files contain different numbers of histograms. - force (bool): If True, overwrites destination file if it exists. - append (bool): If True, appends histograms to an existing file. - skip_errors (bool): If True, skips corrupt or non-existant files without exiting. - max_opened_files (int): Limits the number of files to be open at the same time. - skip_extra (bool): If True, ignores histograms that are not in all files. If False, writes all histograms to destination file. - no_extra (bool): If True, throws an error if files do not have the same histograms. - - Adds together histograms from local ROOT files of a collection of ROOT files, and writes them to one ROOT file. - """ - if os.path.isfile(destination): - if not force and not append: - raise FileExistsError - elif force and append: - raise ValueError("Cannot append to a new file. Either force or append can be true.") - if force: - file_out = uproot.recreate(destination) - - if type(files) != list: # Will this always work? - if files.endswith('.txt'): - readfile=readfile - import glob - files = sorted( - glob.glob(files + f"/**/*{'.root'}", recursive=True) - ) - - if no_trees: - with uproot.open(files[0]) as file: - # iterclassnames ? https://uproot.readthedocs.io/en/latest/uproot.reading.ReadOnlyDirectory.html - keys = file.keys(cycle=False) - print(type(keys)) - keys_axes = dict(zip(keys, (len(file[i].axes) for i in keys))) - # print(file.classnames()) - else: - with uproot.open(files[0]) as file: - #filter for both TTrees and histograms - keys = file.keys(filter_classname='[TH[1|2|3][I|S|F|D|C]|TTREE]', cycle=False) # Actually might account for subdirectories and everything? 
https://uproot.readthedocs.io/en/latest/basic.html#finding-objects-in-a-file - keys_axes = dict(zip(keys, (len(file[i].axes) for i in keys))) - - first = True - for file in files: - try: - file = uproot.open(file) - except: - Warning("File: " + {file} + " does not exist or is corrupt.") - continue - for key in keys: - if keys_axes[key] == 1: - keys, h_sum = add_1D_hists(destination, file, key, union, first, keys, skip_errors) - elif keys_axes[key] == 2: - keys, h_sum = add_2D_hists(destination, file, key, union, first, keys, skip_errors) - else: - keys, h_sum = add_3D_hists(destination, file, key, union, first, keys, skip_errors) - if h_sum != None: - file_out[key] = h_sum - first = False - file.close() - -def args(): - argparser = argparse.ArgumentParser(description="Hadd ROOT histograms with Uproot") - argparser.add_argument("destination", type=str, help="path of output file") - argparser.add_argument("input_files", type=str, nargs="+", help="list or directory (glob syntax accepted) of input files") - argparser.add_argument("-f", action="store_true",default=False, help="force overwrite of output file") - -def tree_reduction(max_opened_files): - # Root checks system max opened files - work = work - diff --git a/src/odapt/operations/parquet_to_root.py b/src/odapt/operations/parquet_to_root.py deleted file mode 100644 index 2acccc0..0000000 --- a/src/odapt/operations/parquet_to_root.py +++ /dev/null @@ -1,20 +0,0 @@ -import dask_awkward as dak -import uproot -def parquet_to_root(read_path, - write_path, - *, - columns, - storage_options, - max_gap, - max_block, - footer_sample_size, - generate_bitmasks, - highlevel, - behavior, - ): - arrays = dak.from_parquet(read_path, split_row_groups=True) - tree = uproot.recreate(write_path) - tree.mktree("tree", {arrays.partitions[0]}) #name? But root files aren't just TTrees... 
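# mktree wants branch types, not a set of data; a working variant of the
# function being deleted here writes the first partition to create the TTree
# and extends it with the rest. A sketch under that assumption -- the fixed
# tree name "tree" is kept from the original, and each partition is computed:
import dask_awkward as dak
import uproot

def parquet_to_root(read_path, write_path):
    arrays = dak.from_parquet(read_path, split_row_groups=True)
    out = uproot.recreate(write_path)
    first = arrays.partitions[0].compute()
    out["tree"] = {field: first[field] for field in first.fields}
    for i in range(1, arrays.npartitions):
        part = arrays.partitions[i].compute()
        out["tree"].extend({field: part[field] for field in part.fields})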
- for i in range(1,arrays.npartitions): - tree["tree"].extend(arrays.partitions[i]) - \ No newline at end of file diff --git a/tests/generate_hists_root.py b/tests/generate_hists_root.py deleted file mode 100644 index a2499e8..0000000 --- a/tests/generate_hists_root.py +++ /dev/null @@ -1,99 +0,0 @@ -import ROOT -import uproot - -# h1 = ROOT.TH1I("name", "title", 10, -4, 4) -# h1.FillRandom("gaus") -# h2 = ROOT.TH1I("name", "title", 10, -4, 4) -# h2.FillRandom("gaus") - -def gen_gause_hists_uproot(): - file_out = uproot.recreate("file1.root") - h1 = uproot.from_pyroot(h1) - h_1 = uproot.writing.identify.to_TH1x(h1.member("fName"), - h1.member("fTitle"), - h1.values(flow=True), - h1.member("fEntries"), - h1.member("fTsumw"), - h1.member("fTsumw2"), - h1.member("fTsumwx"), - h1.member("fTsumwx2"), - h1.variances(flow=True), - h1.member("fXaxis"), - ) - print(h_1) - file_out[h_1.member("fName")] = h_1 - - file_out = uproot.recreate("file2.root") - h2 = uproot.from_pyroot(h2) - h_2 = uproot.writing.identify.to_TH1x(h2.member("fName"), - h2.member("fTitle"), - h2.values(flow=True), - h2.member("fEntries"), - h2.member("fTsumw"), - h2.member("fTsumw2"), - h2.member("fTsumwx"), - h2.member("fTsumwx2"), - h2.variances(flow=True), - h2.member("fXaxis"), - ) - - file_out[h_2.member("fName")] = h_2 - -def gen_gaus_hists_pyroot(names, file_names): - # Will create histograms with same names and bins for files in file_names - for file in file_names: - for name in names: - h = ROOT.TH1I(name, name, 10, -4, 4) - h.FillRandom("gaus") - h.Sumw2() - h.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file, "RECREATE") - outHistFile.cd() - h.Write() - outHistFile.Close() - -def gen_gaus_hists_pyroot(): - h1 = ROOT.TH1I("name", "title", 10, -4, 4) - h1.FillRandom("gaus") - h1.Sumw2() - h1.SetDirectory(0) - outHistFile = ROOT.TFile.Open("file1.root", "RECREATE") - outHistFile.cd() - h1.Write() - outHistFile.Close() - - h2 = ROOT.TH1I("name", "title", 10, -4, 4) - h2.FillRandom("gaus") - h2.Sumw2() - h2.SetDirectory(0) - outHistFile = ROOT.TFile.Open("file2.root", "RECREATE") - outHistFile.cd() - h2.Write() - outHistFile.Close() - -def gen_2dim_hists_pyroot(num_hists, num_files, names): - import numpy as np - xedges = [0, 1, 3, 5] - yedges = [0, 2, 3, 4, 6] - x = np.random.normal(2, 1, 100) - y = np.random.normal(1, 1, 100) - H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges)) - - h1 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0) - h1.Sumw2() - h1.Fill(0,0) - h1.SetDirectory(0) - outHistFile = ROOT.TFile.Open("file2dim1.root", "RECREATE") - outHistFile.cd() - h1.Write() - outHistFile.Close() - - - h2 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0) - h2.Sumw2() - h2.Fill(0,0) - h2.SetDirectory(0) - outHistFile = ROOT.TFile.Open("file2dim2.root", "RECREATE") - outHistFile.cd() - h2.Write() - outHistFile.Close() diff --git a/tests/test_add_histograms.py b/tests/test_add_histograms.py deleted file mode 100644 index 84352c8..0000000 --- a/tests/test_add_histograms.py +++ /dev/null @@ -1,110 +0,0 @@ -import os -import uproot -import sys -sys.path.append("/Users/zobil/Documents/Proteus/src/") -import proteus -import ROOT -import numpy as np - -def gen_1d_root(file_paths): - h1 = ROOT.TH1I("name", "title", 5, -4, 4) - h1.FillRandom("gaus") - h1.Sumw2() - h1.SetDirectory(0) - outHistFile = ROOT.TFile.Open(file_paths[0], "RECREATE") - outHistFile.cd() - h1.Write() - outHistFile.Close() - h1 = uproot.from_pyroot(h1) - - h2 = ROOT.TH1I("name", "title", 5, -4, 
diff --git a/tests/test_add_histograms.py b/tests/test_add_histograms.py
deleted file mode 100644
index 84352c8..0000000
--- a/tests/test_add_histograms.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import os
-import uproot
-import sys
-sys.path.append("/Users/zobil/Documents/Proteus/src/")
-import proteus
-import ROOT
-import numpy as np
-
-def gen_1d_root(file_paths):
-    h1 = ROOT.TH1I("name", "title", 5, -4, 4)
-    h1.FillRandom("gaus")
-    h1.Sumw2()
-    h1.SetDirectory(0)
-    outHistFile = ROOT.TFile.Open(file_paths[0], "RECREATE")
-    outHistFile.cd()
-    h1.Write()
-    outHistFile.Close()
-    h1 = uproot.from_pyroot(h1)
-
-    h2 = ROOT.TH1I("name", "title", 5, -4, 4)
-    h2.FillRandom("gaus")
-    h2.Sumw2()
-    h2.SetDirectory(0)
-    outHistFile = ROOT.TFile.Open(file_paths[1], "RECREATE")
-    outHistFile.cd()
-    h2.Write()
-    outHistFile.Close()
-    h2 = uproot.from_pyroot(h2)
-
-    h3 = ROOT.TH1I("name", "title", 5, -4, 4)
-    h3.FillRandom("gaus")
-    h3.Sumw2()
-    h3.SetDirectory(0)
-    outHistFile = ROOT.TFile.Open(file_paths[2], "RECREATE")
-    outHistFile.cd()
-    h3.Write()
-    outHistFile.Close()
-    h3 = uproot.from_pyroot(h3)
-    return h1, h2, h3
-
-def test_simple(tmp_path, file_paths):
-    h1, h2, h3 = gen_1d_root(file_paths)
-
-    destination = os.path.join(tmp_path, "destination.root")
-    proteus.operations.add_hists(destination, file_paths, hist_names="name", tree_reduction=False)
-
-    with uproot.open(destination) as file:
-        assert file["name"].member("fN") == h1.member("fN")
-        assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + h3.member("fTsumw")
-        assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True))).all
-
-def test_tree_reduction(tmp_path, file_paths):
-    h1, h2, h3 = gen_1d_root(file_paths)
-
-    destination = os.path.join(tmp_path, "destination.root")
-    proteus.operations.add_hists(destination, file_paths, hist_names="name", tree_reduction=True)
-
-    with uproot.open(destination) as file:
-        assert file["name"].member("fN") == h1.member("fN")
-        assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + h3.member("fTsumw")
-        assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True))).all
-
-def test_3_glob(tmp_path, file_paths):
-    h1, h2, h3 = gen_1d_root(file_paths)
-
-    # destination = os.path.join(tmp_path, "destination.root")
-    proteus.operations.add_hists(os.path.join(tmp_path, "place.root"), "tests/directory")
-
-    with uproot.open("tests/place.root") as file:
-        assert file["name"].member("fN") == h1.member("fN")
-        assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw") + h3.member("fTsumw")
-        assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True) + h3.values(flow=True))).all
-
-def test_2dim(tmp_path):
-    xedges = [0, 1, 3, 5]
-    yedges = [0, 2, 3, 4, 6]
-    x = np.random.normal(2, 1, 100)
-    y = np.random.normal(1, 1, 100)
-    H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges))
-
-    h1 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0)
-    h1.Sumw2()
-    h1.Fill(0,0)
-    h1.SetDirectory(0)
-    outHistFile = ROOT.TFile.Open("tests/file2dim1.root", "RECREATE")
-    outHistFile.cd()
-    h1.Write()
-    outHistFile.Close()
-
-    h2 = ROOT.TH2I("name", "title", len(xedges), 0.0, 5.0, len(yedges), 0.0, 6.0)
-    h2.Sumw2()
-    h2.Fill(0,0)
-    h2.SetDirectory(0)
-    outHistFile = ROOT.TFile.Open("tests/file2dim2.root", "RECREATE")
-    outHistFile.cd()
-    h2.Write()
-    outHistFile.Close()
-
-    proteus.operations.add_hists("tests/place2.root", ["file2dim1.root", "file2dim2.root"], hist_names="name")
-
-    with uproot.open("tests/place2.root") as file:
-        assert file["name"].member("fN") == h1.member("fN")
-        assert file["name"].member("fTsumw") == h1.member("fTsumw") + h2.member("fTsumw")
-        print(file["name"].values(flow=True))
-        assert np.equal(file["name"].values(flow=True), np.array(h1.values(flow=True) + h2.values(flow=True))).all
-
-# def test_partial_tree_reduction():
-
-# test_3_glob("",["tests/directory/file1.root","tests/directory/file2.root","tests/directory/file3.root"])
-test_simple("",["tests/directory/file1.root","tests/directory/file2.root","tests/directory/file3.root"])
\ No newline at end of file
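Note: if these tests are ever resurrected, the array assertions end in .all rather than .all(), so they never compare anything — a bound method is always truthy, and the asserts always pass (test_simple also leaves h3 out of the expected sum). A corrected form of that assertion might look like this sketch, with file and h1–h3 as in the deleted test:

    import numpy as np

    # np.array_equal does the elementwise comparison and the reduction in
    # one call, so there is no .all() to forget.
    assert np.array_equal(
        file["name"].values(flow=True),
        h1.values(flow=True) + h2.values(flow=True) + h3.values(flow=True),
    )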
From 16f2ffec72a8feafd9a0644681cde57263bf1b74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:00:16 +0200
Subject: [PATCH 10/22] format change

---
 src/odapt/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index a9308c7..b012cca 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -12,4 +12,4 @@
 
 from odapt.operations import add_histograms
 
-__all__ = ["add_histograms"]
\ No newline at end of file
+__all__ = ["add_histograms"]

From cc760b3cdcd1a13cd4dbd280202cd30dc3415749 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:06:09 +0200
Subject: [PATCH 11/22] fix version file

---
 src/odapt/__init__.py | 2 +-
 src/odapt/version.py  | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 src/odapt/version.py

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index b012cca..aeebff4 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -8,7 +8,7 @@
 
 from __future__ import annotations
 
-from odapt._version import version as __version__
+from odapt.version import __version__
 
 from odapt.operations import add_histograms
 
diff --git a/src/odapt/version.py b/src/odapt/version.py
new file mode 100644
index 0000000..063d69c
--- /dev/null
+++ b/src/odapt/version.py
@@ -0,0 +1,5 @@
+import re
+
+__version__ = "1.0"
+version = __version__
+version_info = tuple(re.split(r"[-\.]", __version__))
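Note: re.split returns strings, so the version_info defined above is a tuple of strings — unlike the sys.version_info-style tuples of ints that callers usually compare against. A short sketch of the difference, assuming the version.py just added:

    import re

    __version__ = "1.0"
    version_info = tuple(re.split(r"[-\.]", __version__))
    assert version_info == ("1", "0")   # strings, not ints

    # A comparison like version_info >= (1, 0) raises TypeError; coercing
    # to int works for purely numeric releases:
    numeric_info = tuple(int(part) for part in __version__.split("."))
    assert numeric_info >= (1, 0)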
From c561118ec043a9edf0bff71f26f5536f55105aeb Mon Sep 17 00:00:00 2001
From: zbilodea <70441641+zbilodea@users.noreply.github.com>
Date: Mon, 16 Oct 2023 04:13:53 -0400
Subject: [PATCH 12/22] Delete src/odapt/_version.pyi

---
 src/odapt/_version.pyi | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 src/odapt/_version.pyi

diff --git a/src/odapt/_version.pyi b/src/odapt/_version.pyi
deleted file mode 100644
index 91744f9..0000000
--- a/src/odapt/_version.pyi
+++ /dev/null
@@ -1,4 +0,0 @@
-from __future__ import annotations
-
-version: str
-version_tuple: tuple[int, int, int] | tuple[int, int, int, str, str]

From 7498e782ed6bcf4ffca9b37ff54bf6248dfb1ab8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:26:02 +0200
Subject: [PATCH 13/22] format fix

---
 src/odapt/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index aeebff4..2a6630d 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -8,8 +8,8 @@
 
 from __future__ import annotations
 
-from odapt.version import __version__
+from Odapt.version import __version__
 
-from odapt.operations import add_histograms
+from Odapt.operations import add_histograms
 
 __all__ = ["add_histograms"]

From 57a281d77cac21c444270532619cfe6e7281fb7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:27:15 +0200
Subject: [PATCH 14/22] fix attempt

---
 src/odapt/__init__.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index 2a6630d..12d2331 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -8,8 +8,6 @@
 
 from __future__ import annotations
 
-from Odapt.version import __version__
+from version import __version__
 
-from Odapt.operations import add_histograms
-
-__all__ = ["add_histograms"]
+__all__ = ["__version__"]

From 24650708ec5ab74eca9fa320fa4d58000c7584c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:29:23 +0200
Subject: [PATCH 15/22] fix attempt

---
 src/odapt/__init__.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index 12d2331..27fbdb3 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -1,13 +1,9 @@
 """
 Copyright (c) 2023 Zoë Bilodeau. All rights reserved.
 
-Proteus: File conversion package.
+Odapt: File conversion package.
 """
-
-
-from __future__ import annotations
-
 from version import __version__
 
 __all__ = ["__version__"]

From 3b37e6722628e1d71cfa5421fdd233f2303415c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:34:13 +0200
Subject: [PATCH 16/22] version fix

---
 src/odapt/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index 27fbdb3..27343ee 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -4,6 +4,6 @@
 Odapt: File conversion package.
 """
 
-from version import __version__
+from Odapt.version import __version__
 
 __all__ = ["__version__"]

From 9303dd98ea359ccabef73eb847774cf42e1a2deb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:37:24 +0200
Subject: [PATCH 17/22] finally fixed

---
 src/odapt/__init__.py | 1 +
 src/odapt/version.py  | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index 27343ee..a5722ef 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -3,6 +3,7 @@
 
 Odapt: File conversion package.
 """
+from __future__ import annotations
 
 from Odapt.version import __version__
 
diff --git a/src/odapt/version.py b/src/odapt/version.py
index 063d69c..cf3683e 100644
--- a/src/odapt/version.py
+++ b/src/odapt/version.py
@@ -1,5 +1,6 @@
+from __future__ import annotations
+
 import re
 
 __version__ = "1.0"
-version = __version__
 version_info = tuple(re.split(r"[-\.]", __version__))

From 136295c428ff7db0b8660e24d0d00555e16722a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:45:58 +0200
Subject: [PATCH 18/22] version not recognized

---
 src/odapt/_version.pyi | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 src/odapt/_version.pyi

diff --git a/src/odapt/_version.pyi b/src/odapt/_version.pyi
new file mode 100644
index 0000000..f5d880b
--- /dev/null
+++ b/src/odapt/_version.pyi
@@ -0,0 +1,4 @@
+from __future__ import annotations
+
+version: "1.0"
+version_tuple: [1,0,0]
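Note: the stub added in PATCH 18 cannot fix an import error. A .pyi file is read only by static type checkers and is never imported at runtime, and version: "1.0" / version_tuple: [1,0,0] put values where type annotations belong, which checkers reject. PATCH 20 below repairs the annotations; the runtime module has to come from the build backend instead, which PATCH 21 wires up by letting hatch-vcs generate src/odapt/_version.py at build time. That generated file is roughly of this shape (a sketch based on the setuptools-scm template, not the exact generated text):

    # _version.py is written by hatch-vcs/setuptools-scm at build time;
    # it should not be edited or tracked in version control.
    __version__ = version = "1.0.0"
    __version_tuple__ = version_tuple = (1, 0, 0)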
From 02f24ccaf94c2cf9f1908dcf3d7e268fd8aff2e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:49:56 +0200
Subject: [PATCH 19/22] version still not recognized

---
 src/odapt/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index a5722ef..5d74e98 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -5,6 +5,6 @@
 """
 from __future__ import annotations
 
-from Odapt.version import __version__
+from Odapt._version import __version__
 
 __all__ = ["__version__"]

From 8736eb59add8900645a374c132af4faf3f7d09ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 10:52:03 +0200
Subject: [PATCH 20/22] version

---
 src/odapt/__init__.py  | 2 +-
 src/odapt/_version.pyi | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index 5d74e98..9ae33e3 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -5,6 +5,6 @@
 """
 from __future__ import annotations
 
-from Odapt._version import __version__
+from Odapt._version import version as __version__
 
 __all__ = ["__version__"]

diff --git a/src/odapt/_version.pyi b/src/odapt/_version.pyi
index f5d880b..91744f9 100644
--- a/src/odapt/_version.pyi
+++ b/src/odapt/_version.pyi
@@ -1,4 +1,4 @@
 from __future__ import annotations
 
-version: "1.0"
-version_tuple: [1,0,0]
+version: str
+version_tuple: tuple[int, int, int] | tuple[int, int, int, str, str]

From ec54d78482c7cd062e238098c7b91323dd94960a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 11:12:13 +0200
Subject: [PATCH 21/22] fixing version

---
 pyproject.toml        | 18 +++++++++---------
 src/odapt/__init__.py |  2 +-
 src/odapt/version.py  |  6 ------
 3 files changed, 10 insertions(+), 16 deletions(-)
 delete mode 100644 src/odapt/version.py

diff --git a/pyproject.toml b/pyproject.toml
index 1f8954c..5edf88b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
-name = "proteus"
+name = "odapt"
 authors = [
   { name = "Zoë Bilodeau", email = "zobilodeau@gmail.com" },
 ]
@@ -49,15 +49,15 @@ docs = [
 ]
 
 [project.urls]
-Homepage = "https://github.com/zbilodea/Proteus"
-"Bug Tracker" = "https://github.com/zbilodea/Proteus/issues"
-Discussions = "https://github.com/zbilodea/Proteus/discussions"
-Changelog = "https://github.com/zbilodea/Proteus/releases"
+Homepage = "https://github.com/zbilodea/Odapt"
+"Bug Tracker" = "https://github.com/zbilodea/Odapt/issues"
+Discussions = "https://github.com/zbilodea/Odapt/discussions"
+Changelog = "https://github.com/zbilodea/Odapt/releases"
 
 [tool.hatch]
 version.source = "vcs"
-build.hooks.vcs.version-file = "src/proteus/_version.py"
+build.hooks.vcs.version-file = "src/odapt/_version.py"
 envs.default.dependencies = [
   "pytest",
   "pytest-cov",
@@ -78,7 +78,7 @@ testpaths = [
 ]
 
 [tool.coverage]
-run.source = ["proteus"]
+run.source = ["odapt"]
 port.exclude_lines = [
   'pragma: no cover',
   '\.\.\.',
@@ -97,7 +97,7 @@ disallow_untyped_defs = false
 disallow_incomplete_defs = false
 
 [[tool.mypy.overrides]]
-module = "proteus.*"
+module = "odapt.*"
 disallow_untyped_defs = true
 disallow_incomplete_defs = true
@@ -141,7 +141,7 @@ exclude = []
 flake8-unused-arguments.ignore-variadic-names = true
 isort.required-imports = ["from __future__ import annotations"]
 # Uncomment if using a _compat.typing backport
-# typing-modules = ["proteus._compat.typing"]
+# typing-modules = ["odapt._compat.typing"]
 
 [tool.ruff.per-file-ignores]
 "tests/**" = ["T20"]

diff --git a/src/odapt/__init__.py b/src/odapt/__init__.py
index 9ae33e3..872d7dc 100644
--- a/src/odapt/__init__.py
+++ b/src/odapt/__init__.py
@@ -5,6 +5,6 @@
 """
 from __future__ import annotations
 
-from Odapt._version import version as __version__
+from odapt._version import version as __version__
 
 __all__ = ["__version__"]

diff --git a/src/odapt/version.py b/src/odapt/version.py
deleted file mode 100644
index cf3683e..0000000
--- a/src/odapt/version.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from __future__ import annotations
-
-import re
-
-__version__ = "1.0"
-version_info = tuple(re.split(r"[-\.]", __version__))
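Note: with PATCH 21 the pieces finally line up — the hatch-vcs hook in pyproject.toml writes src/odapt/_version.py at build time, and __init__.py imports version from it under the correct lowercase package name. A quick smoke test after an editable install (pip install -e .); the exact version string depends on the git state and is only illustrative:

    import odapt

    # The version now comes from the generated odapt/_version.py (a
    # setuptools-scm string derived from git metadata), not from the
    # hand-written "1.0" in the deleted version.py.
    print(odapt.__version__)
    assert odapt.__all__ == ["__version__"]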
From 9d52b9273e69d8437889d54ca1910324feb03b29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?=
Date: Mon, 16 Oct 2023 11:48:20 +0200
Subject: [PATCH 22/22] more name changes

---
 .github/CONTRIBUTING.md | 2 +-
 docs/conf.py            | 4 ++--
 docs/index.md           | 2 +-
 noxfile.py              | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index ab3900f..a99efb0 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -71,7 +71,7 @@ pytest
 Use pytest-cov to generate coverage reports:
 
 ```bash
-pytest --cov=Proteus
+pytest --cov=Odapt
 ```
 
 # Building docs

diff --git a/docs/conf.py b/docs/conf.py
index 1bb2c7d..6316841 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -2,10 +2,10 @@
 
 import importlib.metadata
 
-project = "Proteus"
+project = "Odapt"
 copyright = "2023, Zoë Bilodeau"
 author = "Zoë Bilodeau"
-version = release = importlib.metadata.version("proteus")
+version = release = importlib.metadata.version("odapt")
 
 extensions = [
     "myst_parser",

diff --git a/docs/index.md b/docs/index.md
index 4b55379..f926b6f 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,4 +1,4 @@
-# Proteus
+# Odapt
 
 ```{toctree}
 :maxdepth: 2

diff --git a/noxfile.py b/noxfile.py
index 90a3a7e..5632317 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -30,7 +30,7 @@ def pylint(session: nox.Session) -> None:
     # This needs to be installed into the package environment, and is slower
     # than a pre-commit check
     session.install(".", "pylint")
-    session.run("pylint", "proteus", *session.posargs)
+    session.run("pylint", "odapt", *session.posargs)
 
 
 @nox.session
@@ -99,7 +99,7 @@ def build_api_docs(session: nox.Session) -> None:
         "--module-first",
         "--no-toc",
         "--force",
-        "../src/proteus",
+        "../src/odapt",
     )
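Note: docs/conf.py now takes the release string from importlib.metadata, so the docs build only sees the right version once the renamed distribution is installed; a lookup under the old name would raise PackageNotFoundError. One loose end from the rename sweep: CONTRIBUTING.md now advertises pytest --cov=Odapt while pyproject.toml sets run.source = ["odapt"], and since --cov takes the importable (lowercase) package name, the capitalized form likely reports empty coverage on case-sensitive systems — pytest --cov=odapt would match. A sanity check for the metadata half, assuming odapt is installed:

    import importlib.metadata

    # The argument must match [project] name = "odapt" in pyproject.toml;
    # the old name raises importlib.metadata.PackageNotFoundError.
    print(importlib.metadata.version("odapt"))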