diff --git a/src/hepconvert/__main__.py b/src/hepconvert/__main__.py index 1341247..7ba917b 100644 --- a/src/hepconvert/__main__.py +++ b/src/hepconvert/__main__.py @@ -91,18 +91,23 @@ def parquet_to_root( "-db", "--drop-branches", default=None, - type=list or dict or str, + type=str, required=False, help="Specify branch names to remove from the ROOT file. Either a str, list of str (for multiple branches), or a dict with form {'tree': 'branches'} to remove branches from certain ttrees. Wildcarding accepted.", ) +@click.option("-kb", "--keep-branches", default=None, type=str, required=False) @click.option( - "-kb", "--keep-branches", default=None, type=list or dict or str, required=False + "-s", + "--step-size", + default="100 MB", + type=str, + help="If an integer, the maximum number of entries to include in each iteration step; if a string, the maximum memory size to include. The string must be a number followed by a memory unit, such as “100 MB”.", ) @click.option( "-dt", "--drop-trees", default=None, - type=list or str, + type=str, required=False, help="Specify tree names to remove from the ROOT file. Wildcarding accepted.", ) @@ -110,13 +115,13 @@ def parquet_to_root( "-kt", "--keep-trees", default=None, - type=list or str, + type=str, required=False, help="Specify tree names to keep in the ROOT file. All others will be removed. 
Wildcarding accepted.", ) @click.option("--progress-bar", is_flag=True) -@click.option("--cut", default=None, type=str or list, required=False) -@click.option("--expressions", default=None, type=str or list, required=False) +@click.option("--cut", default=None, type=str, required=False) +@click.option("--expressions", default=None, type=str, required=False) @click.option("--title", type=str, required=False, default="") @click.option( "--initial-basket-capacity", @@ -151,7 +156,7 @@ def copy_root( initial_basket_capacity=10, resize_factor=10.0, counter_name=lambda counted: "n" + counted, - step_size=100, + step_size="100 MB", compression="LZ4", compression_level=1, ): @@ -270,41 +275,40 @@ def add( help="When the TTree metadata needs to be rewritten, this specifies how many more TBasket slots to allocate as a multiplicative factor.", ) @click.option( + "-s", "--step-size", default="100 MB", - type=int or str, + type=str, help="If an integer, the maximum number of entries to include in each iteration step; if a string, the maximum memory size to include. The string must be a number followed by a memory unit, such as “100 MB”.", ) @click.option( "-db", "--drop-branches", default=None, - type=list or dict or str, + type=str, required=False, help="Specify branch names to remove from the ROOT file. Either a str, list of str (for multiple branches), or a dict with form {'tree': 'branches'} to remove branches from certain ttrees. Wildcarding accepted.", ) -@click.option( - "-kb", "--keep-branches", default=None, type=list or dict or str, required=False -) +@click.option("-kb", "--keep-branches", default=None, type=str, required=False) @click.option( "-dt", "--drop-trees", default=None, - type=list or str, + type=str, required=False, - help="Specify tree names to remove from the ROOT file. Wildcarding accepted.", + help="Specify tree name to remove from the ROOT file. 
Wildcarding accepted.", ) @click.option( "-kt", "--keep-trees", default=None, - type=list or str, + type=str, required=False, - help="Specify tree names to keep in the ROOT file.. Wildcarding accepted.", + help="Specify tree name to keep in the ROOT file.. Wildcarding accepted.", ) @click.option("--progress-bar", is_flag=True) -@click.option("--cut", default=None, type=str or list, required=False) -@click.option("--expressions", default=None, type=str or list, required=False) +@click.option("--cut", default=None, type=str, required=False) +@click.option("--expressions", default=None, type=str, required=False) @click.option( "-f", "--force", @@ -391,14 +395,14 @@ def merge_root( "-t", "--tree", default=False, - type=bool, + type=str, help="Specify the name of a tree to write to Parquet, if there are multiple trees in the ROOT file.", ) @click.option( "-db", "--drop-branches", default=None, - type=list or dict or str, + type=str, required=False, help="Specify branch names to remove from the ROOT file. Either a str, list of str (for multiple branches), or a dict with form {'tree': 'branches'} to remove branches from certain ttrees. Wildcarding accepted.", ) @@ -406,12 +410,12 @@ def merge_root( "-kb", "--keep-branches", default=None, - type=list or dict or str, + type=str, required=False, help="Specify branch names to keep in the ROOT file. Either a str, list of str (for multiple branches), or a dict with form {'tree': 'branches'} to keep only certain branches in certain ttrees. 
Wildcarding accepted.", ) -@click.option("--cut", default=None, type=str or list, required=False) -@click.option("--expressions", default=None, type=str or list, required=False) +@click.option("--cut", default=None, type=str, required=False) +@click.option("--expressions", default=None, type=str, required=False) @click.option( "-f", "--force", @@ -422,7 +426,7 @@ def merge_root( @click.option( "-s", "--step-size", - type=int or str, + type=str, default="100 MB", help="Specify batch size for reading ROOT file. If an integer, the maximum number of entries to include in each iteration step; if a string, the maximum memory size to include.", ) @@ -561,7 +565,7 @@ def root_to_parquet( cut=None, expressions=None, force=False, - step_size="100 MB", + step_size="100MB", list_to32=False, string_to32=True, bytestring_to32=True, diff --git a/src/hepconvert/_utils.py b/src/hepconvert/_utils.py index 4f806ca..2eadc10 100644 --- a/src/hepconvert/_utils.py +++ b/src/hepconvert/_utils.py @@ -80,7 +80,7 @@ def filter_branches(tree, keep_branches, drop_branches, count_branches): for b in tree.branches if b.name not in count_branches and b.name in keys ] - return [b.name for b in tree.branches] + return [b.name for b in tree.branches if b.name not in count_branches] def check_tqdm(): diff --git a/src/hepconvert/copy_root.py b/src/hepconvert/copy_root.py index 80637c3..be00a6d 100644 --- a/src/hepconvert/copy_root.py +++ b/src/hepconvert/copy_root.py @@ -146,6 +146,12 @@ def copy_root( ), ) first = (True,) + + try: # is this legal? + step_size = int(step_size) + except ValueError: + step_size = str(step_size) + try: f = uproot.open(in_file) except FileNotFoundError: diff --git a/src/hepconvert/merge.py b/src/hepconvert/merge.py index f0e7b6e..7613767 100644 --- a/src/hepconvert/merge.py +++ b/src/hepconvert/merge.py @@ -161,6 +161,11 @@ def merge_root( ) first = True + try: # is this legal? 
+ step_size = int(step_size) + except ValueError: + step_size = str(step_size) + if not isinstance(files, list) and not isinstance(files, tuple): path = Path(files) files = sorted(path.glob("**/*.root")) diff --git a/src/hepconvert/root_to_parquet.py b/src/hepconvert/root_to_parquet.py index a8f8f74..0e90d06 100644 --- a/src/hepconvert/root_to_parquet.py +++ b/src/hepconvert/root_to_parquet.py @@ -214,6 +214,11 @@ def root_to_parquet( raise AttributeError(msg) from None tree = trees[0] + try: # step_size may be a string (the CLI option is type=str): a plain integer becomes an entry count, anything else (e.g. "100 MB") is kept as a memory-size string + step_size = int(step_size) + except ValueError: + step_size = str(step_size) + filter_b = _filter_branches(f[tree], keep_branches, drop_branches) # if there's a counter, rid of that too... ak.to_parquet_row_groups(