diff --git a/src/uproot/const.py b/src/uproot/const.py index 4f6248822..c755eced8 100644 --- a/src/uproot/const.py +++ b/src/uproot/const.py @@ -117,108 +117,136 @@ kStreamedMemberWise = numpy.uint16(1 << 14) -############ RNTuple https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md +############ RNTuple https://github.com/root-project/root/blob/0b9cdbcfd326ba50ee6c2f202675656129eafbe7/tree/ntuple/v7/doc/BinaryFormatSpecification.md rntuple_col_num_to_dtype_dict = { - 1: "uint64", - 2: "uint32", - 3: "switch", # Switch - 4: "uint8", - 5: "uint8", # char - 6: "bit", - 7: "float64", - 8: "float32", - 9: "float16", - 10: "uint64", - 11: "uint32", - 12: "uint16", - 13: "uint8", - 14: "uint64", # SplitIndex64 delta encoding - 15: "uint32", # SplitIndex32 delta encoding - 16: "float64", # split - 17: "float32", # split - 18: "float16", # split - 19: "uint64", # split - 20: "uint32", # split - 21: "uint16", # split - 22: "int64", - 23: "int32", - 24: "int16", - 25: "int8", - 26: "int64", # split + zigzag encoding - 27: "int32", # split + zigzag encoding - 28: "int16", # split + zigzag encoding - 29: "float32trunc", - 30: "float32quant", + 0x00: "bit", + 0x01: "uint8", # uninterpreted byte + 0x02: "uint8", # char + 0x03: "int8", + 0x04: "uint8", + 0x05: "int16", + 0x06: "uint16", + 0x07: "int32", + 0x08: "uint32", + 0x09: "int64", + 0x0A: "uint64", + 0x0B: "float16", + 0x0C: "float32", + 0x0D: "float64", + 0x0E: "uint32", # Index32 + 0x0F: "uint64", # Index64 + 0x10: "switch", # Switch: (uint64, uint32) + 0x11: "int16", # SplitInt16: split + zigzag encoding + 0x12: "uint16", # SplitUInt16: split encoding + 0x13: "int32", # SplitInt32: split + zigzag encoding + 0x14: "uint32", # SplitUInt32: split encoding + 0x15: "int64", # SplitInt64: split + zigzag encoding + 0x16: "uint64", # SplitUInt64: split encoding + 0x17: "float16", # SplitReal16: split encoding + 0x18: "float32", # SplitReal32: split encoding + 0x19: "float64", # SplitReal64: split encoding + 0x1A: "uint32", # SplitIndex32: split + delta encoding + 0x1B: "uint64", # SplitIndex64: split + delta encoding + 0x1C: "real32trunc", # Real32Trunc: float32 with truncated mantissa + 0x1D: "real32quant", # Real32Quant: float32 with quantized integer representation } rntuple_col_num_to_size_dict = { - 1: 64, - 2: 32, - 3: 96, # Switch - 4: 8, - 5: 8, # char - 6: 1, - 7: 64, - 8: 32, - 9: 16, - 10: 64, - 11: 32, - 12: 16, - 13: 8, - 14: 64, # SplitIndex64 delta encoding - 15: 32, # SplitIndex32 delta encoding - 16: 64, # split - 17: 32, # split - 18: 16, # split - 19: 64, # split - 20: 32, # split - 21: 16, # split - 22: 64, - 23: 32, - 24: 16, - 25: 8, - 26: 64, # split + zigzag encoding - 27: 32, # split + zigzag encoding - 28: 16, # split + zigzag encoding - 29: 32, # TODO: variable size - 30: 32, # TODO: variable size + 0x00: 1, + 0x01: 8, + 0x02: 8, + 0x03: 8, + 0x04: 8, + 0x05: 16, + 0x06: 16, + 0x07: 32, + 0x08: 32, + 0x09: 64, + 0x0A: 64, + 0x0B: 16, + 0x0C: 32, + 0x0D: 64, + 0x0E: 32, + 0x0F: 64, + 0x10: 96, + 0x11: 16, + 0x12: 16, + 0x13: 32, + 0x14: 32, + 0x15: 64, + 0x16: 64, + 0x17: 16, + 0x18: 32, + 0x19: 64, + 0x1A: 32, + 0x1B: 64, + 0x1C: 31, # variable from 10 to 31 + 0x1D: 32, # variable from 1 to 32 } - rntuple_col_type_to_num_dict = { - "index64": 1, - "index32": 2, - "switch": 3, - "byte": 4, - "char": 5, - "bit": 6, - "real64": 7, - "real32": 8, - "real16": 9, - "uint64": 10, - "uint32": 11, - "uint16": 12, - "uint8": 13, - "splitindex64": 14, - "splitindex32": 15, - "splitreal64": 16, - "splitreal32": 17, - "splitreal16": 18, - "splitin64": 19, - "splitint32": 20, - "splitint16": 21, - "int64": 22, - "int32": 23, - "int16": 24, - "int8": 25, - "splitzigzagint64": 26, - "splitzigzagint32": 27, - "splitzigzagint16": 28, + "bit": 0x00, + "byte": 0x01, + "char": 0x02, + "int8": 0x03, + "uint8": 0x04, + "int16": 0x05, + "uint16": 0x06, + "int32": 0x07, + "uint32": 0x08, + "int64": 0x09, + "uint64": 0x0A, + "real16": 0x0B, + "real32": 0x0C, + "real64": 0x0D, + "index32": 0x0E, + "index64": 0x0F, + "switch": 0x10, + "splitint16": 0x11, + "splituint16": 0x12, + "splitint32": 0x13, + "splituint32": 0x14, + "splitint64": 0x15, + "splituint64": 0x16, + "splitreal16": 0x17, + "splitreal32": 0x18, + "splitreal64": 0x19, + "splitindex32": 0x1A, + "splitindex64": 0x1B, + "real32trunc": 0x1C, + "real32quant": 0x1D, } +rntuple_index_types = ( + rntuple_col_type_to_num_dict["index32"], + rntuple_col_type_to_num_dict["index64"], + rntuple_col_type_to_num_dict["splitindex32"], + rntuple_col_type_to_num_dict["splitindex64"], +) +rntuple_split_types = ( + rntuple_col_type_to_num_dict["splitint16"], + rntuple_col_type_to_num_dict["splituint16"], + rntuple_col_type_to_num_dict["splitint32"], + rntuple_col_type_to_num_dict["splituint32"], + rntuple_col_type_to_num_dict["splitint64"], + rntuple_col_type_to_num_dict["splituint64"], + rntuple_col_type_to_num_dict["splitreal16"], + rntuple_col_type_to_num_dict["splitreal32"], + rntuple_col_type_to_num_dict["splitreal64"], + rntuple_col_type_to_num_dict["splitindex32"], + rntuple_col_type_to_num_dict["splitindex64"], +) +rntuple_zigzag_types = ( + rntuple_col_type_to_num_dict["splitint16"], + rntuple_col_type_to_num_dict["splitint32"], + rntuple_col_type_to_num_dict["splitint64"], +) +rntuple_delta_types = ( + rntuple_col_type_to_num_dict["splitindex32"], + rntuple_col_type_to_num_dict["splitindex64"], +) class RNTupleLocatorType(IntEnum): STANDARD = 0x00 LARGE = 0x01 - DAOS = 0x02 class RNTupleEnvelopeType(IntEnum): @@ -230,10 +258,10 @@ class RNTupleEnvelopeType(IntEnum): class RNTupleFieldRole(IntEnum): LEAF = 0x00 - VECTOR = 0x01 - STRUCT = 0x02 - UNION = 0x03 - UNSPLIT = 0x04 + COLLECTION = 0x01 + RECORD = 0x02 + VARIANT = 0x03 + STREAMER = 0x04 class RNTupleFieldFlag(IntEnum): @@ -243,20 +271,13 @@ class RNTupleFieldFlag(IntEnum): class RNTupleColumnFlag(IntEnum): - DEFERRED = 0x08 - RANGE = 0x10 + DEFERRED = 0x01 + RANGE = 0x02 class RNTupleExtraTypeIdentifier(IntEnum): ROOT = 0x00 -class RNTupleUserMetadataType(IntEnum): - INT = 0x01 - BOOL = 0x02 - DOUBLE = 0x03 - STRING = 0x04 - - class RNTupleClusterFlag(IntEnum): SHARDED = 0x01 diff --git a/src/uproot/models/RNTuple.py b/src/uproot/models/RNTuple.py index 09f92f8dc..a72dbcbaf 100644 --- a/src/uproot/models/RNTuple.py +++ b/src/uproot/models/RNTuple.py @@ -1,7 +1,7 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE """ -This module defines a versionless model for ``ROOT::Experimental::RNTuple``. +This module defines a versionless model for ``ROOT::RNTuple``. """ from __future__ import annotations @@ -10,44 +10,48 @@ from itertools import accumulate import numpy +import xxhash import uproot +import uproot.const -# https://github.com/root-project/root/blob/8635b1bc0da59623777c9fda3661a19363964915/tree/ntuple/v7/doc/specifications.md#anchor-schema +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#anchor-schema _rntuple_anchor_format = struct.Struct(">HHHHQQQQQQQ") -# https://github.com/root-project/root/blob/8635b1bc0da59623777c9fda3661a19363964915/tree/ntuple/v7/doc/specifications.md#feature-flags +_rntuple_anchor_checksum_format = struct.Struct(">Q") +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#feature-flags _rntuple_feature_flag_format = struct.Struct(" uproot.const.rntuple_col_type_to_num_dict["switch"]: - dt_str = uproot.const.rntuple_col_num_to_dtype_dict[dtype_byte] - if dt_str == "bit": - dt_str = "bool" - return ak.forms.NumpyForm( - dt_str, - form_key=form_key, - parameters=parameters, - ) - else: # offset index column + elif dtype_byte in uproot.const.rntuple_index_types and not cardinality: return form_key + dt_str = uproot.const.rntuple_col_num_to_dtype_dict[dtype_byte] + if dt_str == "bit": + dt_str = "bool" + return ak.forms.NumpyForm( + dt_str, + form_key=form_key, + parameters=parameters, + ) def col_form(self, field_id): ak = uproot.extras.awkward() cfid = field_id + if self.field_records[cfid].source_field_id is not None: + cfid = self.field_records[cfid].source_field_id if cfid in self._alias_columns_dict: cfid = self._alias_columns_dict[cfid] if cfid not in self._column_records_dict: @@ -580,7 +600,7 @@ def field_form(self, this_id, seen): inner = self.col_form(this_id) keyname = f"RegularForm-{this_id}" return ak.forms.RegularForm(inner, this_record.repetition, form_key=keyname) - elif structural_role == uproot.const.RNTupleFieldRole.VECTOR: + elif structural_role == uproot.const.RNTupleFieldRole.COLLECTION: if this_id not in self._related_ids or len(self._related_ids[this_id]) != 1: keyname = f"vector-{this_id}" newids = self._related_ids.get(this_id, []) @@ -589,6 +609,8 @@ def field_form(self, this_id, seen): namelist = [field_records[i].field_name for i in newids] return ak.forms.RecordForm(recordlist, namelist, form_key="whatever") cfid = this_id + if self.field_records[cfid].source_field_id is not None: + cfid = self.field_records[cfid].source_field_id if cfid in self._alias_columns_dict: cfid = self._alias_columns_dict[cfid] if cfid not in self._column_records_dict: @@ -604,7 +626,7 @@ def field_form(self, this_id, seen): child_id = self._related_ids[this_id][0] inner = self.field_form(child_id, seen) return ak.forms.ListOffsetForm("i64", inner, form_key=keyname) - elif structural_role == uproot.const.RNTupleFieldRole.STRUCT: + elif structural_role == uproot.const.RNTupleFieldRole.RECORD: newids = [] if this_id in self._related_ids: newids = self._related_ids[this_id] @@ -612,7 +634,7 @@ def field_form(self, this_id, seen): recordlist = [self.field_form(i, seen) for i in newids] namelist = [field_records[i].field_name for i in newids] return ak.forms.RecordForm(recordlist, namelist, form_key="whatever") - elif structural_role == uproot.const.RNTupleFieldRole.UNION: + elif structural_role == uproot.const.RNTupleFieldRole.VARIANT: keyname = self.col_form(this_id) newids = [] if this_id in self._related_ids: @@ -622,7 +644,7 @@ def field_form(self, this_id, seen): "i8", "i64", recordlist, form_key=keyname + "-union" ) return ak.forms.IndexedOptionForm("i64", inner, form_key=keyname) - elif structural_role == uproot.const.RNTupleFieldRole.UNSPLIT: + elif structural_role == uproot.const.RNTupleFieldRole.STREAMER: raise NotImplementedError( f"Unsplit fields are not supported. {this_record}" ) @@ -710,10 +732,7 @@ def read_col_pages( arrays = [self.read_col_page(ncol, i) for i in cluster_range] # Check if column stores offset values for jagged arrays (splitindex64) (applies to cardinality cols too): - if ( - dtype_byte == uproot.const.rntuple_col_type_to_num_dict["splitindex64"] - or dtype_byte == uproot.const.rntuple_col_type_to_num_dict["splitindex32"] - ): + if dtype_byte in uproot.const.rntuple_delta_types: # Extract the last offset values: last_elements = [ arr[-1] for arr in arrays[:-1] @@ -735,7 +754,7 @@ def read_col_pages( def read_col_page(self, ncol, cluster_i): linklist = self.page_list_envelopes.pagelinklist[cluster_i] - pagelist = linklist[ncol] + pagelist = linklist[ncol].pages if ncol < len(linklist) else [] dtype_byte = self.column_records[ncol].type dtype_str = uproot.const.rntuple_col_num_to_dtype_dict[dtype_byte] total_len = numpy.sum([desc.num_elements for desc in pagelist], dtype=int) @@ -746,10 +765,10 @@ def read_col_page(self, ncol, cluster_i): else: dtype = numpy.dtype(dtype_str) res = numpy.empty(total_len, dtype) - split = 14 <= dtype_byte <= 21 or 26 <= dtype_byte <= 28 - zigzag = 26 <= dtype_byte <= 28 - delta = dtype_byte in (14, 15) - index = dtype_byte in (0, 1, 14, 15) + split = dtype_byte in uproot.const.rntuple_split_types + zigzag = dtype_byte in uproot.const.rntuple_zigzag_types + delta = dtype_byte in uproot.const.rntuple_delta_types + index = dtype_byte in uproot.const.rntuple_index_types nbits = uproot.const.rntuple_col_num_to_size_dict[dtype_byte] tracker = 0 cumsum = 0 @@ -818,6 +837,7 @@ def _recursive_find(form, res): _recursive_find(form.content, res) +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#page-locations class PageDescription: def read(self, chunk, cursor, context): out = MetaData(type(self).__name__) @@ -828,13 +848,40 @@ def read(self, chunk, cursor, context): return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#page-locations +class ColumnPageListFrameReader: + def read(self, chunk, cursor, context): + local_cursor = cursor.copy() + num_bytes = local_cursor.field(chunk, _rntuple_frame_size_format, context) + assert num_bytes < 0, f"num_bytes={num_bytes}" + num_items = local_cursor.field(chunk, _rntuple_frame_num_items_format, context) + cursor.skip(-num_bytes) + out = MetaData("ColumnPages") + out.pages = [ + PageDescription().read(chunk, local_cursor, context) + for _ in range(num_items) + ] + out.element_offset = local_cursor.field( + chunk, _rntuple_column_element_offset_format, context + ) + out.suppressed = out.element_offset < 0 + if not out.suppressed: + out.compression_settings = local_cursor.field( + chunk, _rntuple_column_compression_settings_format, context + ) + else: + out.compression_settings = None + return out + + +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#page-list-envelope class PageLink: def __init__(self): self.list_cluster_summaries = ListFrameReader( RecordFrameReader(ClusterSummaryReader()) ) self.nested_page_locations = ListFrameReader( - ListFrameReader(ListFrameReader(PageDescription())) + ListFrameReader(ColumnPageListFrameReader()) ) def read(self, chunk, cursor, context): @@ -850,6 +897,7 @@ def read(self, chunk, cursor, context): return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#locators-and-envelope-links class LocatorReader: def read(self, chunk, cursor, context): out = MetaData("Locator") @@ -863,8 +911,6 @@ def read(self, chunk, cursor, context): out.offset = cursor.field( chunk, _rntuple_locator_offset_format, context ) - elif out.type == uproot.const.RNTupleLocatorType.DAOS: - raise NotImplementedError("DAOS locators are not supported.") else: raise NotImplementedError(f"Unknown locator type: {out.type}") else: @@ -873,6 +919,7 @@ def read(self, chunk, cursor, context): return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#locators-and-envelope-links class EnvLinkReader: def read(self, chunk, cursor, context): out = MetaData("EnvLink") @@ -904,6 +951,7 @@ def __setattr__(self, name, val): self.__dict__["_fields"][name] = val +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#frames class RecordFrameReader: def __init__(self, payload): self.payload = payload @@ -916,6 +964,7 @@ def read(self, chunk, cursor, context): return self.payload.read(chunk, local_cursor, context) +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#frames class ListFrameReader: def __init__(self, payload): self.payload = payload @@ -942,27 +991,29 @@ def read(self, chunk, cursor, context): out.struct_role, out.flags, ) = cursor.fields(chunk, _rntuple_field_description_format, context) - if out.flags == uproot.const.RNTupleFieldFlag.REPETITIVE: + out.field_name, out.type_name, out.type_alias, out.field_desc = ( + cursor.rntuple_string(chunk, context) for _ in range(4) + ) + + if out.flags & uproot.const.RNTupleFieldFlag.REPETITIVE: out.repetition = cursor.field(chunk, _rntuple_repetition_format, context) - out.source_field_id = None - out.checksum = None - elif out.flags == uproot.const.RNTupleFieldFlag.PROJECTED: + else: out.repetition = 0 + + if out.flags & uproot.const.RNTupleFieldFlag.PROJECTED: out.source_field_id = cursor.field( chunk, _rntuple_source_field_id_format, context ) - out.checksum = None - elif out.flags == uproot.const.RNTupleFieldFlag.CHECKSUM: - out.repetition = 0 - out.source_field_id = None - out.checksum = cursor.field(chunk, _rntuple_checksum_format, context) else: - out.repetition = 0 out.source_field_id = None + + if out.flags & uproot.const.RNTupleFieldFlag.CHECKSUM: + out.checksum = cursor.field( + chunk, _rntuple_root_streamer_checksum_format, context + ) + else: out.checksum = None - out.field_name, out.type_name, out.type_alias, out.field_desc = ( - cursor.rntuple_string(chunk, context) for _ in range(4) - ) + return out @@ -988,6 +1039,7 @@ def read(self, chunk, cursor, context): return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#alias-columns class AliasColumnReader: def read(self, chunk, cursor, context): out = MetaData("AliasColumn") @@ -998,17 +1050,19 @@ def read(self, chunk, cursor, context): return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#extra-type-information class ExtraTypeInfoReader: def read(self, chunk, cursor, context): out = MetaData("ExtraTypeInfoReader") - out.content_id, out.type_ver_from, out.type_ver_to = cursor.fields( + out.content_id, out.type_ver = cursor.fields( chunk, _rntuple_extra_type_info_format, context ) out.type_name = cursor.rntuple_string(chunk, context) return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#header-envelope class HeaderReader: def __init__(self): self.list_field_record_frames = ListFrameReader( @@ -1048,22 +1102,7 @@ def read(self, chunk, cursor, context): return out -class ColumnGroupIDReader: - def read(self, chunk, cursor, context): - out = MetaData("ColumnGroupID") - out.col_id = cursor.field(chunk, _rntuple_column_group_id_format, context) - return out - - -class ColumnGroupRecordReader: - def read(self, chunk, cursor, context): - out = MetaData("ColumnGroupRecord") - out.column_ids = ListFrameReader(RecordFrameReader(ColumnGroupIDReader())).read( - chunk, cursor, context - ) - return out - - +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#cluster-summary-record-frame class ClusterSummaryReader: def read(self, chunk, cursor, context): out = MetaData("ClusterSummaryRecord") @@ -1072,11 +1111,12 @@ def read(self, chunk, cursor, context): ) out.flags = out.num_entries >> 56 out.num_entries &= 0xFFFFFFFFFFFFFF - if out.flags == uproot.const.RNTupleClusterFlag.SHARDED: + if out.flags & uproot.const.RNTupleClusterFlag.SHARDED: raise NotImplementedError("Sharded clusters are not supported.") return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#page-locations class ClusterGroupRecordReader: def read(self, chunk, cursor, context): out = MetaData("ClusterGroupRecord") @@ -1087,6 +1127,7 @@ def read(self, chunk, cursor, context): return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#schema-extension-record-frame class RNTupleSchemaExtension: def read(self, chunk, cursor, context): out = MetaData(type(self).__name__) @@ -1107,12 +1148,10 @@ def read(self, chunk, cursor, context): return out +# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#footer-envelope class FooterReader: def __init__(self): self.extension_header_links = RNTupleSchemaExtension() - self.column_group_record_frames = ListFrameReader( - RecordFrameReader(ColumnGroupRecordReader()) - ) self.cluster_summary_frames = ListFrameReader( RecordFrameReader(ClusterSummaryReader()) ) @@ -1129,9 +1168,6 @@ def read(self, chunk, cursor, context): out.feature_flag = cursor.field(chunk, _rntuple_feature_flag_format, context) out.header_checksum = cursor.field(chunk, _rntuple_checksum_format, context) out.extension_links = self.extension_header_links.read(chunk, cursor, context) - out.col_group_records = self.column_group_record_frames.read( - chunk, cursor, context - ) out.cluster_group_records = self.cluster_group_record_frames.read( chunk, cursor, context ) @@ -1279,6 +1315,4 @@ def iterate(self, filter_name="*", *args, step_size="100 MB", **kwargs): ) -uproot.classes["ROOT::Experimental::RNTuple"] = ( - Model_ROOT_3a3a_Experimental_3a3a_RNTuple -) +uproot.classes["ROOT::RNTuple"] = Model_ROOT_3a3a_RNTuple diff --git a/src/uproot/reading.py b/src/uproot/reading.py index 0dce3a970..f50154569 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -185,7 +185,7 @@ def __init__(self): "TNtuple", "TNtupleD", "TTreeSQL", - "ROOT::Experimental::RNTuple", + "ROOT::RNTuple", ] diff --git a/tests/test_0013_rntuple_anchor.py b/tests/test_0013_rntuple_anchor.py index 3931b5b28..980d2721e 100644 --- a/tests/test_0013_rntuple_anchor.py +++ b/tests/test_0013_rntuple_anchor.py @@ -5,31 +5,26 @@ import sys import numpy -import pytest import skhep_testdata import uproot -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test(): - filename = skhep_testdata.data_path("uproot-ntpl001_staff.root") + filename = skhep_testdata.data_path("ntpl001_staff_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: obj = f["Staff"] - assert obj.member("fVersionEpoch") == 0 - assert obj.member("fVersionMajor") == 2 + assert obj.member("fVersionEpoch") == 1 + assert obj.member("fVersionMajor") == 0 assert obj.member("fVersionMinor") == 0 assert obj.member("fVersionPatch") == 0 assert obj.member("fSeekHeader") == 266 - assert obj.member("fNBytesHeader") == 391 - assert obj.member("fLenHeader") == 996 - assert obj.member("fSeekFooter") == 36420 - assert obj.member("fNBytesFooter") == 89 - assert obj.member("fLenFooter") == 172 - assert obj.member("fMaxKeySize") == 12065027575882477574 + assert obj.member("fNBytesHeader") == 319 + assert obj.member("fLenHeader") == 997 + assert obj.member("fSeekFooter") == 24504 + assert obj.member("fNBytesFooter") == 84 + assert obj.member("fLenFooter") == 148 + assert obj.member("fMaxKeySize") == 1073741824 header_start = obj.member("fSeekHeader") header_stop = header_start + obj.member("fNBytesHeader") @@ -56,29 +51,29 @@ def test(): # HEADER # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 76 52 1 126 1 0 228 3 0 226 97 52 14 191 32 119 70 87 1 0 -# L 4 --- ~ --- --- --- --- --- --- a 4 --- --- w F W --- --- +# 90 83 1 54 1 0 229 3 0 40 181 47 253 96 229 2 101 9 0 164 +# Z S --- 6 --- --- --- --- --- ( --- / --- ` --- --- e --- --- --- # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 228 3 0 1 0 144 5 0 0 0 83 116 97 102 102 13 0 241 5 13 -# --- --- --- --- --- --- --- --- --- --- S t a f f --- --- --- --- --- +# 11 1 0 229 3 0 5 0 0 0 83 116 97 102 102 14 0 0 0 82 +# --- --- --- --- --- --- --- --- --- --- S t a f f --- --- --- --- R # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 0 0 0 82 79 79 84 32 118 54 46 51 49 47 48 49 122 253 255 1 -# --- --- --- R O O T v 6 . 3 1 / 0 1 z --- --- --- +# 79 79 84 32 118 54 46 51 53 46 48 48 49 122 253 255 11 0 0 0 +# O O T v 6 . 3 5 . 0 0 1 z --- --- --- --- --- --- # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 0 111 11 0 0 0 60 0 1 0 3 244 13 8 0 0 0 67 97 116 -# --- o --- --- --- --- < --- --- --- --- --- --- --- --- --- --- C a t +# 60 0 8 0 0 0 67 97 116 101 103 111 114 121 12 0 0 0 115 116 +# < --- --- --- --- --- C a t e g o r y --- --- --- --- s t # FOOTER # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 76 52 1 80 0 0 172 0 0 143 248 61 98 249 16 31 87 72 2 0 -# L 4 --- P --- --- --- --- --- --- --- = b --- --- --- W H --- --- +# 90 83 1 75 0 0 148 0 0 40 181 47 253 32 148 21 2 0 116 2 +# Z S --- K --- --- --- --- --- ( --- / --- --- --- --- --- t --- # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 172 0 1 0 147 119 211 249 23 217 49 71 211 56 16 0 34 244 255 1 -# --- --- --- --- --- w --- --- --- --- 1 G --- 8 --- --- " --- --- --- +# 2 0 148 0 121 133 33 93 184 252 16 152 56 244 255 196 1 0 0 0 +# --- --- --- --- y --- ! ] --- --- --- --- 8 --- --- --- --- --- --- --- # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 0 15 12 0 33 19 196 12 0 106 1 0 0 0 48 0 1 0 34 26 -# --- --- --- --- ! --- --- --- --- j --- --- --- --- 0 --- --- --- " --- +# 48 0 26 13 92 2 194 0 0 0 212 94 62 61 172 86 23 254 154 10 +# 0 --- --- --- \ --- --- --- --- --- --- ^ > = --- V --- --- --- --- # --+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+- -# 13 8 0 0 28 0 34 92 2 12 0 110 234 0 0 0 56 141 120 0 -# --- --- --- --- --- --- " \ --- --- --- n --- --- --- --- 8 --- x --- +# 0 111 200 5 12 254 12 97 101 192 56 89 192 189 47 37 224 48 158 153 +# --- o --- --- --- --- --- a e --- 8 Y --- --- / % --- 0 --- --- diff --git a/tests/test_0630_rntuple_basics.py b/tests/test_0630_rntuple_basics.py index 74391022c..88d96a9bc 100644 --- a/tests/test_0630_rntuple_basics.py +++ b/tests/test_0630_rntuple_basics.py @@ -12,13 +12,9 @@ pytest.importorskip("awkward") -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test_flat(): - filename = skhep_testdata.data_path("test_ntuple_int_float.root") + filename = skhep_testdata.data_path("test_int_float_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: R = f["ntuple"] assert R.keys() == ["one_integers", "two_floats"] @@ -36,7 +32,7 @@ def test_flat(): R.arrays(entry_start=1, entry_stop=3)["one_integers"] == numpy.array([8, 7]) ) - filename = skhep_testdata.data_path("test_ntuple_int_5e4.root") + filename = skhep_testdata.data_path("test_int_5e4_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: R = f["ntuple"] assert all( @@ -46,7 +42,9 @@ def test_flat(): def test_jagged(): - filename = skhep_testdata.data_path("test_ntuple_int_vfloat_tlv_vtlv.root") + filename = skhep_testdata.data_path( + "test_int_vfloat_tlv_vtlv_rntuple_v1-0-0-0.root" + ) with uproot.open(filename) as f: R = f["ntuple"] assert R.keys() == ["one_integers", "two_v_floats", "three_LV", "four_v_LVs"] diff --git a/tests/test_0662_rntuple_stl_containers.py b/tests/test_0662_rntuple_stl_containers.py index 6aa8130c2..32ace7d11 100644 --- a/tests/test_0662_rntuple_stl_containers.py +++ b/tests/test_0662_rntuple_stl_containers.py @@ -12,13 +12,9 @@ ak = pytest.importorskip("awkward") -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test_rntuple_stl_containers(): - filename = skhep_testdata.data_path("test_ntuple_stl_containers.root") + filename = skhep_testdata.data_path("test_stl_containers_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: R = f["ntuple"] assert R.keys() == [ diff --git a/tests/test_0705_rntuple_writing_metadata.py b/tests/test_0705_rntuple_writing_metadata.py index 609181cfe..5c652f281 100644 --- a/tests/test_0705_rntuple_writing_metadata.py +++ b/tests/test_0705_rntuple_writing_metadata.py @@ -13,10 +13,6 @@ ak = pytest.importorskip("awkward") -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - @pytest.mark.skip( reason="RNTuple writing is pending until specification 1.0.0 is released." diff --git a/tests/test_0962_RNTuple_update.py b/tests/test_0962_rntuple_update.py similarity index 51% rename from tests/test_0962_RNTuple_update.py rename to tests/test_0962_rntuple_update.py index 7dbdb53a4..b81a612c2 100644 --- a/tests/test_0962_RNTuple_update.py +++ b/tests/test_0962_rntuple_update.py @@ -6,13 +6,11 @@ import skhep_testdata import numpy as np -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test_new_support_RNTuple_split_int32_reading(): - with uproot.open(skhep_testdata.data_path("test_ntuple_int_5e4.root")) as f: + with uproot.open( + skhep_testdata.data_path("test_int_5e4_rntuple_v1-0-0-0.root") + ) as f: obj = f["ntuple"] df = obj.arrays() assert len(df) == 5e4 @@ -21,7 +19,7 @@ def test_new_support_RNTuple_split_int32_reading(): def test_new_support_RNTuple_bit_bool_reading(): - with uproot.open(skhep_testdata.data_path("test_ntuple_bit.root")) as f: + with uproot.open(skhep_testdata.data_path("test_bit_rntuple_v1-0-0-0.root")) as f: obj = f["ntuple"] df = obj.arrays() assert np.all(df.one_bit == np.asarray([1, 0, 0, 1, 0, 0, 1, 0, 0, 1])) @@ -29,7 +27,7 @@ def test_new_support_RNTuple_bit_bool_reading(): def test_new_support_RNTuple_split_int16_reading(): with uproot.open( - skhep_testdata.data_path("test_ntuple_int_multicluster.root") + skhep_testdata.data_path("test_int_multicluster_rntuple_v1-0-0-0.root") ) as f: obj = f["ntuple"] df = obj.arrays() @@ -38,28 +36,3 @@ def test_new_support_RNTuple_split_int16_reading(): assert df.one_integers[-1] == 1 assert np.all(np.unique(df.one_integers[: len(df.one_integers) // 2]) == [2]) assert np.all(np.unique(df.one_integers[len(df.one_integers) / 2 + 1 :]) == [1]) - - -pytest.importorskip("cramjam") - - -@pytest.mark.skip(reason="Need to find a similar file in RNTuple RC2 format") -def test_new_support_RNTuple_event_data(): - with uproot.open( - "https://xrootd-local.unl.edu:1094//store/user/AGC/nanoaod-rntuple/zstd/TT_TuneCUETP8M1_13TeV-powheg-pythia8/cmsopendata2015_ttbar_19980_PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext3-v1_00000_0000.root" - ) as f: - obj = f["Events"] - df = obj.arrays(["nTau"]) - assert len(df) == 1334428 - assert ak.to_list(df["nTau"][:10]) == [ - 0, - 0, - 2, - 0, - 1, - 1, - 1, - 1, - 2, - 0, - ] diff --git a/tests/test_1191_rntuple_fixes.py b/tests/test_1191_rntuple_fixes.py index 4be336943..af403629a 100644 --- a/tests/test_1191_rntuple_fixes.py +++ b/tests/test_1191_rntuple_fixes.py @@ -5,36 +5,36 @@ import uproot -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test_schema_extension(): - filename = skhep_testdata.data_path("test_ntuple_extension_columns.root") + filename = skhep_testdata.data_path("test_extension_columns_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: - obj = f["EventData"] + obj = f["ntuple"] + + assert len(obj.page_list_envelopes.pagelinklist[0]) < len( + obj.page_list_envelopes.pagelinklist[1] + ) assert len(obj.column_records) > len(obj.header.column_records) - assert len(obj.column_records) == 936 - assert obj.column_records[903].first_element_index == 36 + assert len(obj.column_records) == 4 + assert obj.column_records[1].first_element_index == 200 + assert obj.column_records[2].first_element_index == 400 arrays = obj.arrays() - pbs = arrays[ - "HLT_AntiKt4EMPFlowJets_subresjesgscIS_ftf_TLAAux::fastDIPS20211215_pb" - ] - assert len(pbs) == 40 - assert all(len(l) == 0 for l in pbs[:36]) - assert next(i for i, l in enumerate(pbs) if len(l) != 0) == 36 + assert len(arrays.float_field) == 600 + assert len(arrays.intvec_field) == 600 + + assert all(arrays.float_field[:200] == 0) + assert all(len(l) == 0 for l in arrays.intvec_field[:400]) - jets = arrays["HLT_AntiKt4EMPFlowJets_subresjesgscIS_ftf_TLAAux:"] - assert len(jets.pt) == len(pbs) + assert next(i for i, l in enumerate(arrays.float_field) if l != 0) == 200 + assert next(i for i, l in enumerate(arrays.intvec_field) if len(l) != 0) == 400 def test_rntuple_cardinality(): filename = skhep_testdata.data_path( - "Run2012BC_DoubleMuParked_Muons_rntuple_1000evts.root" + "Run2012BC_DoubleMuParked_Muons_1000evts_rntuple_v1-0-0-0.root" ) with uproot.open(filename) as f: obj = f["Events"] @@ -42,44 +42,18 @@ def test_rntuple_cardinality(): assert arrays["nMuon"].tolist() == [len(l) for l in arrays["Muon_pt"]] -def test_skip_recursively_empty_structs(): - filename = skhep_testdata.data_path("DAOD_TRUTH3_RC2.root") +def test_multiple_page_delta_encoding(): + filename = skhep_testdata.data_path("test_index_multicluster_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: - obj = f["RNT:CollectionTree"] - arrays = obj.arrays() - jets = arrays["AntiKt4TruthDressedWZJetsAux:"] - assert len(jets[0].pt) == 5 - - -def test_empty_page_list(): - filename = skhep_testdata.data_path("test_ntuple_extension_columns.root") - with uproot.open(filename) as f: - obj = f["EventData"] - col_id = 12 - assert obj.column_records[col_id].type == 14 - data = obj.read_col_page(col_id, 0) - assert len(data) == 1 - assert data[0] == 0 - - -@pytest.mark.skip( - reason="The file takes too long to download (about 5 seconds). Need to find a smaller test file." -) -def test_multiple_page_lists(): - url = "http://root.cern/files/tutorials/ntpl004_dimuon_v1rc2.root" - with uproot.open(f"simplecache::{url}") as f: - obj = f["Events"] + obj = f["ntuple"] data = obj.read_col_page(0, 0) - # each page of column 0 has 8192 elements - # so this checks that data was stitched together correctly - assert data[8192] - data[8191] == 2 - arrays = obj.arrays() - assert len(arrays.nMuon) == 4_000_000 + # first page has 64 elements, so this checks that data was stitched together correctly + assert data[64] - data[63] == 2 def test_split_encoding(): filename = skhep_testdata.data_path( - "Run2012BC_DoubleMuParked_Muons_rntuple_1000evts.root" + "Run2012BC_DoubleMuParked_Muons_1000evts_rntuple_v1-0-0-0.root" ) with uproot.open(filename) as f: obj = f["Events"] diff --git a/tests/test_1223_more_rntuple_types.py b/tests/test_1223_more_rntuple_types.py index 54b269457..ea2a986ae 100644 --- a/tests/test_1223_more_rntuple_types.py +++ b/tests/test_1223_more_rntuple_types.py @@ -1,17 +1,12 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE -import pytest import skhep_testdata import uproot -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test_atomic(): - filename = skhep_testdata.data_path("test_ntuple_atomic_bitset.root") + filename = skhep_testdata.data_path("test_atomic_bitset_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: obj = f["ntuple"] @@ -21,7 +16,7 @@ def test_atomic(): def test_bitset(): - filename = skhep_testdata.data_path("test_ntuple_atomic_bitset.root") + filename = skhep_testdata.data_path("test_atomic_bitset_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: obj = f["ntuple"] @@ -72,7 +67,9 @@ def test_bitset(): def test_empty_struct(): - filename = skhep_testdata.data_path("test_ntuple_emptystruct_invalidvar.root") + filename = skhep_testdata.data_path( + "test_emptystruct_invalidvar_rntuple_v1-0-0-0.root" + ) with uproot.open(filename) as f: obj = f["ntuple"] @@ -82,10 +79,12 @@ def test_empty_struct(): def test_invalid_variant(): - filename = skhep_testdata.data_path("test_ntuple_emptystruct_invalidvar.root") + filename = skhep_testdata.data_path( + "test_emptystruct_invalidvar_rntuple_v1-0-0-0.root" + ) with uproot.open(filename) as f: obj = f["ntuple"] a = obj.arrays("variant") - assert a.variant.tolist() == [1, 1, None] + assert a.variant.tolist() == [1, None, {"i": 2}] diff --git a/tests/test_1250_rntuple_improvements.py b/tests/test_1250_rntuple_improvements.py index 54c2c1dc1..32d39c6ea 100644 --- a/tests/test_1250_rntuple_improvements.py +++ b/tests/test_1250_rntuple_improvements.py @@ -5,25 +5,27 @@ import uproot -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test_field_class(): - filename = skhep_testdata.data_path("DAOD_TRUTH3_RC2.root") + filename = skhep_testdata.data_path("test_nested_structs_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: - obj = f["RNT:CollectionTree"] - jets = obj["AntiKt4TruthDressedWZJetsAux:"] - assert len(jets) == 6 + obj = f["ntuple"] + my_struct = obj["my_struct"] + assert len(my_struct) == 2 + + sub_struct = my_struct["sub_struct"] + assert len(my_struct) == 2 + + sub_sub_struct = sub_struct["sub_sub_struct"] + assert len(sub_sub_struct) == 2 - pt = jets["pt"] - assert len(pt) == 0 + v = sub_sub_struct["v"] + assert len(v) == 0 def test_array_methods(): filename = skhep_testdata.data_path( - "Run2012BC_DoubleMuParked_Muons_rntuple_1000evts.root" + "Run2012BC_DoubleMuParked_Muons_1000evts_rntuple_v1-0-0-0.root" ) with uproot.open(filename) as f: obj = f["Events"] @@ -36,16 +38,10 @@ def test_array_methods(): assert len(nMuon_arrays) == 1000 assert nMuon_arrays["nMuon"].tolist() == nMuon_array.tolist() - filename = skhep_testdata.data_path("DAOD_TRUTH3_RC2.root") - with uproot.open(filename) as f: - obj = f["RNT:CollectionTree"] - jets = obj["AntiKt4TruthDressedWZJetsAux:"].arrays() - assert len(jets[0].pt) == 5 - def test_iterate(): filename = skhep_testdata.data_path( - "Run2012BC_DoubleMuParked_Muons_rntuple_1000evts.root" + "Run2012BC_DoubleMuParked_Muons_1000evts_rntuple_v1-0-0-0.root" ) with uproot.open(filename) as f: obj = f["Events"] diff --git a/tests/test_1285_rntuple_multicluster_concatenation.py b/tests/test_1285_rntuple_multicluster_concatenation.py index c6335f079..fae322e96 100644 --- a/tests/test_1285_rntuple_multicluster_concatenation.py +++ b/tests/test_1285_rntuple_multicluster_concatenation.py @@ -5,15 +5,9 @@ import uproot -import pytest - -pytest.skip( - "Skipping until test files are available with RNTuple v1.0", allow_module_level=True -) - def test_schema_extension(): - filename = skhep_testdata.data_path("test_ntuple_index_multicluster.root") + filename = skhep_testdata.data_path("test_index_multicluster_rntuple_v1-0-0-0.root") with uproot.open(filename) as f: obj = f["ntuple"]