Skip to content

Commit

Permalink
Read extra info from page locations
Browse files Browse the repository at this point in the history
  • Loading branch information
ariostas committed Nov 15, 2024
1 parent 7aed03d commit 3e4265d
Showing 1 changed file with 32 additions and 4 deletions.
36 changes: 32 additions & 4 deletions src/uproot/models/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
# Two little-endian unsigned 64-bit integers.
_rntuple_cluster_summary_format = struct.Struct("<QQ")
# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#page-locations
# One little-endian signed 32-bit integer.
_rntuple_page_num_elements_format = struct.Struct("<i")
# One little-endian signed 64-bit integer; ColumnPageListFrameReader treats a
# negative value as marking the column as suppressed.
_rntuple_column_element_offset_format = struct.Struct("<q")
# One little-endian unsigned 32-bit integer; ColumnPageListFrameReader reads it
# only when the element offset is non-negative.
_rntuple_column_compression_settings_format = struct.Struct("<I")


def _from_zigzag(n):
Expand Down Expand Up @@ -159,7 +161,7 @@ def _num_entries_for(in_ntuple, target_num_bytes, filter_name):
for cluster in range(start_cluster_idx, stop_cluster_idx):
pages = in_ntuple.ntuple.page_list_envelopes.pagelinklist[cluster][
key_nr
]
].pages
total_bytes += sum(page.locator.num_bytes for page in pages)

total_entries = entry_stop
Expand Down Expand Up @@ -755,7 +757,7 @@ def read_col_page(self, ncol, cluster_i):
pagelist = linklist[ncol]
dtype_byte = self.column_records[ncol].type
dtype_str = uproot.const.rntuple_col_num_to_dtype_dict[dtype_byte]
total_len = numpy.sum([desc.num_elements for desc in pagelist], dtype=int)
total_len = numpy.sum([desc.num_elements for desc in pagelist.pages], dtype=int)
if dtype_str == "switch":
dtype = numpy.dtype([("index", "int64"), ("tag", "int32")])
elif dtype_str == "bit":
Expand All @@ -770,7 +772,7 @@ def read_col_page(self, ncol, cluster_i):
nbits = uproot.const.rntuple_col_num_to_size_dict[dtype_byte]
tracker = 0
cumsum = 0
for page_desc in pagelist:
for page_desc in pagelist.pages:
n_elements = page_desc.num_elements
tracker_end = tracker + n_elements
self.read_pagedesc(
Expand Down Expand Up @@ -846,14 +848,40 @@ def read(self, chunk, cursor, context):
return out


# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#page-locations
class ColumnPageListFrameReader:
    """
    Reader for one column's page list frame.

    The frame is laid out as a list-frame header (size, then item count),
    followed by that many page descriptions, followed by a signed element
    offset and, unless that offset is negative, the compression settings.
    """

    def read(self, chunk, cursor, context):
        """
        Parse the frame at ``cursor`` and return a ``MetaData("ColumnPages")``.

        The returned object carries ``pages`` (list of page descriptions),
        ``element_offset``, ``suppressed`` (True when the offset is negative)
        and ``compression_settings`` (``None`` for a suppressed column).

        ``cursor`` itself is only advanced past the whole frame; all field
        reads happen on a private copy.
        """
        frame_cursor = cursor.copy()

        # Frame header: the size field is expected to be negative here and its
        # magnitude is the number of bytes the caller's cursor must skip.
        num_bytes = frame_cursor.field(chunk, _rntuple_frame_size_format, context)
        assert num_bytes < 0, f"num_bytes={num_bytes}"
        num_items = frame_cursor.field(
            chunk, _rntuple_frame_num_items_format, context
        )
        cursor.skip(-num_bytes)

        out = MetaData("ColumnPages")

        page_descriptions = []
        for _ in range(num_items):
            page_descriptions.append(
                PageDescription().read(chunk, frame_cursor, context)
            )
        out.pages = page_descriptions

        # Trailer after the page descriptions: element offset, then (only for
        # non-suppressed columns) the compression settings.
        element_offset = frame_cursor.field(
            chunk, _rntuple_column_element_offset_format, context
        )
        out.element_offset = element_offset
        out.suppressed = element_offset < 0
        out.compression_settings = (
            None
            if out.suppressed
            else frame_cursor.field(
                chunk, _rntuple_column_compression_settings_format, context
            )
        )
        return out


# https://github.com/root-project/root/blob/8cd9eed6f3a32e55ef1f0f1df8e5462e753c735d/tree/ntuple/v7/doc/BinaryFormatSpecification.md#page-list-envelope
class PageLink:
def __init__(self):
self.list_cluster_summaries = ListFrameReader(
RecordFrameReader(ClusterSummaryReader())
)
self.nested_page_locations = ListFrameReader(
ListFrameReader(ListFrameReader(PageDescription()))
ListFrameReader(ColumnPageListFrameReader())
)

def read(self, chunk, cursor, context):
Expand Down

0 comments on commit 3e4265d

Please sign in to comment.