Skip to content

Commit

Permalink
chore: miscellaneous RNTuple improvements (#1250)
Browse files Browse the repository at this point in the history
* Fixed __len__ method

* Added a few more useful methods

* Use the right number in arrays method

* Updated to match spec and did some cleanup

* Fixed order of extra type information

* Extract column summary flags

* style: pre-commit fixes

* Fixed conflict resolution

* Fixed test

* Switched to using enums

* Fixed RNTuple anchor

* Updated locator types

* Removed UserMetadata envelope

* Started implementing new real32 types

* Updated sharded cluster to match spec

* Removed user metadata from footer

* Fixed ClusterSummaryReader

* Fix cascadentuple

* Introduced RNTupleField class

* Added test for #1285

* Fixed test

* Fix test (attempt 2)

* Finalized first version of RNTupleField

* Added tests for RNTupleField

* Implemented iterate method

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ariostas and pre-commit-ci[bot] authored Nov 5, 2024
1 parent 53f917c commit 94d9684
Show file tree
Hide file tree
Showing 8 changed files with 620 additions and 119 deletions.
58 changes: 51 additions & 7 deletions src/uproot/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""
from __future__ import annotations

import struct
from enum import IntEnum

import numpy

Expand Down Expand Up @@ -118,8 +118,6 @@
kStreamedMemberWise = numpy.uint16(1 << 14)

############ RNTuple https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md
# Pre-compiled layout for a single little-endian unsigned 64-bit integer.
_rntuple_frame_format = struct.Struct("<Q")

# Eight zero bytes: the value 0 packed with the layout above.
# TODO: need to check this
rntuple_env_header = _rntuple_frame_format.pack(0)
rntuple_col_num_to_dtype_dict = {
1: "uint64",
2: "uint32",
Expand Down Expand Up @@ -149,6 +147,8 @@
26: "int64", # split + zigzag encoding
27: "int32", # split + zigzag encoding
28: "int16", # split + zigzag encoding
29: "float32trunc",
30: "float32quant",
}
rntuple_col_num_to_size_dict = {
1: 64,
Expand Down Expand Up @@ -179,6 +179,8 @@
26: 64, # split + zigzag encoding
27: 32, # split + zigzag encoding
28: 16, # split + zigzag encoding
29: 32, # TODO: variable size
30: 32, # TODO: variable size
}

rntuple_col_type_to_num_dict = {
Expand Down Expand Up @@ -212,7 +214,49 @@
"splitzigzagint16": 28,
}

# Structural role codes for RNTuple fields, kept as plain module-level ints.
# NOTE(review): these carry the same values as RNTupleFieldRole.LEAF..UNION
# defined later in this file -- confirm whether both spellings are still needed.
rntuple_role_leaf = 0x00
rntuple_role_vector = 0x01
rntuple_role_struct = 0x02
rntuple_role_union = 0x03

class RNTupleLocatorType(IntEnum):
    """Integer codes naming the kinds of RNTuple locator.

    Members are ``IntEnum`` values, so they compare equal to the raw
    integers they wrap. What each kind means is defined by the RNTuple
    specification and is not restated here.
    """

    STANDARD = 0x00
    LARGE = 0x01
    DAOS = 0x02


class RNTupleEnvelopeType(IntEnum):
    """Integer codes naming the kinds of RNTuple envelope.

    Members are ``IntEnum`` values, so they compare equal to the raw
    integers they wrap. Code 0 is named ``RESERVED``; the remaining
    codes name the header, footer and page-list envelopes.
    """

    RESERVED = 0x00
    HEADER = 0x01
    FOOTER = 0x02
    PAGELIST = 0x03


class RNTupleFieldRole(IntEnum):
    """Integer codes for the structural role of an RNTuple field.

    Members are ``IntEnum`` values, so they compare equal to the raw
    integers they wrap (``RNTupleFieldRole.VECTOR == 1``).
    """

    LEAF = 0x00
    VECTOR = 0x01
    STRUCT = 0x02
    UNION = 0x03
    UNSPLIT = 0x04


class RNTupleFieldFlag(IntEnum):
    """Bit values for the flags of an RNTuple field.

    Each member occupies a distinct single bit, so a flags word can be
    tested with ``flags & RNTupleFieldFlag.X``. Because this is an
    ``IntEnum`` (not an ``IntFlag``), combining members with ``|`` yields
    a plain ``int``, and calling the class with a combined value such as
    ``0x03`` raises ``ValueError``.
    """

    REPETITIVE = 0x01
    PROJECTED = 0x02
    CHECKSUM = 0x04


class RNTupleColumnFlag(IntEnum):
    """Bit values for the flags of an RNTuple column.

    Each member occupies a distinct single bit, so a flags word can be
    tested with ``flags & RNTupleColumnFlag.X``. Because this is an
    ``IntEnum``, combining members with ``|`` yields a plain ``int``.
    """

    DEFERRED = 0x08
    RANGE = 0x10


class RNTupleExtraTypeIdentifier(IntEnum):
    """Integer codes for RNTuple extra type information identifiers.

    Only one identifier, ``ROOT`` (0), is defined here.
    """

    ROOT = 0x00


class RNTupleUserMetadataType(IntEnum):
    """Integer codes for the value types of RNTuple user metadata.

    Members are ``IntEnum`` values, so they compare equal to the raw
    integers they wrap. Numbering starts at 1; no member has value 0.
    """

    # NOTE(review): other changes in this area removed user-metadata handling;
    # confirm this enum still has readers before relying on it.
    INT = 0x01
    BOOL = 0x02
    DOUBLE = 0x03
    STRING = 0x04


class RNTupleClusterFlag(IntEnum):
    """Bit values for the flags of an RNTuple cluster.

    Only one flag, ``SHARDED`` (bit 0), is defined here; test it with
    ``flags & RNTupleClusterFlag.SHARDED``.
    """

    SHARDED = 0x01
Loading

0 comments on commit 94d9684

Please sign in to comment.