Skip to content

Commit

Permalink
[WIP] Make the TTree based ROOT backend store the GenericParameters t…
Browse files Browse the repository at this point in the history
…he same way as the RNTuple based one (#615)

* Refactor root utilities and reduce code duplication

- Lift common type defs into one header to not redefine them
- Keep all root related utilitie classes in one header

* Rename CollectionInfo to CategoryInfo to better convey intention

* Remove unnecessary member variables

* Add functionality to get all values for a type to GenericParameters

* Move some global state into category state

* Avoid unnecessary copy

* Fix include guards to conform to style guide

* Make sure to take lock long enough

* Switch to tuple for consistency

* Remove obsolete reset

* Make the ROOTWriter write split GenericParameters

* Make the ROOTReader read the files again

* Add tests to make sure that v00-99 files stay readable

* Make older compilers happy with in place construction

* Make sure to only run SIO legacy tests when input data is available

* Make tests work by hiding forward declaration from CLING

* Add comment for reasoning about excluding forward decl for CLING
  • Loading branch information
tmadlener authored Jun 11, 2024
1 parent 335a4ae commit 50bfe41
Show file tree
Hide file tree
Showing 20 changed files with 380 additions and 193 deletions.
23 changes: 0 additions & 23 deletions include/podio/CollectionBranches.h

This file was deleted.

27 changes: 27 additions & 0 deletions include/podio/GenericParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <algorithm>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
Expand All @@ -30,6 +31,12 @@ class RNTupleWriter;

namespace podio {

#if !defined(__CLING__)
// cling doesn't really deal well (i.e. at all in this case) with the forward
// declaration here and errors out, breaking e.g. python bindings.
class ROOTReader;
#endif

/// The types which are supported in the GenericParameters
using SupportedGenericDataTypes = std::tuple<int, float, std::string, double>;

Expand Down Expand Up @@ -105,6 +112,10 @@ class GenericParameters {
template <typename T, typename = EnableIfValidGenericDataType<T>>
std::vector<std::string> getKeys() const;

/// Get all the available values for a given type
template <typename T, typename = EnableIfValidGenericDataType<T>>
std::vector<std::vector<T>> getValues() const;

/// erase all elements
void clear() {
_intMap.clear();
Expand All @@ -129,6 +140,10 @@ class GenericParameters {
friend RNTupleWriter;
#endif

#if !defined(__CLING__)
friend ROOTReader;
#endif

/// Get a reference to the internal map for a given type
template <typename T>
const MapType<detail::GetVectorType<T>>& getMap() const {
Expand Down Expand Up @@ -243,5 +258,17 @@ std::vector<std::string> GenericParameters::getKeys() const {
return keys;
}

template <typename T, typename>
std::vector<std::vector<T>> GenericParameters::getValues() const {
std::vector<std::vector<T>> values;
{
auto& mtx = getMutex<T>();
const auto& map = getMap<T>();
std::lock_guard lock{mtx};
values.reserve(map.size());
std::transform(map.begin(), map.end(), std::back_inserter(values), [](const auto& pair) { return pair.second; });
}
return values;
}
} // namespace podio
#endif
2 changes: 0 additions & 2 deletions include/podio/RNTupleReader.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef PODIO_RNTUPLEREADER_H
#define PODIO_RNTUPLEREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ICollectionProvider.h"
#include "podio/ROOTFrameData.h"
#include "podio/SchemaEvolution.h"
#include "podio/podioVersion.h"
Expand Down
56 changes: 28 additions & 28 deletions include/podio/RNTupleWriter.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#ifndef PODIO_RNTUPLEWRITER_H
#define PODIO_RNTUPLEWRITER_H

#include "podio/CollectionBase.h"
#include "podio/Frame.h"
#include "podio/GenericParameters.h"
#include "podio/SchemaEvolution.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TFile.h"
#include <ROOT/RNTuple.hxx>
Expand Down Expand Up @@ -102,42 +102,42 @@ class RNTupleWriter {
checkConsistency(const std::vector<std::string>& collsToWrite, const std::string& category) const;

private:
template <typename T>
void fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry);
std::unique_ptr<ROOT::Experimental::RNTupleModel>
createModels(const std::vector<root_utils::StoreCollection>& collections);

/// Helper struct to group all the necessary information for one category.
struct CategoryInfo {
std::unique_ptr<ROOT::Experimental::RNTupleWriter> writer{nullptr}; ///< The RNTupleWriter for this category

// The following are assumed to run in parallel!
std::vector<uint32_t> ids{}; ///< The ids of all collections
std::vector<std::string> names{}; ///< The names of all collections
std::vector<std::string> types{}; ///< The types of all collections
std::vector<short> subsetCollections{}; ///< The flags identifying the subcollections
std::vector<SchemaVersionT> schemaVersions{}; ///< The schema versions of all collections

// Storage for the keys & values of all the parameters of this category
// (resp. at least the current entry)
root_utils::ParamStorage<int> intParams{};
root_utils::ParamStorage<float> floatParams{};
root_utils::ParamStorage<double> doubleParams{};
root_utils::ParamStorage<std::string> stringParams{};
};
CategoryInfo& getCategoryInfo(const std::string& category);

using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;
std::unique_ptr<ROOT::Experimental::RNTupleModel> createModels(const std::vector<StoreCollection>& collections);
template <typename T>
void fillParams(const GenericParameters& params, CategoryInfo& catInfo, ROOT::Experimental::REntry* entry);

std::unique_ptr<ROOT::Experimental::RNTupleModel> m_metadata{};
std::unique_ptr<ROOT::Experimental::RNTupleWriter> m_metadataWriter{};
template <typename T>
root_utils::ParamStorage<T>& getParamStorage(CategoryInfo& catInfo);

std::unique_ptr<TFile> m_file{};

DatamodelDefinitionCollector m_datamodelCollector{};

struct CollectionInfo {
std::vector<uint32_t> id{};
std::vector<std::string> name{};
std::vector<std::string> type{};
std::vector<short> isSubsetCollection{};
std::vector<SchemaVersionT> schemaVersion{};
std::unique_ptr<ROOT::Experimental::RNTupleWriter> writer{nullptr};
};
CollectionInfo& getCategoryInfo(const std::string& category);

std::unordered_map<std::string, CollectionInfo> m_categories{};
std::unordered_map<std::string, CategoryInfo> m_categories{};

bool m_finished{false};

std::vector<std::string> m_intkeys{}, m_floatkeys{}, m_doublekeys{}, m_stringkeys{};

std::vector<std::vector<int>> m_intvalues{};
std::vector<std::vector<float>> m_floatvalues{};
std::vector<std::vector<double>> m_doublevalues{};
std::vector<std::vector<std::string>> m_stringvalues{};

template <typename T>
std::pair<std::vector<std::string>&, std::vector<std::vector<T>>&> getKeyValueVectors();
};

} // namespace podio
Expand Down
5 changes: 2 additions & 3 deletions include/podio/ROOTLegacyReader.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
#ifndef PODIO_ROOTLEGACYREADER_H
#define PODIO_ROOTLEGACYREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ROOTFrameData.h"
#include "podio/podioVersion.h"
#include "podio/utilities/RootHelpers.h"

#include "TChain.h"

#include <iostream>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -114,7 +113,7 @@ class ROOTLegacyReader {
private:
std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);

void createCollectionBranches(const std::vector<std::tuple<uint32_t, std::string, bool, unsigned int>>& collInfo);
void createCollectionBranches(const std::vector<root_utils::CollectionWriteInfoT>& collInfo);

podio::GenericParameters readEventMetaData();

Expand Down
7 changes: 5 additions & 2 deletions include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#ifndef PODIO_ROOTREADER_H
#define PODIO_ROOTREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ROOTFrameData.h"
#include "podio/podioVersion.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TChain.h"

#include <iostream>
#include <memory>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -157,6 +156,10 @@ class ROOTReader {
/// Read the parameters for the entry specified in the passed CategoryInfo
GenericParameters readEntryParameters(CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry);

template <typename T>
static void readParams(CategoryInfo& catInfo, podio::GenericParameters& params, bool reloadBranches,
unsigned int localEntry);

/// Read the data entry specified in the passed CategoryInfo, and increase the
/// counter afterwards. In case the requested entry is larger than the
/// available number of entries, return a nullptr.
Expand Down
34 changes: 18 additions & 16 deletions include/podio/ROOTWriter.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ifndef PODIO_ROOTWRITER_H
#define PODIO_ROOTWRITER_H

#include "podio/CollectionBranches.h"
#include "podio/CollectionIDTable.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TFile.h"

Expand Down Expand Up @@ -100,32 +100,34 @@ class ROOTWriter {
checkConsistency(const std::vector<std::string>& collsToWrite, const std::string& category) const;

private:
using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;

// collectionID, collectionType, subsetCollection
// @note same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;

/// Helper struct to group together all necessary state to write / process a
/// given category. Created during the first writing of a category
struct CategoryInfo {
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<CollectionInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<root_utils::CollectionWriteInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category

// Storage for the keys & values of all the parameters of this category
// (resp. at least the current entry)
root_utils::ParamStorage<int> intParams{};
root_utils::ParamStorage<float> floatParams{};
root_utils::ParamStorage<double> doubleParams{};
root_utils::ParamStorage<std::string> stringParams{};
};

/// Initialize the branches for this category
void initBranches(CategoryInfo& catInfo, const std::vector<StoreCollection>& collections,
void initBranches(CategoryInfo& catInfo, const std::vector<root_utils::StoreCollection>& collections,
/*const*/ podio::GenericParameters& parameters);

/// Get the (potentially uninitialized category information for this category)
CategoryInfo& getCategoryInfo(const std::string& category);

static void resetBranches(std::vector<root_utils::CollectionBranches>& branches,
const std::vector<ROOTWriter::StoreCollection>& collections,
/*const*/ podio::GenericParameters* parameters);
static void resetBranches(CategoryInfo& categoryInfo, const std::vector<root_utils::StoreCollection>& collections);

/// Fill the parameter keys and values into the CategoryInfo storage
static void fillParams(CategoryInfo& catInfo, const GenericParameters& params);

std::unique_ptr<TFile> m_file{nullptr}; ///< The storage file
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories
Expand Down
81 changes: 81 additions & 0 deletions include/podio/utilities/RootHelpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#ifndef PODIO_UTILITIES_ROOTHELPERS_H
#define PODIO_UTILITIES_ROOTHELPERS_H

#include "TBranch.h"

#include <string>
#include <tuple>
#include <vector>

namespace podio {
class CollectionBase;

namespace root_utils {

// A collection of additional information that describes the collection: the
// collectionID, the collection (data) type, whether it is a subset
// collection, and its schema version
using CollectionWriteInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;
// for backwards compatibility
using CollectionInfoWithoutSchemaT = std::tuple<int, std::string, bool>;

/// A collection name and a base pointer grouped together for writing
using StoreCollection = std::tuple<const std::string&, podio::CollectionBase*>;

/// Small helper struct to collect all branches that are necessary to read or
/// write a collection. Needed to cache the branch pointers and avoid having to
/// get them from a TTree/TChain for every event.
struct CollectionBranches {
CollectionBranches() = default;
~CollectionBranches() = default;
CollectionBranches(const CollectionBranches&) = delete;
CollectionBranches& operator=(const CollectionBranches&) = delete;
CollectionBranches(CollectionBranches&&) = default;
CollectionBranches& operator=(CollectionBranches&&) = default;

CollectionBranches(TBranch* dataBranch) : data(dataBranch) {
}

TBranch* data{nullptr};
std::vector<TBranch*> refs{};
std::vector<TBranch*> vecs{};
std::vector<std::string> refNames{}; ///< The names of the relation branches
std::vector<std::string> vecNames{}; ///< The names of the vector member branches
};

/// Pair of keys and values for one type of the ones that can be stored in
/// GenericParameters
template <typename T>
struct ParamStorage {
ParamStorage() = default;
~ParamStorage() = default;
ParamStorage(const ParamStorage&) = delete;
ParamStorage& operator=(const ParamStorage&) = delete;
ParamStorage(ParamStorage&&) = default;
ParamStorage& operator=(ParamStorage&&) = default;

ParamStorage(const std::vector<std::string>& ks, const std::vector<std::vector<T>>& vs) : keys(ks), values(vs) {
}

auto keysPtr() {
m_keysPtr = &keys;
return &m_keysPtr;
}

auto valuesPtr() {
m_valuesPtr = &values;
return &m_valuesPtr;
}

std::vector<std::string> keys{};
std::vector<std::vector<T>> values{};

private:
std::vector<std::string>* m_keysPtr{nullptr};
std::vector<std::vector<T>>* m_valuesPtr{nullptr};
};

} // namespace root_utils
} // namespace podio

#endif // PODIO_UTILITIES_ROOTHELPERS_H
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ SET(root_headers
${PROJECT_SOURCE_DIR}/include/podio/ROOTLegacyReader.h
${PROJECT_SOURCE_DIR}/include/podio/ROOTWriter.h
${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameData.h
${PROJECT_SOURCE_DIR}/include/podio/utilities/RootHelpers.h
)
if(ENABLE_RNTUPLE)
list(APPEND root_headers
Expand Down
Loading

0 comments on commit 50bfe41

Please sign in to comment.