From fa5917cfc6b945f09d3d2cac21d60c40f6a7d8e9 Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Tue, 5 Mar 2024 19:49:34 -0600 Subject: [PATCH 1/5] fix: rework podio options to allow specifying list, includes, and excludes --- src/services/io/podio/JEventProcessorPODIO.cc | 79 +++++++++++-------- src/services/io/podio/JEventProcessorPODIO.h | 1 + 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/services/io/podio/JEventProcessorPODIO.cc b/src/services/io/podio/JEventProcessorPODIO.cc index 2625283bb6..4b5a4ff31b 100644 --- a/src/services/io/podio/JEventProcessorPODIO.cc +++ b/src/services/io/podio/JEventProcessorPODIO.cc @@ -41,7 +41,7 @@ JEventProcessorPODIO::JEventProcessorPODIO() { ); // Get the list of output collections to include/exclude - std::vector output_include_collections={ + std::vector output_collections={ // Header and other metadata "EventHeader", @@ -238,16 +238,22 @@ JEventProcessorPODIO::JEventProcessorPODIO() { // DIRC "DIRCRawHits" }; - std::vector output_exclude_collections; // need to get as vector, then convert to set + japp->SetDefaultParameter( + "podio:output_collections", + output_collections, + "Comma separated list of collection names to write out. If explicitly set to an empty list, all collections including input collections will be written." + ); + std::vector output_include_collections; // need to get as vector, then convert to set japp->SetDefaultParameter( "podio:output_include_collections", output_include_collections, - "Comma separated list of collection names to write out. If not set, all collections will be written (including ones from input file). Don't set this and use PODIO:OUTPUT_EXCLUDE_COLLECTIONS to write everything except a selection." + "Comma separated list of collection names to include. If empty, only default collections will be written. Use this to add to default collections." ); + std::vector output_exclude_collections; // need to get as vector, then convert to set japp->SetDefaultParameter( "podio:output_exclude_collections", output_exclude_collections, - "Comma separated list of collection names to not write out." + "Comma separated list of collection names to exclude. If empty, only default collections will be written. Use this to remove default collections." ); japp->SetDefaultParameter( "podio:print_collections", @@ -255,6 +261,8 @@ JEventProcessorPODIO::JEventProcessorPODIO() { "Comma separated list of collection names to print to screen, e.g. for debugging." ); + m_output_collections = std::set(output_collections.begin(), + output_collections.end()); m_output_include_collections = std::set(output_include_collections.begin(), output_include_collections.end()); m_output_exclude_collections = std::set(output_exclude_collections.begin(), @@ -278,40 +286,45 @@ void JEventProcessorPODIO::Init() { void JEventProcessorPODIO::FindCollectionsToWrite(const std::shared_ptr& event) { // Set up the set of collections_to_write. + m_log->debug("Persisting podio types from includes list"); + + // Get all possible collections std::vector all_collections = event->GetAllCollectionNames(); + std::set all_collections_set = std::set(all_collections.begin(), + all_collections.end()); - if (m_output_include_collections.empty()) { - // User has not specified an include list, so we include _all_ PODIO collections present in the first event. - for (const std::string& col : all_collections) { - if (m_output_exclude_collections.find(col) == m_output_exclude_collections.end()) { - m_collections_to_write.push_back(col); - m_log->info("Persisting collection '{}'", col); - } - } + // User has specified an empty include list, so we include _all_ PODIO collections present in the first event. + if (m_output_collections.empty()) { + m_output_collections.merge(all_collections_set); } - else { - m_log->debug("Persisting podio types from includes list"); - m_user_included_collections = true; - - // We match up the include list with what is actually present in the event - std::set all_collections_set = std::set(all_collections.begin(), all_collections.end()); - - for (const auto& col : m_output_include_collections) { - if (m_output_exclude_collections.find(col) == m_output_exclude_collections.end()) { - // Included and not excluded - if (all_collections_set.find(col) == all_collections_set.end()) { - // Included, but not a valid PODIO type - m_log->warn("Explicitly included collection '{}' not present in factory set, omitting.", col); - } - else { - // Included, not excluded, and a valid PODIO type - m_collections_to_write.push_back(col); - m_log->info("Persisting collection '{}'", col); - } - } - } + + // User has specified explicit collections to include + m_output_collections.merge(m_output_include_collections); + + // User has specified explicit collections to exclude + for (const auto& col : m_output_exclude_collections) { + const auto& it = m_output_collections.find(col); + if (it != m_output_collections.end() ) { + m_output_collections.erase(it); + } + } + + // Subtract the available collections to find those not present + std::vector not_present_collections; + std::set_difference(m_output_collections.begin(), m_output_collections.end(), + all_collections_set.begin(), all_collections_set.end(), + std::back_inserter(not_present_collections)); + for (const auto& col : not_present_collections) { + m_log->warn("Explicitly included collection '{}' not present in factory set, omitting.", col); } + // Intersection with available collections to limit to those that are present + std::set_intersection(m_output_collections.begin(), m_output_collections.end(), + all_collections_set.begin(), all_collections_set.end(), + std::back_inserter(m_collections_to_write)); + for (const auto& col : m_collections_to_write) { + m_log->info("Persisting collection '{}'", col); + } } void JEventProcessorPODIO::Process(const std::shared_ptr &event) { diff --git a/src/services/io/podio/JEventProcessorPODIO.h b/src/services/io/podio/JEventProcessorPODIO.h index 4ab126a140..3801cad611 100644 --- a/src/services/io/podio/JEventProcessorPODIO.h +++ b/src/services/io/podio/JEventProcessorPODIO.h @@ -33,6 +33,7 @@ class JEventProcessorPODIO : public JEventProcessor { std::string m_output_file = "podio_output.root"; std::string m_output_file_copy_dir = ""; + std::set m_output_collections; // config. parameter std::set m_output_include_collections; // config. parameter std::set m_output_exclude_collections; // config. parameter std::vector m_collections_to_write; // derived from above config. parameters From 12e0d62b0820f3415f0bb4ede6c703ebf048c6a1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Mar 2024 01:56:12 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/services/io/podio/JEventProcessorPODIO.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/io/podio/JEventProcessorPODIO.cc b/src/services/io/podio/JEventProcessorPODIO.cc index 4b5a4ff31b..e5f9b02661 100644 --- a/src/services/io/podio/JEventProcessorPODIO.cc +++ b/src/services/io/podio/JEventProcessorPODIO.cc @@ -241,7 +241,7 @@ JEventProcessorPODIO::JEventProcessorPODIO() { japp->SetDefaultParameter( "podio:output_collections", output_collections, - "Comma separated list of collection names to write out. If explicitly set to an empty list, all collections including input collections will be written." + "Comma separated list of collection names to write out. If explicitly set to an empty list, all collections including input collections will be written." ); std::vector output_include_collections; // need to get as vector, then convert to set japp->SetDefaultParameter( From cd9ccff636647ccbb3fdd781eff06e7bc7d2af8f Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Wed, 6 Mar 2024 09:03:14 -0600 Subject: [PATCH 3/5] fix: iwyu --- src/services/io/podio/JEventProcessorPODIO.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/services/io/podio/JEventProcessorPODIO.cc b/src/services/io/podio/JEventProcessorPODIO.cc index e5f9b02661..317059b5fe 100644 --- a/src/services/io/podio/JEventProcessorPODIO.cc +++ b/src/services/io/podio/JEventProcessorPODIO.cc @@ -9,7 +9,9 @@ #include #include #include +#include #include +#include #include "services/log/Log_service.h" From 7832a6e90e3b5ede6e5e5efd58891a02254990e4 Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Wed, 6 Mar 2024 10:34:02 -0600 Subject: [PATCH 4/5] fix: also ignore edm4eic::*::createBuffers --- .github/lsan.supp | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/lsan.supp b/.github/lsan.supp index abaee04a3d..41e934ec2f 100644 --- a/.github/lsan.supp +++ b/.github/lsan.supp @@ -1,6 +1,7 @@ leak:CherenkovPID::AddMassHypothesis leak:dd4hep::DetElement::DetElement leak:dd4hep::Header::Header +leak:edm4eic::*::createBuffers leak:edm4hep::*::createBuffers leak:libActsCore.so leak:libCling.so From 5b24b7588b5336b7f31c72e96173072ab3b1de23 Mon Sep 17 00:00:00 2001 From: Wouter Deconinck Date: Wed, 6 Mar 2024 11:21:16 -0600 Subject: [PATCH 5/5] fix: use output_collections in two-stage running --- .github/workflows/linux-eic-shell.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux-eic-shell.yml b/.github/workflows/linux-eic-shell.yml index 4a6782ca99..1613fa0a1b 100644 --- a/.github/workflows/linux-eic-shell.yml +++ b/.github/workflows/linux-eic-shell.yml @@ -401,7 +401,7 @@ jobs: export DETECTOR_CONFIG=${DETECTOR}_${{ matrix.detector_config }} export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH export JANA_PLUGIN_PATH=$PWD/install/lib/EICrecon/plugins${JANA_PLUGIN_PATH:+:${JANA_PLUGIN_PATH}} - $PWD/install/bin/eicrecon -Ppodio:output_include_collections=EventHeader,MCParticles,EcalBarrelScFiRawHits,EcalBarrelImagingRawHits -Ppodio:output_file=two_stage_raw_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root sim_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4hep.root -Pplugins=dump_flags,janadot -Pdump_flags:json=${{ matrix.particle }}_${{ matrix.detector_config }}_flags.json -Pjana:warmup_timeout=0 -Pjana:timeout=0 + $PWD/install/bin/eicrecon -Ppodio:output_collections=EventHeader,MCParticles,EcalBarrelScFiRawHits,EcalBarrelImagingRawHits -Ppodio:output_file=two_stage_raw_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root sim_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4hep.root -Pplugins=dump_flags,janadot -Pdump_flags:json=${{ matrix.particle }}_${{ matrix.detector_config }}_flags.json -Pjana:warmup_timeout=0 -Pjana:timeout=0 - name: Upload digitization output uses: actions/upload-artifact@v4 with: @@ -417,7 +417,7 @@ jobs: export DETECTOR_CONFIG=${DETECTOR}_${{ matrix.detector_config }} export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH export JANA_PLUGIN_PATH=$PWD/install/lib/EICrecon/plugins${JANA_PLUGIN_PATH:+:${JANA_PLUGIN_PATH}} - $PWD/install/bin/eicrecon -Ppodio:output_include_collections=EcalBarrelClusters,EcalBarrelClusterAssociations -Ppodio:output_file=two_stage_rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root two_stage_raw_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root -Pplugins=dump_flags,janadot -Pdump_flags:json=${{ matrix.particle }}_${{ matrix.detector_config }}_flags.json -Pjana:warmup_timeout=0 -Pjana:timeout=0 + $PWD/install/bin/eicrecon -Ppodio:output_collections=EcalBarrelClusters,EcalBarrelClusterAssociations -Ppodio:output_file=two_stage_rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root two_stage_raw_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root -Pplugins=dump_flags,janadot -Pdump_flags:json=${{ matrix.particle }}_${{ matrix.detector_config }}_flags.json -Pjana:warmup_timeout=0 -Pjana:timeout=0 - name: Upload reconstruction output uses: actions/upload-artifact@v4 with: