diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp index 2e4173d305..1b500c9dde 100644 --- a/src/index/IndexBuilderMain.cpp +++ b/src/index/IndexBuilderMain.cpp @@ -305,12 +305,14 @@ int main(int argc, char** argv) { } bool parseInParallel = getParameterValue(i, parseParallel, false); + bool parseInParallelSetExplicitly = i < parseParallel.size(); auto& filename = inputFile.at(i); if (filename == "-") { filename = "/dev/stdin"; } fileSpecs.emplace_back(filename, getFiletype(type, filename), - std::move(defaultGraph), parseInParallel); + std::move(defaultGraph), parseInParallel, + parseInParallelSetExplicitly); } return fileSpecs; }; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 1e69d9676b..93b2fc09ea 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -291,36 +291,43 @@ std::pair IndexImpl::createInternalPSOandPOS( void IndexImpl::updateInputFileSpecificationsAndLog( std::vector& spec, std::optional parallelParsingSpecifiedViaJson) { - if (spec.size() == 1) { - LOG(INFO) << "Processing triples from " << spec.at(0).filename_ << " ..." - << std::endl; - } else { - LOG(INFO) << "Processing triples from " << spec.size() - << " input streams ..." << std::endl; - } - if (parallelParsingSpecifiedViaJson.value_or(false) == true) { + std::string pleaseUseParallelParsingOption = + "please use the command-line option --parse-parallel or -p"; + // Parallel parsing specified in the `settings.json` file. This is deprecated + // for a single input stream and forbidden for multiple input streams. + if (parallelParsingSpecifiedViaJson.has_value()) { if (spec.size() == 1) { - LOG(WARN) << "Parallel parsing set to `true` in the `.settings.json` " - "file; this is deprecated, please use the command-line " - " option --parse-parallel or -p instead" - << std::endl; - spec.at(0).parseInParallel_ = true; + LOG(WARN) << "Parallel parsing set in the `.settings.json` file; this is " + "deprecated, " + << pleaseUseParallelParsingOption << std::endl; + spec.at(0).parseInParallel_ = parallelParsingSpecifiedViaJson.value(); } else { - throw std::runtime_error{ - "For more than one input file, the parallel parsing must not be " - "specified via the `.settings.json` file, but has to be specified " - " via the command-line option --parse-parallel or -p"}; + throw std::runtime_error{absl::StrCat( + "Parallel parsing set in the `.settings.json` file; this is " + "forbidden for multiple input streams, ", + pleaseUseParallelParsingOption)}; } } - - if (spec.size() == 1 && !parallelParsingSpecifiedViaJson.has_value()) { + // For a single input stream, if parallel parsing is not specified explicitly + // on the command line, we set if implicitly for backward compatibility. + if (!parallelParsingSpecifiedViaJson.has_value() && spec.size() == 1 && + !spec.at(0).parseInParallelSetExplicitly_) { LOG(WARN) << "Implicitly using the parallel parser for a single input file " "for reasons of backward compatibility; this is deprecated, " - "please use the command-line option --parse-parallel or -p " - "instead" - << std::endl; + << pleaseUseParallelParsingOption << std::endl; spec.at(0).parseInParallel_ = true; } + // For a single input stream, show the name and whether we parse in parallel. + // For multiple input streams, only show the number of streams. + if (spec.size() == 1) { + LOG(INFO) << "Parsing triples from single input stream " + << spec.at(0).filename_ << " (parallel = " + << (spec.at(0).parseInParallel_ ? "true" : "false") << ") ..." + << std::endl; + } else { + LOG(INFO) << "Processing triples from " << spec.size() + << " input streams ..." << std::endl; + } } // _____________________________________________________________________________ diff --git a/src/index/InputFileSpecification.h b/src/index/InputFileSpecification.h index 9b5afde367..efc8c204ce 100644 --- a/src/index/InputFileSpecification.h +++ b/src/index/InputFileSpecification.h @@ -1,7 +1,7 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach(joka921) -// +// Copyright 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Johannes Kalmbach + #pragma once #include @@ -27,6 +27,10 @@ struct InputFileSpecification { // Turtle files with all prefixes at the beginning and no multiline literals. bool parseInParallel_ = false; + // Remember if the value for parallel parsing was set explicitly (via the + // command line). + bool parseInParallelSetExplicitly_ = false; + bool operator==(const InputFileSpecification&) const = default; }; } // namespace qlever diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 6ce7f6f732..5bb641b1ef 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -1,6 +1,8 @@ -// Copyright 2015, University of Freiburg, +// Copyright 2015 - 2024, University of Freiburg, // Chair of Algorithms and Data Structures. -// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) +// Authors: Björn Buchhold [2015 - 2017] +// Johannes Kalmbach +// Hannah Bast #include #include @@ -527,50 +529,82 @@ TEST(IndexTest, trivialGettersAndSetters) { TEST(IndexTest, updateInputFileSpecificationsAndLog) { using enum qlever::Filetype; - std::vector files{ - {"singleFile.ttl", Turtle, std::nullopt, false}}; + std::vector singleFileSpec = { + {"singleFile.ttl", Turtle, std::nullopt}}; + std::vector twoFilesSpec = { + {"firstFile.ttl", Turtle, std::nullopt}, + {"secondFile.ttl", Turtle, std::nullopt}}; using namespace ::testing; + + // Parallel parsing not specified anywhere. For a single input stream, we then + // default to `true` for reasons of backwards compatibility, but this is + // deprecated. For multiple input streams, we default to `false` and this is + // normal behavior. { + singleFileSpec.at(0).parseInParallelSetExplicitly_ = false; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, false); + IndexImpl::updateInputFileSpecificationsAndLog(singleFileSpec, + std::nullopt); + EXPECT_THAT(testing::internal::GetCapturedStdout(), + AllOf(HasSubstr("singleFile.ttl"), HasSubstr("deprecated"))); + EXPECT_TRUE(singleFileSpec.at(0).parseInParallel_); + } + { + twoFilesSpec.at(0).parseInParallelSetExplicitly_ = false; + twoFilesSpec.at(1).parseInParallelSetExplicitly_ = false; + testing::internal::CaptureStdout(); + IndexImpl::updateInputFileSpecificationsAndLog(twoFilesSpec, std::nullopt); EXPECT_THAT( testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from singleFile.ttl"), Not(HasSubstr("parallel")))); - EXPECT_FALSE(files.at(0).parseInParallel_); + AllOf(HasSubstr("from 2 input streams"), Not(HasSubstr("deprecated")))); + EXPECT_FALSE(twoFilesSpec.at(0).parseInParallel_); + EXPECT_FALSE(twoFilesSpec.at(1).parseInParallel_); } - { + // Parallel parsing specified on the command line and not in the + // `settings.json`. This is normal behavior (no deprecation warning). + for (auto parallelParsing : {true, false}) { + singleFileSpec.at(0).parseInParallel_ = parallelParsing; + singleFileSpec.at(0).parseInParallelSetExplicitly_ = true; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, true); - EXPECT_THAT(testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from singleFile.ttl"), - HasSubstr("settings.json"), HasSubstr("deprecated"))); - EXPECT_TRUE(files.at(0).parseInParallel_); + IndexImpl::updateInputFileSpecificationsAndLog(singleFileSpec, + std::nullopt); + EXPECT_THAT( + testing::internal::GetCapturedStdout(), + AllOf(HasSubstr("singleFile.ttl"), Not(HasSubstr("deprecated")))); + EXPECT_EQ(singleFileSpec.at(0).parseInParallel_, parallelParsing); } { + twoFilesSpec.at(0).parseInParallel_ = true; + twoFilesSpec.at(1).parseInParallel_ = false; + twoFilesSpec.at(0).parseInParallelSetExplicitly_ = true; + twoFilesSpec.at(1).parseInParallelSetExplicitly_ = true; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, std::nullopt); - EXPECT_THAT(testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from singleFile.ttl"), - HasSubstr("single input"), HasSubstr("deprecated"))); - EXPECT_TRUE(files.at(0).parseInParallel_); + IndexImpl::updateInputFileSpecificationsAndLog(twoFilesSpec, std::nullopt); + EXPECT_THAT( + testing::internal::GetCapturedStdout(), + AllOf(HasSubstr("from 2 input streams"), Not(HasSubstr("deprecated")))); + EXPECT_TRUE(twoFilesSpec.at(0).parseInParallel_); + EXPECT_FALSE(twoFilesSpec.at(1).parseInParallel_); } + // Parallel parsing not specified on the command line, but explicitly set in + // the `settings.json` file. This is deprecated for a single input + // stream and forbidden for multiple input streams. { - files.emplace_back("secondFile.ttl", Turtle, std::nullopt, false); - auto filesCopy = files; + singleFileSpec.at(0).parseInParallelSetExplicitly_ = false; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, false); + IndexImpl::updateInputFileSpecificationsAndLog(singleFileSpec, true); EXPECT_THAT(testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from 2 input streams"), - Not(HasSubstr("is deprecated")))); - EXPECT_EQ(files, filesCopy); + AllOf(HasSubstr("singleFile.ttl"), HasSubstr("deprecated"))); + EXPECT_TRUE(singleFileSpec.at(0).parseInParallel_); } - { + twoFilesSpec.at(0).parseInParallelSetExplicitly_ = false; + twoFilesSpec.at(1).parseInParallelSetExplicitly_ = false; AD_EXPECT_THROW_WITH_MESSAGE( - IndexImpl::updateInputFileSpecificationsAndLog(files, true), - HasSubstr("but has to be specified")); + IndexImpl::updateInputFileSpecificationsAndLog(twoFilesSpec, true), + AllOf(Not(HasSubstr("from 2 input streams")), HasSubstr("forbidden"))); } }