Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into updateMetadata
Browse files Browse the repository at this point in the history
  • Loading branch information
Hannah Bast committed Dec 19, 2024
2 parents a4f7f33 + 97c195a commit 2caed8a
Show file tree
Hide file tree
Showing 63 changed files with 1,076 additions and 274 deletions.
3 changes: 2 additions & 1 deletion benchmark/JoinAlgorithmBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1328,7 +1328,8 @@ class GeneralInterfaceImplementation : public BenchmarkInterface {
*/
bool addNewRowToBenchmarkTable(
ResultTable* table,
const ad_utility::SameAsAny<float, size_t> auto changingParameterValue,
const QL_CONCEPT_OR_NOTHING(
ad_utility::SameAsAny<float, size_t>) auto changingParameterValue,
ad_utility::InvocableWithExactReturnType<bool, float, size_t, size_t,
size_t, size_t, float,
float> auto stopFunction,
Expand Down
5 changes: 3 additions & 2 deletions benchmark/infrastructure/Benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "../benchmark/infrastructure/BenchmarkMeasurementContainer.h"
#include "../benchmark/infrastructure/BenchmarkMetadata.h"
#include "backports/concepts.h"
#include "util/ConfigManager/ConfigManager.h"
#include "util/CopyableUniquePtr.h"
#include "util/Exception.h"
Expand Down Expand Up @@ -67,8 +68,8 @@ class BenchmarkResults {
@param constructorArgs Arguments to pass to the constructor of the object,
that the new `CopyableUniquePtr` will own.
*/
template <
ad_utility::SameAsAny<ResultTable, ResultEntry, ResultGroup> EntryType>
template <QL_CONCEPT_OR_TYPENAME(
ad_utility::SameAsAny<ResultTable, ResultEntry, ResultGroup>) EntryType>
static EntryType& addEntryToContainerVector(
PointerVector<EntryType>& targetVector, auto&&... constructorArgs) {
targetVector.push_back(ad_utility::make_copyable_unique<EntryType>(
Expand Down
2 changes: 1 addition & 1 deletion benchmark/infrastructure/BenchmarkMeasurementContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ std::ostream& operator<<(std::ostream& os, const ResultGroup& resultGroup) {
}

// ____________________________________________________________________________
template <ad_utility::SameAsAny<ResultEntry, ResultTable> T>
template <typename T>
void ResultGroup::deleteEntryImpl(T& entry) {
// The vector, that holds our entries.
auto& vec = [this]() -> auto& {
Expand Down
2 changes: 1 addition & 1 deletion benchmark/infrastructure/BenchmarkMeasurementContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ class ResultGroup : public BenchmarkMetadataGetter {

private:
// The implementation for the general deletion of entries.
template <ad_utility::SameAsAny<ResultEntry, ResultTable> T>
template <typename T>
void deleteEntryImpl(T& entry);
};

Expand Down
33 changes: 24 additions & 9 deletions e2e/scientists_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,31 +55,43 @@ queries:
?t ql:contains-word "RElaT* phySIKalische rela*"
}
checks:
- num_cols: 5
- selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_matchingword_t_relat", "?ql_matchingword_t_rela" ]
- num_cols: 8
- selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_score_prefix_t_RElaT", "?ql_matchingword_t_relat", "?ql_score_word_t_phySIKalische", "?ql_score_prefix_t_rela", "?ql_matchingword_t_rela" ]
- contains_row:
- "<Albert_Einstein>"
- null
- null
- null
- "relationship"
- null
- null
- "relationship"
- contains_row:
- "<Albert_Einstein>"
- null
- null
- null
- "relationship"
- null
- null
- "relativity"
- contains_row:
- "<Albert_Einstein>"
- null
- null
- null
- "relativity"
- null
- null
- "relationship"
- contains_row:
- "<Albert_Einstein>"
- null
- null
- null
- "relativity"
- null
- null
- "relativity"

- query: algo-star-female-scientists
Expand Down Expand Up @@ -151,7 +163,7 @@ queries:
}
TEXTLIMIT 2
checks:
- num_cols: 7
- num_cols: 9
- num_rows: 18

- query: algor-star-female-born-before-1940
Expand Down Expand Up @@ -192,7 +204,7 @@ queries:
}
ORDER BY DESC(?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_)
checks:
- num_cols: 5
- num_cols: 6
- num_rows: 7
- contains_row:
- "<Ada_Lovelace>"
Expand All @@ -202,6 +214,7 @@ queries:
Charles Babbage, also known as' the father of computers', and in
particular, Babbage's work on the Analytical Engine."
- null
- null
- "relationship"
- order_numeric: {"dir": "DESC",
"var" : "?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_"}
Expand All @@ -219,7 +232,7 @@ queries:
ORDER BY DESC(?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_)
TEXTLIMIT 2
checks:
- num_cols: 5
- num_cols: 6
- num_rows: 3
- contains_row:
- "<Ada_Lovelace>"
Expand All @@ -229,6 +242,7 @@ queries:
Charles Babbage, also known as' the father of computers', and in
particular, Babbage's work on the Analytical Engine."
- null
- null
- "relationship"
- order_numeric: {"dir": "DESC",
"var" : "?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_"}
Expand All @@ -246,7 +260,7 @@ queries:
}
TEXTLIMIT 1
checks:
- num_cols: 6
- num_cols: 7
- num_rows: 2
- contains_row:
- "<Ada_Lovelace>"
Expand All @@ -255,6 +269,7 @@ queries:
with Somerville to visit Babbage as often as she could."
- null
- null
- null
- "relationship"


Expand Down Expand Up @@ -1391,10 +1406,10 @@ queries:
?t ql:contains-word "algo* herm* primary"
}
checks:
- num_cols: 5
- num_cols: 8
- num_rows: 1
- selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_matchingword_t_algo", "?ql_matchingword_t_herm" ]
- contains_row: [ "<Grete_Hermann>",null,"Hermann's algorithm for primary decomposition is still in use now.","algorithm","hermann" ]
- selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_score_prefix_t_algo", "?ql_matchingword_t_algo", "?ql_score_prefix_t_herm", "?ql_matchingword_t_herm", "?ql_score_word_t_primary" ]
- contains_row: [ "<Grete_Hermann>",null,"Hermann's algorithm for primary decomposition is still in use now.",null,"algorithm",null,"hermann",null ]


- query : select_asterisk_regex-lastname-stein
Expand Down
13 changes: 2 additions & 11 deletions src/backports/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <range/v3/all.hpp>
#include <utility>

#include "backports/concepts.h"

// The following defines namespaces `ql::ranges` and `ql::views` that are almost
// drop-in replacements for `std::ranges` and `std::views`. In C++20 mode (when
// the `QLEVER_CPP_17` macro is not used), these namespaces are simply aliases
Expand All @@ -19,7 +21,6 @@
// currently not aware of, because they only affect functions that we currently
// don't use. For those, the following header can be expanded in the future.
#ifndef QLEVER_CPP_17
#include <concepts>
#include <ranges>
#endif

Expand All @@ -46,14 +47,4 @@ using namespace std::views;
#endif
} // namespace views

// The namespace `ql::concepts` includes concepts that are contained in the
// C++20 standard as well as in `range-v3`.
namespace concepts {
#ifdef QLEVER_CPP_17
using namespace ::concepts;
#else
using namespace std;
#endif
} // namespace concepts

} // namespace ql
58 changes: 48 additions & 10 deletions src/backports/concepts.h
Original file line number Diff line number Diff line change
@@ -1,17 +1,55 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>
// Copyright 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Author: Johannes Kalmbach <[email protected]>

#pragma once

#include <concepts/concepts.hpp>
#ifndef QLEVER_CPP_17
#include <concepts>
#endif

// Define the following macros:
// `QL_OPT_CONCEPT(arg)` which expands to `arg` in C++20 mode, and to nothing in
// C++17 mode. It can be used to easily opt out of concepts that are only used
// for documentation and increased safety and not for overload resolution.
// Example usage:
// `QL_OPT_CONCEPT(std::view) auto x = someFunction();`
//
// `QL_CONCEPT_OR_NOTHING(arg)`: expands to `arg` in C++20 mode, and to
// nothing in C++17 mode. It can be used to easily opt out of concepts that are
// only used for documentation and increased safety and not for overload
// resolution.
//
// `QL_CONCEPT_OR_TYPENAME(arg)`: expands to `arg` in C++20 mode, and to
// `typename` in C++17 mode.
//
// Example usages:
//
// `QL_CONCEPT_OR_NOTHING(std::view) auto x = someFunction();`
//
// `QL_CONCEPT_OR_NOTHING(SameAsAny<int, float>)`
//
// `void f(QL_CONCEPT_OR_NOTHING(std::view) auto x) {...}`
//
// `template <QL_CONCEPT_OR_TYPENAME(ql::same_as<int>) T> void f(){...}`
//
// NOTE: The macros are variadic to allow for commas in the argument, like in
// the second example above.

// Select the macro expansions depending on the language mode. The two
// `QL_CONCEPT_OR_*` macros are variadic, so arguments that contain commas are
// passed through unchanged.
#ifndef QLEVER_CPP_17
// C++20 mode: the argument (the concept) is kept exactly as written.
#define QL_OPT_CONCEPT(arg) arg
#define QL_CONCEPT_OR_NOTHING(...) __VA_ARGS__
#define QL_CONCEPT_OR_TYPENAME(...) __VA_ARGS__
#else
// C++17 mode: the argument is dropped, or replaced by a plain `typename`.
#define QL_OPT_CONCEPT(arg)
#define QL_CONCEPT_OR_NOTHING(...)
#define QL_CONCEPT_OR_TYPENAME(...) typename
#endif

// The namespace `ql::concepts` includes concepts that are contained in the
// C++20 standard as well as in `range-v3`.
namespace ql::concepts {

// Pull in the concept definitions that match the active language mode, so
// that callers can always spell them as `ql::concepts::...`.
#ifndef QLEVER_CPP_17
// C++20 mode: use the names from namespace `std`.
using namespace std;
#else
// C++17 mode: use the names from the global `::concepts` namespace.
using namespace ::concepts;
#endif

}  // namespace ql::concepts
9 changes: 7 additions & 2 deletions src/engine/AddCombinedRowToTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,13 @@ class AddCombinedRowToIdTable {
if (nextIndex_ != 0) {
AD_CORRECTNESS_CHECK(inputLeftAndRight_.has_value());
flush();
} else {
// Clear vocab when no rows were written.
} else if (resultTable_.empty()) {
// Clear local vocab when no rows were written.
//
// TODO<joka921, robinTF> This is a conservative approach. We could
// optimize this case (clear the local vocab more often, but still
// correctly) by considering the situation after all the relevant inputs
// have been processed.
mergedVocab_ = LocalVocab{};
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
add_subdirectory(sparqlExpressions)
add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp)
qlever_target_link_libraries(SortPerformanceEstimator)
qlever_target_link_libraries(SortPerformanceEstimator parser)
add_library(engine
Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
IndexScan.cpp Join.cpp Sort.cpp
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const {

// _____________________________________________________________________________
template <size_t WIDTH>
Result::Generator Distinct::lazyDistinct(Result::Generator input,
Result::Generator Distinct::lazyDistinct(Result::LazyResult input,
bool yieldOnce) const {
IdTable aggregateTable{subtree_->getResultWidth(), allocator()};
LocalVocab aggregateVocab{};
Expand Down
3 changes: 2 additions & 1 deletion src/engine/Distinct.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ class Distinct : public Operation {
// if every `IdTable` from `input` should yield its own `IdTable` or if all
// of them should get aggregated into a single big `IdTable`.
template <size_t WIDTH>
Result::Generator lazyDistinct(Result::Generator input, bool yieldOnce) const;
Result::Generator lazyDistinct(Result::LazyResult input,
bool yieldOnce) const;

// Removes all duplicates from input with regards to the columns
// in keepIndices. The input needs to be sorted on the keep indices,
Expand Down
2 changes: 0 additions & 2 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ uint64_t GroupBy::getSizeEstimateBeforeLimit() {
return _subtree->getMultiplicity(_subtree->getVariableColumn(var));
};

// TODO<joka921> Once we can use `std::views` this can be solved
// more elegantly.
float minMultiplicity = ql::ranges::min(
_groupByVariables | ql::views::transform(varToMultiplicity));
return _subtree->getSizeEstimate() / minMultiplicity;
Expand Down
6 changes: 3 additions & 3 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,13 +489,13 @@ void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata) {
// resulting from the generator.
struct IndexScan::SharedGeneratorState {
// The generator that yields the tables to be joined with the index scan.
Result::Generator generator_;
Result::LazyResult generator_;
// The column index of the join column in the tables yielded by the generator.
ColumnIndex joinColumn_;
// Metadata and blocks of this index scan.
Permutation::MetadataAndBlocks metaBlocks_;
// The iterator of the generator that is currently being consumed.
std::optional<Result::Generator::iterator> iterator_ = std::nullopt;
std::optional<Result::LazyResult::iterator> iterator_ = std::nullopt;
// Values returned by the generator that have not been re-yielded yet.
// Typically we expect only 3 or fewer values to be prefetched (this is an
// implementation detail of `BlockZipperJoinImpl`).
Expand Down Expand Up @@ -648,7 +648,7 @@ Result::Generator IndexScan::createPrefilteredIndexScanSide(

// _____________________________________________________________________________
std::pair<Result::Generator, Result::Generator> IndexScan::prefilterTables(
Result::Generator input, ColumnIndex joinColumn) {
Result::LazyResult input, ColumnIndex joinColumn) {
AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0);
auto metaBlocks = getMetadataForScan();

Expand Down
2 changes: 1 addition & 1 deletion src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class IndexScan final : public Operation {
// there are undef values, the second generator represents the full index
// scan.
std::pair<Result::Generator, Result::Generator> prefilterTables(
Result::Generator input, ColumnIndex joinColumn);
Result::LazyResult input, ColumnIndex joinColumn);

private:
// Implementation detail that allows to consume a generator from two other
Expand Down
6 changes: 4 additions & 2 deletions src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ using LazyInputView =
// Convert a `generator<IdTableVocab>` to a `generator<IdTableAndFirstCol>` for
// more efficient access in the join columns below and apply the given
// permutation to each table.
LazyInputView convertGenerator(Result::Generator gen,
OptionalPermutation permutation = {}) {
CPP_template(typename Input)(
requires ad_utility::SameAsAny<Input, Result::Generator,
Result::LazyResult>) LazyInputView
convertGenerator(Input gen, OptionalPermutation permutation = {}) {
for (auto& [table, localVocab] : gen) {
applyPermutation(table, permutation);
// Make sure to actually move the table into the wrapper so that the tables
Expand Down
4 changes: 2 additions & 2 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1002,14 +1002,14 @@ QueryPlanner::SubtreePlan QueryPlanner::getTextLeafPlan(
: *(node._variables.begin());
plan = makeSubtreePlan<TextIndexScanForEntity>(_qec, cvar, evar, word);
textLimits[cvar].entityVars_.push_back(evar);
textLimits[cvar].scoreVars_.push_back(cvar.getScoreVariable(evar));
textLimits[cvar].scoreVars_.push_back(cvar.getEntityScoreVariable(evar));
} else {
// Fixed entity case
AD_CORRECTNESS_CHECK(node._variables.size() == 1);
plan = makeSubtreePlan<TextIndexScanForEntity>(
_qec, cvar, node.triple_.o_.toString(), word);
textLimits[cvar].scoreVars_.push_back(
cvar.getScoreVariable(node.triple_.o_.toString()));
cvar.getEntityScoreVariable(node.triple_.o_.toString()));
}
} else {
plan = makeSubtreePlan<TextIndexScanForWord>(_qec, cvar, word);
Expand Down
Loading

0 comments on commit 2caed8a

Please sign in to comment.