Skip to content

Commit

Permalink
Fix the coverage issue and hopefully also the MacOS build.
Browse files Browse the repository at this point in the history
Signed-off-by: Johannes Kalmbach <[email protected]>
  • Loading branch information
joka921 committed Nov 20, 2024
1 parent 69eb0b3 commit 2f515e9
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 60 deletions.
86 changes: 61 additions & 25 deletions src/engine/Describe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "engine/Join.h"
#include "engine/Values.h"

// _____________________________________________________________________________
Describe::Describe(QueryExecutionContext* qec,
std::shared_ptr<QueryExecutionTree> subtree,
parsedQuery::Describe describe)
Expand All @@ -18,11 +19,14 @@ Describe::Describe(QueryExecutionContext* qec,
AD_CORRECTNESS_CHECK(subtree_ != nullptr);
}

// _____________________________________________________________________________
std::vector<QueryExecutionTree*> Describe::getChildren() {
return {subtree_.get()};
}

// _____________________________________________________________________________
string Describe::getCacheKeyImpl() const {
// The cache key must represent the `resources_` as well as the `subtree_`.

Check failure on line 29 in src/engine/Describe.cpp

View workflow job for this annotation

GitHub Actions / Check for spelling errors

repesent ==> represent
std::string resourceKey;
for (const auto& resource : describe_.resources_) {
if (std::holds_alternative<TripleComponent::Iri>(resource)) {
Expand All @@ -39,13 +43,14 @@ string Describe::getCacheKeyImpl() const {
return absl::StrCat("DESCRIBE ", subtree_->getCacheKey(), resourceKey);
}

// _____________________________________________________________________________
string Describe::getDescriptor() const {
  // A fixed, human-readable name for this operation.
  return "DESCRIBE";
}

// _____________________________________________________________________________
size_t Describe::getResultWidth() const {
  // The result always has exactly three columns (`?subject`, `?predicate`,
  // `?object`).
  constexpr size_t numResultColumns = 3;
  return numResultColumns;
}

// As DESCRIBE is never part of the query planning, we can return dummy values
// for the following functions.

// As DESCRIBE is never part of the query planning (it is always the root
// operation), we can return dummy values for the following functions.
size_t Describe::getCostEstimate() { return 2 * subtree_->getCostEstimate(); }

uint64_t Describe::getSizeEstimateBeforeLimit() {
Expand All @@ -56,8 +61,14 @@ float Describe::getMultiplicity([[maybe_unused]] size_t col) { return 1.0f; }

bool Describe::knownEmptyResult() {
  // This operation never reports its result as empty in advance.
  return false;
}

// The result cannot easily be sorted, as it involves the recursive expansion
// of graphs.
vector<ColumnIndex> Describe::resultSortedOn() const {
  // No column of the result is reported as sorted.
  return {};
}

// The result always consists of three hardcoded variables `?subject`,
// `?predicate`, `?object`. Note: The variable names must be in sync with the
// implicit CONSTRUCT query created by the parser (see
// `SparqlQleverVisitor::visitDescribe` for details).
VariableToColumnMap Describe::computeVariableToColumnMap() const {
using V = Variable;
auto col = makeAlwaysDefinedColumn;
Expand All @@ -66,6 +77,12 @@ VariableToColumnMap Describe::computeVariableToColumnMap() const {
{V("?object"), col(2)}};
}

// A helper function for the recursive BFS. Return the subset of `input` (as an
// `IdTable` with one column) that fulfills the following properties:
// 1. The ID is a blank node
// 2. The ID is not part of `alreadySeen`.
// The returned IDs are then also added to `alreadySeen`. The result contains no
// duplicates.
static IdTable getNewBlankNodes(
const auto& allocator, ad_utility::HashSetWithMemoryLimit<Id>& alreadySeen,
std::span<Id> input) {
Expand All @@ -88,41 +105,50 @@ static IdTable getNewBlankNodes(
return result;
}

// TODO<joka921> Comment.
// _____________________________________________________________________________
void Describe::recursivelyAddBlankNodes(
IdTable& finalResult, ad_utility::HashSetWithMemoryLimit<Id>& alreadySeen,
IdTable blankNodes) {
AD_CORRECTNESS_CHECK(blankNodes.numColumns() == 1);

// Stop condition for the recursion, no new start nodes found.
if (blankNodes.empty()) {
return;
}

// Set up a join between the `blankNodes` and the full index.
using V = Variable;
SparqlTripleSimple triple{V{"?subject"}, V{"?predicate"}, V{"?object"}};
auto subjectVar = V{"?subject"};
SparqlTripleSimple triple{subjectVar, V{"?predicate"}, V{"?object"}};
auto indexScan = ad_utility::makeExecutionTree<IndexScan>(
getExecutionContext(), Permutation::SPO, triple);
auto valuesOp = ad_utility::makeExecutionTree<ValuesForTesting>(
getExecutionContext(), std::move(blankNodes),
std::vector<std::optional<Variable>>{V{"?subject"}});
std::vector<std::optional<Variable>>{subjectVar});

auto joinColValues = valuesOp->getVariableColumn(subjectVar);
auto joinColScan = indexScan->getVariableColumn(subjectVar);

// TODO<joka921> It might be, that the Column index is definitely not 0, we
// have to store this separately.
auto join = ad_utility::makeExecutionTree<Join>(
getExecutionContext(), valuesOp, std::move(indexScan), 0, 0);
getExecutionContext(), std::move(valuesOp), std::move(indexScan),
joinColValues, joinColScan);

auto result = join->getResult();
// TODO<joka921> A lot of those things are inefficient, lets get it working
// first.
// TODO<joka921, RobinTF> As soon as the join is lazy, we can compute the
// result lazily and therefore avoid the copy via `clone` of the IdTable.
auto table = result->idTable().clone();
finalResult.reserve(finalResult.size() + table.size());
auto s = join->getVariableColumn(V{"?subject"});
auto p = join->getVariableColumn(V{"?predicate"});
auto o = join->getVariableColumn(V{"?object"});
CI s = join->getVariableColumn(V{"?subject"});
CI p = join->getVariableColumn(V{"?predicate"});
CI o = join->getVariableColumn(V{"?object"});
table.setColumnSubset(std::vector{s, p, o});
// TODO<joka921> Make the result of DESCRIBE lazy, then we avoid the
// additional copy here.
finalResult.insertAtEnd(table);

// Compute the set of newly found blank nodes and recurse.
// Note: The stop condition is at the beginning of the recursive call.
auto newBlankNodes =
getNewBlankNodes(allocator(), alreadySeen, table.getColumn(2));
// recurse
recursivelyAddBlankNodes(finalResult, alreadySeen, std::move(newBlankNodes));
}

Expand All @@ -133,39 +159,49 @@ ProtoResult Describe::computeResult([[maybe_unused]] bool requestLaziness) {
if (std::holds_alternative<TripleComponent::Iri>(resource)) {
valuesToDescribe.push_back(std::get<TripleComponent::Iri>(resource));
} else {
// TODO<joka921> Implement this, it should be fairly simple.
throw std::runtime_error("DESCRIBE with a variable is not yet supported");
}
}
parsedQuery::SparqlValues values;
using V = Variable;
values._variables.push_back(V{"?subject"});
Variable subjectVar = V{"?subject"};
values._variables.push_back(subjectVar);

// Set up and execute a JOIN between the described IRIs and the full index.
// TODO<joka921> There is a lot of code duplication in the following block
// with the recursive BFS on the blank nodes, factor that out.
for (const auto& v : valuesToDescribe) {
values._values.push_back(std::vector{v});
}
SparqlTripleSimple triple{V{"?subject"}, V{"?predicate"}, V{"?object"}};
SparqlTripleSimple triple{subjectVar, V{"?predicate"}, V{"?object"}};
auto indexScan = ad_utility::makeExecutionTree<IndexScan>(
getExecutionContext(), Permutation::SPO, triple);
auto valuesOp =
ad_utility::makeExecutionTree<Values>(getExecutionContext(), values);
auto joinColValues = valuesOp->getVariableColumn(subjectVar);
auto joinColScan = indexScan->getVariableColumn(subjectVar);

// TODO<joka921> It might be, that the Column index is definitely not 0, we
// have to store this separately.
// TODO<joka921> We have to (here, as well as in the recursion) also respect
// the GRAPHS by which the result is filtered (and add unit tests for that
// case).
auto join = ad_utility::makeExecutionTree<Join>(
getExecutionContext(), valuesOp, std::move(indexScan), 0, 0);
getExecutionContext(), std::move(valuesOp), std::move(indexScan),
joinColValues, joinColScan);

// TODO<joka921> The following code (which extracts the required columns and
// writes the initial triples) is also duplicated, factor it out.
auto result = join->getResult();
IdTable resultTable = result->idTable().clone();

auto s = join->getVariableColumn(V{"?subject"});
auto p = join->getVariableColumn(V{"?predicate"});
auto o = join->getVariableColumn(V{"?object"});
using CI = ColumnIndex;
CI s = join->getVariableColumn(V{"?subject"});
CI p = join->getVariableColumn(V{"?predicate"});
CI o = join->getVariableColumn(V{"?object"});

resultTable.setColumnSubset(std::vector{s, p, o});

// Recursively follow blank nodes
// Recursively follow all blank nodes.
ad_utility::HashSetWithMemoryLimit<Id> alreadySeen{allocator()};
auto blankNodes =
getNewBlankNodes(allocator(), alreadySeen, resultTable.getColumn(2));
Expand Down
25 changes: 20 additions & 5 deletions src/engine/Describe.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,35 @@
#include "engine/Operation.h"
#include "parser/GraphPatternOperation.h"

// An `Operation` which implements SPARQL DESCRIBE queries according to the
// Concise Bounded Description (CBD, see
// https://www.w3.org/submissions/2005/SUBM-CBD-20050603/)
class Describe : public Operation {
private:
// The subtree that computes the WHERE clause of the DESCRIBE query.
std::shared_ptr<QueryExecutionTree> subtree_;

// The specification of the DESCRIBE clause.
parsedQuery::Describe describe_;

public:
// Constructor. For details see the documentation of the member variables
// above.
Describe(QueryExecutionContext* qec,
std::shared_ptr<QueryExecutionTree> subtree,
parsedQuery::Describe describe);

// Getter for testing.
const auto& getDescribe() const { return describe_; }

// The following functions are all overridden from the base class `Operation`,
// see there for documentation.
std::vector<QueryExecutionTree*> getChildren() override;

string getCacheKeyImpl() const override;

public:
string getDescriptor() const override;
size_t getResultWidth() const override;

size_t getCostEstimate() override;

private:
Expand All @@ -40,11 +48,18 @@ class Describe : public Operation {
private:
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;
ProtoResult computeResult(bool requestLaziness) override;
// Compute the variable to column index mapping. Is used internally by
// `getInternallyVisibleVariableColumns`.
VariableToColumnMap computeVariableToColumnMap() const override;

// TODO<joka921> Comment.
// Add all triples whose subject is one of the `blankNodes` to the
// `finalResult`. `blankNodes` is a table with one column. Recursively
// continue for all newly found blank nodes (objects of the newly found
// triples, which are not contained in `alreadySeen`). This is a recursive
// implementation of breadth-first-search (BFS) where `blankNodes` is the set
// of start nodes, and `alreadySeen` is the set of nodes which have already
// been explored, which is needed to handle cycles in the graph. The explored
// graph is `all triples currently stored by QLever`.
// TODO<joka921> We have to extend this by the information of the allowed
// graphs.
void recursivelyAddBlankNodes(
IdTable& finalResult, ad_utility::HashSetWithMemoryLimit<Id>& alreadySeen,
IdTable blankNodes);
Expand Down
2 changes: 1 addition & 1 deletion src/parser/DatasetClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Chair of Algorithms and Data Structures
// Author: Johannes Kalmbach <[email protected]>

#include "DatasetClauses.h"
#include "parser/DatasetClauses.h"

// _____________________________________________________________________________
parsedQuery::DatasetClauses parsedQuery::DatasetClauses::fromClauses(
Expand Down
7 changes: 6 additions & 1 deletion src/parser/GraphPatternOperation.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,16 @@ class Subquery {
const ParsedQuery& get() const;
};

// A SPARQL `DESCRIBE` construct.
// A SPARQL `DESCRIBE` query.
struct Describe {
// A single resource of the DESCRIBE clause: either an IRI or a variable.
using VarOrIri = std::variant<TripleComponent::Iri, Variable>;
// The resources (variables or IRIs) that are to be described, for example
// `?x` and `<y>` in `DESCRIBE ?x <y>`.
std::vector<VarOrIri> resources_;
// The FROM clauses of the DESCRIBE query.
DatasetClauses datasetClauses_;
// The WHERE clause of the DESCRIBE query. It is used to compute the values
// for the variables that are to be described.
Subquery whereClause_;
};

Expand Down
64 changes: 45 additions & 19 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,45 +279,71 @@ ParsedQuery Visitor::visit(Parser::ConstructQueryContext* ctx) {

// ____________________________________________________________________________________
ParsedQuery Visitor::visit(Parser::DescribeQueryContext* ctx) {
auto clause = parsedQuery::Describe{};
auto specs = visitVector(ctx->varOrIri());
if (specs.empty()) {
reportNotSupported(ctx, "DESCRIBE * is");
}
auto describeClause = parsedQuery::Describe{};
auto describedResources = visitVector(ctx->varOrIri());

std::vector<Variable> describedVariables;
for (GraphTerm& spec : specs) {
if (std::holds_alternative<Variable>(spec)) {
const auto& variable = std::get<Variable>(spec);
clause.resources_.push_back(variable);
describedVariables.push_back(variable);
} else if (std::holds_alternative<Iri>(spec)) {
auto iri =
TripleComponent::Iri::fromIriref(std::get<Iri>(spec).toSparql());
clause.resources_.push_back(std::move(iri));
}
// Convert the describe resources (variables or IRIs) from the format that the
// parser delivers to the one that the `Describe` struct expects.

auto addDescribedVariable = [&describeClause,
&describedVariables](const Variable& variable) {
describeClause.resources_.push_back(variable);
describedVariables.push_back(variable);
};
// Convert an `Iri` as delivered by the parser to a `TripleComponent::Iri` and
// store it as one of the resources to be described. Note: The conversion has
// to use the lambda's own `iri` argument, because the loop variable of the
// loop that invokes this lambda is not visible inside the lambda body.
auto addDescribedIri = [&describeClause](const Iri& iri) {
  auto iriTc = TripleComponent::Iri::fromIriref(iri.toSparql());
  describeClause.resources_.push_back(std::move(iriTc));
};

for (GraphTerm& resource : describedResources) {
std::visit(ad_utility::OverloadCallOperator{addDescribedVariable, addDescribedIri},
resource);
}

// Parse the FROM (NAMED) clauses and store them in the `describeClause`.
auto datasetClauses = parsedQuery::DatasetClauses::fromClauses(
visitVector(ctx->datasetClause()));
clause.datasetClauses_ = datasetClauses;
describeClause.datasetClauses_ = datasetClauses;

// Parse the WHERE clause.
// TODO<joka921> The following four lines are duplicated for all the different
// types of queries. Add a `visitWhereClause` function.
if (ctx->whereClause()) {
auto [pattern, visibleVariables] = visit(ctx->whereClause());
parsedQuery_._rootGraphPattern = std::move(pattern);
parsedQuery_.registerVariablesVisibleInQueryBody(visibleVariables);
}

parsedQuery_.addSolutionModifiers(visit(ctx->solutionModifier()));
// Handle `DESCRIBE *`: describe all visible variables of the query body.
if (describedResources.empty()) {
std::ranges::for_each(parsedQuery_.selectClause().getVisibleVariables(),
addDescribedVariable);
}

auto& selectClause = parsedQuery_.selectClause();
selectClause.setSelected(std::move(describedVariables));
clause.whereClause_ = std::move(parsedQuery_);

// So far we have actually computed the subquery/WHERE clause of the DESCRIBE.
// We now store it inside the `describeClause` and set up the outer query,
// which is implemented as a CONSTRUCT query with a special DESCRIBE
// operation.
describeClause.whereClause_ = std::move(parsedQuery_);

parsedQuery_ = ParsedQuery{};
parsedQuery_._rootGraphPattern._graphPatterns.push_back(std::move(clause));
// The solution modifiers (in particular ORDER BY) have to be part of the
// outer query.
parsedQuery_.addSolutionModifiers(visit(ctx->solutionModifier()));

parsedQuery_._rootGraphPattern._graphPatterns.push_back(
std::move(describeClause));
parsedQuery_.datasetClauses_ = datasetClauses;
auto constructClause = ParsedQuery::ConstructClause{};
using G = GraphTerm;
using V = Variable;
// The outer query has the form `CONSTRUCT { ?subject ?predicate ?object}
// {...}`
constructClause.triples_.push_back(
std::array{G(V("?subject")), G(V("?predicate")), G(V("?object"))});
parsedQuery_._clause = std::move(constructClause);
Expand Down
6 changes: 2 additions & 4 deletions test/QueryPlannerTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,19 +389,17 @@ constexpr auto Union = MatchTypeAndOrderedChildren<::Union>;

// Match a `DESCRIBE` operation
inline QetMatcher Describe(
const Matcher<const parsedQuery::Describe&> describeMatcher,
const Matcher<const parsedQuery::Describe&>& describeMatcher,
const QetMatcher& childMatcher) {
return RootOperation<::Describe>(
AllOf(children(childMatcher),
AD_PROPERTY(::Describe, getDescribe, describeMatcher)));
}

// Match a `DISTINCT` operation

//
inline QetMatcher QetWithWarnings(
const std::vector<std::string>& warningSubstrings,
QetMatcher actualMatcher) {
const QetMatcher& actualMatcher) {
auto warningMatchers = ad_utility::transform(
warningSubstrings,
[](const std::string& s) { return ::testing::HasSubstr(s); });
Expand Down
Loading

0 comments on commit 2f515e9

Please sign in to comment.