Skip to content

Commit

Permalink
Implement ASK queries (#1562)
Browse files Browse the repository at this point in the history
  • Loading branch information
joka921 authored Oct 24, 2024
1 parent f856919 commit 2ccfb50
Show file tree
Hide file tree
Showing 8 changed files with 330 additions and 31 deletions.
121 changes: 101 additions & 20 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>
// Copyright 2022 - 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Author: Johannes Kalmbach <[email protected]>

#include "ExportQueryExecutionTrees.h"

Expand All @@ -12,6 +12,63 @@
#include "util/ConstexprUtils.h"
#include "util/http/MediaTypes.h"

// Return true iff the `result` is nonempty. A fully materialized result is
// checked via its single `IdTable`; otherwise the tables yielded by
// `idTables()` are inspected one after another until a nonempty one is found.
bool getResultForAsk(const std::shared_ptr<const Result>& result) {
  if (!result->isFullyMaterialized()) {
    auto hasRows = [](const auto& tableAndVocab) {
      return !tableAndVocab.idTable_.empty();
    };
    return std::ranges::any_of(result->idTables(), hasRows);
  }
  return !result->idTable().empty();
}

// _____________________________________________________________________________
// Export the result of an ASK query in the format given by `mediaType`.
// The query result is obtained from `qet` and reduced to a single bool by
// `getResultForAsk`. Exactly one chunk is yielded for `sparqlXml` and
// `sparqlJson`; for every other media type a `std::runtime_error` is thrown.
// The parameters `parsedQuery` and `requestTimer` are currently unused.
// NOTE(review): this function is a coroutine (it uses `co_yield`), so when
// exactly the body (including the `throw`) runs depends on `stream_generator`
// — confirm before relying on eager error reporting.
ad_utility::streams::stream_generator computeResultForAsk(
[[maybe_unused]] const ParsedQuery& parsedQuery,
const QueryExecutionTree& qet, ad_utility::MediaType mediaType,
[[maybe_unused]] const ad_utility::Timer& requestTimer) {
// Compute the result of the ASK query.
// NOTE(review): the `true` argument presumably requests a lazy result —
// confirm against `QueryExecutionTree::getResult`.
bool result = getResultForAsk(qet.getResult(true));

// Lambda that returns the result bool in XML format.
// The whitespace inside the raw string literal is part of the output.
auto getXmlResult = [result]() {
std::string xmlTemplate = R"(<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
<head/>
<boolean>true</boolean>
</sparql>)";

if (result) {
return xmlTemplate;
} else {
// The substring "true" occurs exactly once in the template (inside the
// `<boolean>` element), so this replacement only flips the result value.
return absl::StrReplaceAll(xmlTemplate, {{"true", "false"}});
}
};

// Lambda that returns the result bool in SPARQL JSON format.
// The produced object has an empty `head` object and a `boolean` member.
auto getSparqlJsonResult = [result]() {
nlohmann::json j;
j["head"] = nlohmann::json::object_t{};
j["boolean"] = result;
return j.dump();
};

// Return the result in the requested format.
using enum ad_utility::MediaType;
switch (mediaType) {
case sparqlXml:
co_yield getXmlResult();
break;
case sparqlJson:
co_yield getSparqlJsonResult();
break;
default:
// This branch is taken for every media type other than the two handled
// above, not only for the formats named in the message.
throw std::runtime_error{
"ASK queries are not supported for TSV or CSV or binary format."};
}
}

// __________________________________________________________________________
cppcoro::generator<ExportQueryExecutionTrees::TableConstRefWithVocab>
ExportQueryExecutionTrees::getIdTables(const Result& result) {
Expand Down Expand Up @@ -366,6 +423,17 @@ static nlohmann::json stringAndTypeToBinding(std::string_view entitystr,
return b;
}

// _____________________________________________________________________________
// Yield the QLever-JSON binding row for an ASK query: a JSON array with a
// single string, namely the boolean answer rendered as a literal that is typed
// with `XSD_BOOLEAN_TYPE`. The `result` must not be null.
cppcoro::generator<std::string> askQueryResultToQLeverJSON(
    std::shared_ptr<const Result> result) {
  AD_CORRECTNESS_CHECK(result != nullptr);
  const char* boolText = getResultForAsk(result) ? "true" : "false";
  // The row has exactly one column holding the typed boolean literal.
  nlohmann::json row = nlohmann::json::array();
  row.push_back(absl::StrCat("\"", boolText, "\"^^<", XSD_BOOLEAN_TYPE, ">"));
  co_yield row.dump();
}

// _____________________________________________________________________________
cppcoro::generator<std::string>
ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON(
Expand Down Expand Up @@ -732,15 +800,19 @@ cppcoro::generator<std::string> ExportQueryExecutionTrees::computeResult(
if constexpr (format == MediaType::qleverJson) {
return computeResultAsQLeverJSON(parsedQuery, qet, requestTimer,
std::move(cancellationHandle));
} else {
if (parsedQuery.hasAskClause()) {
return computeResultForAsk(parsedQuery, qet, mediaType, requestTimer);
}
return parsedQuery.hasSelectClause()
? selectQueryResultToStream<format>(
qet, parsedQuery.selectClause(),
parsedQuery._limitOffset, std::move(cancellationHandle))
: constructQueryResultToStream<format>(
qet, parsedQuery.constructClause().triples_,
parsedQuery._limitOffset, qet.getResult(true),
std::move(cancellationHandle));
}
return parsedQuery.hasSelectClause()
? selectQueryResultToStream<format>(
qet, parsedQuery.selectClause(), parsedQuery._limitOffset,
std::move(cancellationHandle))
: constructQueryResultToStream<format>(
qet, parsedQuery.constructClause().triples_,
parsedQuery._limitOffset, qet.getResult(true),
std::move(cancellationHandle));
};

using enum MediaType;
Expand Down Expand Up @@ -773,23 +845,32 @@ ExportQueryExecutionTrees::computeResultAsQLeverJSON(
if (query.hasSelectClause()) {
jsonPrefix["selected"] =
query.selectClause().getSelectedVariablesAsStrings();
} else {
} else if (query.hasConstructClause()) {
jsonPrefix["selected"] =
std::vector<std::string>{"?subject", "?predicate", "?object"};
} else {
AD_CORRECTNESS_CHECK(query.hasAskClause());
jsonPrefix["selected"] = std::vector<std::string>{"?result"};
}

std::string prefixStr = jsonPrefix.dump();
co_yield absl::StrCat(prefixStr.substr(0, prefixStr.size() - 1),
R"(,"res":[)");

auto bindings =
query.hasSelectClause()
? selectQueryResultBindingsToQLeverJSON(
qet, query.selectClause(), query._limitOffset,
std::move(result), std::move(cancellationHandle))
: constructQueryResultBindingsToQLeverJSON(
qet, query.constructClause().triples_, query._limitOffset,
std::move(result), std::move(cancellationHandle));
auto bindings = [&]() {
if (query.hasSelectClause()) {
return selectQueryResultBindingsToQLeverJSON(
qet, query.selectClause(), query._limitOffset, std::move(result),
std::move(cancellationHandle));
} else if (query.hasConstructClause()) {
return constructQueryResultBindingsToQLeverJSON(
qet, query.constructClause().triples_, query._limitOffset,
std::move(result), std::move(cancellationHandle));
} else {
// TODO<joka921>: Refactor this to use std::visit.
return askQueryResultToQLeverJSON(std::move(result));
}
}();

size_t resultSize = 0;
for (const std::string& b : bindings) {
Expand Down
6 changes: 4 additions & 2 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,7 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
// expressions
selectClause.deleteAliasesButKeepVariables();
}
} else {
AD_CORRECTNESS_CHECK(hasConstructClause());
} else if (hasConstructClause()) {
if (_groupByVariables.empty()) {
return;
}
Expand All @@ -232,6 +231,9 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
noteForGroupByError);
}
}
} else {
// TODO<joka921> refactor this to use `std::visit`. It is much safer.
AD_CORRECTNESS_CHECK(hasAskClause());
}
}

Expand Down
17 changes: 13 additions & 4 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class ParsedQuery {

using DatasetClauses = parsedQuery::DatasetClauses;

// The header clause of an ASK query. ASK queries have no further context in
// the header, so this struct is empty apart from what it inherits from
// `parsedQuery::ClauseBase`. It serves as the alternative of the `_clause`
// variant that marks a query as an ASK query (see `hasAskClause`).
struct AskClause : public parsedQuery::ClauseBase {};

ParsedQuery() = default;

GraphPattern _rootGraphPattern;
Expand All @@ -99,10 +103,11 @@ class ParsedQuery {
LimitOffsetClause _limitOffset{};
string _originalString;

// explicit default initialisation because the constructor
// of SelectClause is private
std::variant<SelectClause, ConstructClause, UpdateClause> _clause{
SelectClause{}};
using HeaderClause =
std::variant<SelectClause, ConstructClause, UpdateClause, AskClause>;
// Use explicit default initialization for `SelectClause` because its
// constructor is private.
HeaderClause _clause{SelectClause{}};

// The IRIs from the FROM and FROM NAMED clauses.
DatasetClauses datasetClauses_;
Expand All @@ -119,6 +124,10 @@ class ParsedQuery {
return std::holds_alternative<UpdateClause>(_clause);
}

// Return true iff the header clause stored in `_clause` is an `AskClause`.
bool hasAskClause() const {
  return std::get_if<AskClause>(&_clause) != nullptr;
}

// Return the `SelectClause` stored in `_clause`. Because of `decltype(auto)`
// the clause is returned by const reference and not copied. `std::get` throws
// `std::bad_variant_access` if `_clause` holds a different alternative.
[[nodiscard]] decltype(auto) selectClause() const {
return std::get<SelectClause>(_clause);
}
Expand Down
24 changes: 22 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,28 @@ ParsedQuery Visitor::visit(const Parser::DescribeQueryContext* ctx) {
}

// ____________________________________________________________________________________
ParsedQuery Visitor::visit(const Parser::AskQueryContext* ctx) {
reportNotSupported(ctx, "ASK queries are");
// Parse an ASK query: store an `AskClause` as the header, then handle the
// dataset clauses, the WHERE clause, and the solution modifiers. Explicit
// LIMIT, OFFSET, or TEXTLIMIT clauses are reported as errors.
ParsedQuery Visitor::visit(Parser::AskQueryContext* ctx) {
  parsedQuery_._clause = ParsedQuery::AskClause{};
  parsedQuery_.datasetClauses_ = parsedQuery::DatasetClauses::fromClauses(
      visitVector(ctx->datasetClause()));

  auto [graphPattern, variablesInBody] = visit(ctx->whereClause());
  parsedQuery_._rootGraphPattern = std::move(graphPattern);
  parsedQuery_.registerVariablesVisibleInQueryBody(variablesInBody);

  // NOTE: It can make sense to have solution modifiers with an ASK query, for
  // example, a GROUP BY with a HAVING.
  auto modifiers = visit(ctx->solutionModifier());
  const auto& limitOffset = modifiers.limitOffset_;
  const bool hasExplicitLimit =
      !limitOffset.isUnconstrained() || limitOffset.textLimit_.has_value();
  if (hasExplicitLimit) {
    reportError(
        ctx->solutionModifier(),
        "ASK queries may not contain LIMIT, OFFSET, or TEXTLIMIT clauses");
  }
  // Always evaluate the query with a limit of 1 (presumably because a single
  // row already decides the answer of an ASK query).
  modifiers.limitOffset_._limit = 1;
  parsedQuery_.addSolutionModifiers(std::move(modifiers));
  return parsedQuery_;
}

// ____________________________________________________________________________________
Expand Down
2 changes: 1 addition & 1 deletion src/parser/sparqlParser/SparqlQleverVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class SparqlQleverVisitor {
[[noreturn]] static ParsedQuery visit(
const Parser::DescribeQueryContext* ctx);

[[noreturn]] static ParsedQuery visit(const Parser::AskQueryContext* ctx);
ParsedQuery visit(Parser::AskQueryContext* ctx);

DatasetClause visit(Parser::DatasetClauseContext* ctx);

Expand Down
87 changes: 87 additions & 0 deletions test/ExportQueryExecutionTreesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,19 @@ struct TestCaseSelectQuery {
std::string resultXml;
};

// A test case for the execution and export of a single ASK query. It bundles
// the input (knowledge graph and query) with the expected output in each of
// the formats that `runAskQueryTestCase` checks.
struct TestCaseAskQuery {
// The knowledge graph (TURTLE).
std::string kg;
// The query (SPARQL).
std::string query;
// The expected result in QLeverJSON format. Note: this member only contains
// the inner result array with the bindings and NOT the metadata.
nlohmann::json resultQLeverJSON;
// The expected result in SparqlJSON format.
nlohmann::json resultSparqlJSON;
// The expected result in SPARQL XML format; it is compared as an exact string.
std::string resultXml;
};

struct TestCaseConstructQuery {
std::string kg; // The knowledge graph (TURTLE)
std::string query; // The query (SPARQL)
Expand Down Expand Up @@ -152,6 +165,34 @@ void runConstructQueryTestCase(
testCase.resultTurtle);
}

// Run a single test case for an ASK query.
void runAskQueryTestCase(
const TestCaseAskQuery& testCase,
ad_utility::source_location l = ad_utility::source_location::current()) {
auto trace = generateLocationTrace(l, "runAskQueryTestCase");
using enum ad_utility::MediaType;
// TODO<joka921> match the exception
EXPECT_ANY_THROW(runQueryStreamableResult(testCase.kg, testCase.query, tsv));
EXPECT_ANY_THROW(runQueryStreamableResult(testCase.kg, testCase.query, csv));
EXPECT_ANY_THROW(
runQueryStreamableResult(testCase.kg, testCase.query, octetStream));
EXPECT_ANY_THROW(
runQueryStreamableResult(testCase.kg, testCase.query, turtle));
auto qleverJSONStreamResult = nlohmann::json::parse(
runQueryStreamableResult(testCase.kg, testCase.query, qleverJson));
ASSERT_EQ(qleverJSONStreamResult["query"], testCase.query);
ASSERT_EQ(qleverJSONStreamResult["resultsize"], 1u);
EXPECT_EQ(qleverJSONStreamResult["res"], testCase.resultQLeverJSON);

EXPECT_EQ(nlohmann::json::parse(runQueryStreamableResult(
testCase.kg, testCase.query, sparqlJson)),
testCase.resultSparqlJSON);

auto xmlAsString =
runQueryStreamableResult(testCase.kg, testCase.query, sparqlXml);
EXPECT_EQ(testCase.resultXml, xmlAsString);
}

// Create a `json` that can be used as the `resultQLeverJSON` of a
// `TestCaseSelectQuery`. This function can only be used when there is a single
// variable in the result. The `values` then become the bindings of that
Expand Down Expand Up @@ -1176,6 +1217,52 @@ TEST(ExportQueryExecutionTrees, CornerCases) {
::testing::ContainsRegex("should be unreachable"));
}

// Test the correct exporting of ASK queries. All four combinations of the
// expected answer (true/false) and the kind of query (`lazy` or not) are run.
TEST(ExportQueryExecutionTrees, AskQuery) {
  // Create the test case for the given expected answer. With `lazy == true`
  // the query is a triple pattern on a one-triple knowledge graph, otherwise
  // it is a BIND + FILTER on an empty knowledge graph.
  auto makeTestCase = [](bool expected, bool lazy) {
    TestCaseAskQuery testCase;
    if (lazy) {
      testCase.kg = "<x> <y> <z>";
      testCase.query = expected ? "ASK { <x> ?p ?o}" : "ASK { <y> ?p ?o}";
    } else {
      testCase.query = expected ? "ASK { BIND (3 as ?x) FILTER (?x > 0)}"
                                : "ASK { BIND (3 as ?x) FILTER (?x < 0)}";
    }

    const std::string boolText = expected ? "true" : "false";
    testCase.resultQLeverJSON = nlohmann::json{std::vector<std::string>{
        "\"" + boolText + "\"^^<http://www.w3.org/2001/XMLSchema#boolean>"}};
    testCase.resultSparqlJSON =
        nlohmann::json::parse(expected ? R"({"head":{ }, "boolean" : true})"
                                       : R"({"head":{ }, "boolean" : false})");
    testCase.resultXml =
        "<?xml version=\"1.0\"?>\n<sparql "
        "xmlns=\"http://www.w3.org/2005/sparql-results#\">\n <head/>\n "
        "<boolean>" +
        boolText + "</boolean>\n</sparql>";
    return testCase;
  };

  runAskQueryTestCase(makeTestCase(true, true));
  runAskQueryTestCase(makeTestCase(true, false));
  runAskQueryTestCase(makeTestCase(false, true));
  runAskQueryTestCase(makeTestCase(false, false));
}

using enum ad_utility::MediaType;

// ____________________________________________________________________________
Expand Down
Loading

0 comments on commit 2ccfb50

Please sign in to comment.