Skip to content

Commit

Permalink
A first draft of adding triples for the langmatches optimization.
Browse files Browse the repository at this point in the history
Signed-off-by: Johannes Kalmbach <[email protected]>
  • Loading branch information
joka921 committed Nov 18, 2024
1 parent 97d5037 commit cf323f1
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 19 deletions.
1 change: 1 addition & 0 deletions src/engine/sparqlExpressions/SparqlExpressionPimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class SparqlExpressionPimpl {
// The information that is extracted from a filter expression that restricts
// the language of a variable (see `getLanguageFilterExpression`).
struct LangFilterData {
  // The variable that the language filter refers to.
  Variable variable_;
  // The language that was specified in the filter expression.
  std::string language_;
  // Defaults to `false`; set to `true` when the data was obtained from a
  // `langMatches` expression.
  bool isLangmatches_{false};
};
std::optional<LangFilterData> getLanguageFilterExpression() const;

Expand Down
21 changes: 20 additions & 1 deletion src/engine/sparqlExpressions/StringExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,28 @@ using EncodeForUriExpression =
}
};

using LangMatches =
// The base implementation of the `langMatches` expression: a
// `StringExpressionImpl` that is instantiated with the `langMatching` function
// object and the `StringValueGetter`. The `2` is presumably the number of
// child expressions (the `LangMatches` class checks for exactly two children)
// -- TODO confirm against `StringExpressionImpl`.
using LangMatchesImpl =
StringExpressionImpl<2, decltype(langMatching), StringValueGetter>;

class LangMatches : public LangMatchesImpl {
public:
using LangMatchesImpl::LangMatchesImpl;
std::optional<LangFilterData> getLanguageFilterExpression() const override {
AD_CORRECTNESS_CHECK(children().size() == 2);
auto* var = dynamic_cast<const VariableExpression*>(children()[0].get());
auto* str =
dynamic_cast<const StringLiteralExpression*>(children()[1].get());
if (!(var && str)) {
return std::nullopt;
}
// TODO<joka921> We need to check whether the literal is plain. (no language
// tag or something else).
return LangFilterData{
var->value(),
std::string(asStringViewUnsafe(str->value().getContent())), true};
}
};

// STRING WITH LANGUAGE TAG
[[maybe_unused]] inline auto strLangTag =
[](std::optional<std::string> input,
Expand Down
8 changes: 6 additions & 2 deletions src/index/IndexBuilderTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,15 +243,15 @@ auto getIdMapLambdas(
// the allocation and deallocation of these hash maps (that are newly
// created for each batch) much cheaper (see `CachingMemoryResource.h` and
// `IndexImpl.cpp`).
itemArray[j]->map_.map_.reserve(5 * maxNumberOfTriples / NumThreads);
itemArray[j]->map_.map_.reserve(6 * maxNumberOfTriples / NumThreads);
// The LANGUAGE_PREDICATE gets the first ID in each map. TODO<joka921>
// This is not necessary for the actual QLever code, but certain unit tests
// currently fail without it.
itemArray[j]->getId(TripleComponent{
ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)});
}
using OptionalIds =
std::array<std::optional<std::array<Id, NumColumnsIndexBuilding>>, 3>;
std::array<std::optional<std::array<Id, NumColumnsIndexBuilding>>, 4>;

/* given an index idx, returns a lambda that
* - Takes a triple and a language tag
Expand All @@ -278,6 +278,8 @@ auto getIdMapLambdas(
.iriOrLiteral_.getIri();
auto langTaggedPredId = map.getId(TripleComponent{
ad_utility::convertToLanguageTaggedPredicate(iri, lt.langtag_)});
auto langMatchesTaggedPredId = map.getId(TripleComponent{
ad_utility::convertToLangmatchesTaggedPredicate(iri, lt.langtag_)});
auto& spoIds = *res[0]; // ids of original triple
// TODO replace the std::array by an explicit IdTriple class,
// then the emplace calls don't need the explicit type.
Expand All @@ -299,6 +301,8 @@ auto getIdMapLambdas(
ad_utility::triple_component::Iri::fromIriref(
LANGUAGE_PREDICATE)}),
langTagId, tripleGraphId});
res[3].emplace(
Arr{spoIds[0], langMatchesTaggedPredId, spoIds[2], tripleGraphId});
}
return res;
};
Expand Down
5 changes: 3 additions & 2 deletions src/parser/GraphPattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ class GraphPattern {

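// Modify query to take care of language filter. `variable` is the variable,
// `languageInQuotes` is the language. If `isLangmatches` is true, the filter
// stems from a `langMatches` expression. Returns `true` if the query was
// modified, and `false` if the filter could not be handled here; in the latter
// case the caller has to keep the original filter.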
void addLanguageFilter(const Variable& variable,
const std::string& languageInQuotes);
[[nodiscard]] bool addLanguageFilter(const Variable& variable,
const std::string& languageInQuotes,
bool isLangmatches = false);

bool _optional;

Expand Down
20 changes: 16 additions & 4 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,9 @@ void ParsedQuery::registerVariableVisibleInQueryBody(const Variable& variable) {
ParsedQuery::GraphPattern::GraphPattern() : _optional(false) {}

// __________________________________________________________________________
void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
const std::string& langTag) {
bool ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
const std::string& langTag,
bool isLangmatches) {
// Find all triples where the object is the `variable` and the predicate is
// a simple `IRIREF` (neither a variable nor a complex property path).
// Search in all the basic graph patterns, as filters have the complete
Expand All @@ -275,6 +276,10 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
// Subqueries etc.
// TODO<joka921> Also support property paths (^rdfs:label,
// skos:altLabel|rdfs:label, ...)

if (isLangmatches && langTag.find('-') != std::string::npos) {
return false;
}
std::vector<SparqlTriple*> matchingTriples;
using BasicPattern = parsedQuery::BasicGraphPattern;
namespace ad = ad_utility;
Expand All @@ -295,14 +300,20 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,

// Replace all the matching triples.
for (auto* triplePtr : matchingTriples) {
triplePtr->p_._iri = ad_utility::convertToLanguageTaggedPredicate(
triplePtr->p_._iri, langTag);
triplePtr->p_._iri = isLangmatches
? ad_utility::convertToLangmatchesTaggedPredicate(
triplePtr->p_._iri, langTag)
: ad_utility::convertToLanguageTaggedPredicate(
triplePtr->p_._iri, langTag);
}

// Handle the case, that no suitable triple (see above) was found. In this
// case a triple `?variable ql:langtag "language"` is added at the end of
// the graph pattern.
if (matchingTriples.empty()) {
if (isLangmatches) {
return false;
}
LOG(DEBUG) << "language filter variable " + variable.name() +
" did not appear as object in any suitable "
"triple. "
Expand All @@ -326,6 +337,7 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
langEntity);
t.push_back(std::move(triple));
}
return true;
}

// ____________________________________________________________________________
Expand Down
8 changes: 6 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,12 @@ GraphPattern Visitor::visit(Parser::GroupGraphPatternContext* ctx) {
if (auto langFilterData =
filter.expression_.getLanguageFilterExpression();
langFilterData.has_value()) {
const auto& [variable, language] = langFilterData.value();
pattern.addLanguageFilter(variable, language);
const auto& [variable, language, isLangmatches] =
langFilterData.value();
if (!pattern.addLanguageFilter(variable, language, isLangmatches)) {
// TODO<joka921> Code duplication.
pattern._filters.push_back(std::move(filter));
}
} else {
pattern._filters.push_back(std::move(filter));
}
Expand Down
26 changes: 22 additions & 4 deletions src/util/Conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,39 @@
namespace ad_utility {

// _________________________________________________________
triple_component::Iri convertLangtagToEntityUri(const string& tag) {
// Return the IRI that represents the language tag `tag`. It is obtained by
// passing "@" and the `tag` to `makeQleverInternalIri`.
triple_component::Iri convertLangtagToEntityUri(std::string_view tag) {
  const auto internalIri = makeQleverInternalIri("@", tag);
  return triple_component::Iri::fromIriref(internalIri);
}

// _________________________________________________________
std::string convertToLanguageTaggedPredicate(const string& pred,
const string& langtag) {
// Return the language-tagged version of the predicate `pred` for the language
// `langtag`, which has the form `@<langtag>@<pred>`.
std::string convertToLanguageTaggedPredicate(std::string_view pred,
                                             std::string_view langtag) {
  std::string taggedPredicate = absl::StrCat("@", langtag, "@", pred);
  return taggedPredicate;
}

// Return the part of `language` that comes before the first `-`. If there is
// no `-`, the complete `language` is returned. The result is a view into the
// argument.
static std::string_view getPrimaryLanguage(std::string_view language) {
  const auto dashPosition = language.find('-');
  if (dashPosition == std::string_view::npos) {
    return language;
  }
  return language.substr(0, dashPosition);
}

// _________________________________________________________
triple_component::Iri convertToLanguageTaggedPredicate(
const triple_component::Iri& pred, const std::string& langtag) {
const triple_component::Iri& pred, std::string_view langtag) {
return triple_component::Iri::fromIriref(absl::StrCat(
"@", langtag, "@<", asStringViewUnsafe(pred.getContent()), ">"));
}

// _________________________________________________________
std::string convertToLangmatchesTaggedPredicate(std::string_view pred,
std::string_view langtag) {
return absl::StrCat("@@", getPrimaryLanguage(langtag), "@@", pred);
}

// _________________________________________________________
// Return the special predicate IRI for `pred` that is used for the
// `langMatches` optimization. The IRI is built from the string
// `@@<primaryLanguage>@@<` + content of `pred` + `>`, where the primary
// language is the part of `langtag` before the first `-`.
triple_component::Iri convertToLangmatchesTaggedPredicate(
    const triple_component::Iri& pred, std::string_view langtag) {
  const std::string_view primaryLanguage = getPrimaryLanguage(langtag);
  const auto predicateContent = asStringViewUnsafe(pred.getContent());
  return triple_component::Iri::fromIriref(
      absl::StrCat("@@", primaryLanguage, "@@<", predicateContent, ">"));
}

} // namespace ad_utility
14 changes: 10 additions & 4 deletions src/util/Conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,15 @@ constexpr std::string_view languageTaggedPredicatePrefix = "@";
// TODO<joka921> The overload that takes and returns `std::string` can be
// removed as soon as we also store strongly-typed IRIs in the predicates of the
// `SparqlTriple` class.
triple_component::Iri convertLangtagToEntityUri(const std::string& tag);
std::string convertToLanguageTaggedPredicate(const std::string& pred,
const std::string& langtag);
triple_component::Iri convertLangtagToEntityUri(std::string_view tag);
std::string convertToLanguageTaggedPredicate(std::string_view pred,
std::string_view langtag);
triple_component::Iri convertToLanguageTaggedPredicate(
const triple_component::Iri& pred, const std::string& langtag);
const triple_component::Iri& pred, std::string_view langtag);

// Convert the predicate `pred` and the language tag `langtag` to the special
// predicate that is used for the `langMatches` optimization. Only the primary
// language (the part of `langtag` before the first `-`) is used. The overload
// for `std::string_view` returns `@@<primaryLanguage>@@<pred>`, the overload
// for `Iri` additionally wraps the content of `pred` in angle brackets before
// converting the result to an `Iri`.
std::string convertToLangmatchesTaggedPredicate(std::string_view pred,
std::string_view langtag);
triple_component::Iri convertToLangmatchesTaggedPredicate(
const triple_component::Iri& pred, std::string_view langtag);
} // namespace ad_utility

0 comments on commit cf323f1

Please sign in to comment.