Skip to content

Commit

Permalink
Final changes in 1-1 with Johannes
Browse files Browse the repository at this point in the history
  • Loading branch information
Hannah Bast committed Dec 10, 2024
1 parent d282aa9 commit 44bbb56
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 31 deletions.
15 changes: 11 additions & 4 deletions src/parser/RdfParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -823,10 +823,17 @@ bool RdfStreamParser<T>::resetStateAndRead(
template <class T>
void RdfStreamParser<T>::initialize(const string& filename) {
this->clear();
// IMPORTANT: The current buffer must not end with a `.` (unless we are at the
// end of the file). The reason is that with a `.` at the end, we cannot
// decide whether we are in the middle of a `PN_LOCAL` (that continues in the
// next buffer) or at the end of a statement.
// Make sure that a block of data ends with a newline. This is important for
// two reasons:
//
// 1. A block of data must not end in the middle of a comment. Otherwise the
// remaining part of the comment, which is prepended to the next block, is
// not recognized as a comment.
//
// 2. A block of data must not end with a `.` (without subsequent newline).
// The reason is that with a `.` at the end, we cannot decide whether we are
// in the middle of a `PN_LOCAL` (that continues in the next buffer) or at the
// end of a statement.
fileBuffer_ =
std::make_unique<ParallelBufferWithEndRegex>(bufferSize_, "([\\r\\n]+)");
fileBuffer_->open(filename);
Expand Down
30 changes: 11 additions & 19 deletions src/parser/RdfParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,28 +193,22 @@ class TurtleParser : public RdfParserBase {
// `TripleComponent` since it can hold any parsing result, not only objects.
TripleComponent lastParseResult_;

// Map that maps prefix names to their IRI, initially empty.
ad_utility::HashMap<std::string, TripleComponent::Iri> prefixMap_{
{baseForRelativeIriKey_, TripleComponent::Iri{}},
{baseForAbsoluteIriKey_, TripleComponent::Iri{}}};
// Map that maps prefix names to their IRI. For our tests, it is important
// that without any BASE declaration, the two base prefixes are mapped to the
// empty IRI.
static const inline ad_utility::HashMap<std::string, TripleComponent::Iri>
prefixMapDefault_{{baseForRelativeIriKey_, TripleComponent::Iri{}},
{baseForAbsoluteIriKey_, TripleComponent::Iri{}}};
ad_utility::HashMap<std::string, TripleComponent::Iri> prefixMap_ =
prefixMapDefault_;

// Getters for the two base prefixes. Without BASE declaration, these will
// both return the empty IRI.
//
// TODO<hannah> I would prefer to just call `prefixMap_.at(...)`, but then
// some of the tests fail because the keys are not in the map (despite the
// initialization above).
const TripleComponent::Iri& baseForRelativeIri() {
// return prefixMap_.at(baseForRelativeIriKey_);
return prefixMap_
.try_emplace(baseForRelativeIriKey_, TripleComponent::Iri{})
.first->second;
return prefixMap_.at(baseForRelativeIriKey_);
}
const TripleComponent::Iri& baseForAbsoluteIri() {
// return prefixMap_.at(baseForAbsoluteIriKey_);
return prefixMap_
.try_emplace(baseForAbsoluteIriKey_, TripleComponent::Iri{})
.first->second;
return prefixMap_.at(baseForAbsoluteIriKey_);
}

// There are turtle constructs that reuse prefixes, subjects and predicates
Expand Down Expand Up @@ -248,7 +242,7 @@ class TurtleParser : public RdfParserBase {
activePredicate_ = TripleComponent::Iri::fromIriref("<>");
activePrefix_.clear();

prefixMap_.clear();
prefixMap_ = prefixMapDefault_;

tok_.reset(nullptr, 0);
triples_.clear();
Expand Down Expand Up @@ -544,8 +538,6 @@ class RdfStringParser : public Parser {
this->tok_.reset(tmpToParse_.data(), tmpToParse_.size());
}

void setPrefixMap(decltype(prefixMap_) m) { prefixMap_ = std::move(m); }

const auto& getPrefixMap() const { return prefixMap_; }

// __________________________________________________________
Expand Down
10 changes: 2 additions & 8 deletions test/parser/LiteralOrIriTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,8 @@ TEST(IriTest, getBaseIri) {
"<http://purl.uniprot.org/>");
EXPECT_EQ(getBaseIri("<http://purl.uniprot.org>", false),
"<http://purl.uniprot.org/>");
// This leads to the following assertion failure and I don't understand why.
//
// C++ exception with description "Assertion `input.starts_with("<") &&
// input.ends_with(">")` failed. Please report this to the developers. In file
// "/local/data-ssd/qlever/qlever-code/src/parser/RdfEscaping.cpp " at line
// 232" thrown in the test body.
// EXPECT_EQ(getBaseIri("<http://purl.uniprot.org/", true),
// "<http://purl.uniprot.org/>");
EXPECT_EQ(getBaseIri("<http://purl.uniprot.org/>", true),
"<http://purl.uniprot.org/>");
EXPECT_EQ(getBaseIri("<http://purl.uniprot.org>", true),
"<http://purl.uniprot.org/>");
// IRI without scheme.
Expand Down

0 comments on commit 44bbb56

Please sign in to comment.