diff --git a/src/parser/RdfParser.cpp b/src/parser/RdfParser.cpp index 1ed8d62585..6311980834 100644 --- a/src/parser/RdfParser.cpp +++ b/src/parser/RdfParser.cpp @@ -823,10 +823,17 @@ bool RdfStreamParser::resetStateAndRead( template void RdfStreamParser::initialize(const string& filename) { this->clear(); - // IMPORTANT: The current buffer must not end with a `.` (unless we are at the - // end of the file). The reason is that with a `.` at the end, we cannot - // decide whether we are in the middle of a `PN_LOCAL` (that continues in the - // next buffer) or at the end of a statement. + // Make sure that a block of data ends with a newline. This is important for + // two reasons: + // + // 1. A block of data must not end in the middle of a comment. Otherwise the + // remaining part of the comment, which is prepended to the next block, is + // not recognized as a comment. + // + // 2. A block of data must not end with a `.` (without subsequent newline). + // The reason is that with a `.` at the end, we cannot decide whether we are + // in the middle of a `PN_LOCAL` (that continues in the next buffer) or at the + // end of a statement. fileBuffer_ = std::make_unique(bufferSize_, "([\\r\\n]+)"); fileBuffer_->open(filename); diff --git a/src/parser/RdfParser.h b/src/parser/RdfParser.h index 002083223c..76929a44bb 100644 --- a/src/parser/RdfParser.h +++ b/src/parser/RdfParser.h @@ -193,28 +193,22 @@ class TurtleParser : public RdfParserBase { // `TripleComponent` since it can hold any parsing result, not only objects. TripleComponent lastParseResult_; - // Map that maps prefix names to their IRI, initially empty. - ad_utility::HashMap prefixMap_{ - {baseForRelativeIriKey_, TripleComponent::Iri{}}, - {baseForAbsoluteIriKey_, TripleComponent::Iri{}}}; + // Map that maps prefix names to their IRI. For our tests, it is important + // that without any BASE declaration, the two base prefixes are mapped to the + // empty IRI. + static const inline ad_utility::HashMap + prefixMapDefault_{{baseForRelativeIriKey_, TripleComponent::Iri{}}, + {baseForAbsoluteIriKey_, TripleComponent::Iri{}}}; + ad_utility::HashMap prefixMap_ = + prefixMapDefault_; // Getters for the two base prefixes. Without BASE declaration, these will // both return the empty IRI. - // - // TODO I would prefer to just call `prefixMap_.at(...)`, but then - // some of the tests fails because the keys are not in the map (despite the - // initialization above). const TripleComponent::Iri& baseForRelativeIri() { - // return prefixMap_.at(baseForRelativeIriKey_); - return prefixMap_ - .try_emplace(baseForRelativeIriKey_, TripleComponent::Iri{}) - .first->second; + return prefixMap_.at(baseForRelativeIriKey_); } const TripleComponent::Iri& baseForAbsoluteIri() { - // return prefixMap_.at(baseForAbsoluteIriKey_); - return prefixMap_ - .try_emplace(baseForAbsoluteIriKey_, TripleComponent::Iri{}) - .first->second; + return prefixMap_.at(baseForAbsoluteIriKey_); } // There are turtle constructs that reuse prefixes, subjects and predicates @@ -248,7 +242,7 @@ class TurtleParser : public RdfParserBase { activePredicate_ = TripleComponent::Iri::fromIriref("<>"); activePrefix_.clear(); - prefixMap_.clear(); + prefixMap_ = prefixMapDefault_; tok_.reset(nullptr, 0); triples_.clear(); @@ -544,8 +538,6 @@ class RdfStringParser : public Parser { this->tok_.reset(tmpToParse_.data(), tmpToParse_.size()); } - void setPrefixMap(decltype(prefixMap_) m) { prefixMap_ = std::move(m); } - const auto& getPrefixMap() const { return prefixMap_; } // __________________________________________________________ diff --git a/test/parser/LiteralOrIriTest.cpp b/test/parser/LiteralOrIriTest.cpp index a967301537..3c7ad863af 100644 --- a/test/parser/LiteralOrIriTest.cpp +++ b/test/parser/LiteralOrIriTest.cpp @@ -44,14 +44,8 @@ TEST(IriTest, getBaseIri) { ""); EXPECT_EQ(getBaseIri("", false), ""); - // This leads to the following assertion failure and I don't understand why. - // - // C++ exception with description "Assertion `input.starts_with("<") && - // input.ends_with(">")` failed. Please report this to the developers. In file - // "/local/data-ssd/qlever/qlever-code/src/parser/RdfEscaping.cpp " at line - // 232" thrown in the test body. - // EXPECT_EQ(getBaseIri(""); + EXPECT_EQ(getBaseIri("", true), + ""); EXPECT_EQ(getBaseIri("", true), ""); // IRI without scheme.