Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warn instead of throwing when parsing invalid WKT POINTs #1525

Merged
merged 6 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 28 additions & 37 deletions src/parser/RdfParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <absl/strings/charconv.h>

#include <cstring>
#include <exception>
#include <functional>
#include <optional>

#include "global/Constants.h"
#include "parser/GeoPoint.h"
Expand Down Expand Up @@ -466,6 +468,23 @@ TripleComponent TurtleParser<T>::literalAndDatatypeToTripleComponentImpl(
asNormalizedStringViewUnsafe(normalizedLiteralContent));
std::string_view type = asStringViewUnsafe(typeIri.getContent());

// Helper to handle literals that are invalid for the respective datatype
auto makeNormalLiteral = [parser, literal, normalizedLiteralContent,
type](std::optional<std::exception> error =
std::nullopt) {
std::string errorMsg = "";
if (error.has_value()) {
errorMsg = error.value().what();
}
ullingerc marked this conversation as resolved.
Show resolved Hide resolved
LOG(DEBUG) << normalizedLiteralContent
<< " could not be parsed as an object of type " << type << ":"
ullingerc marked this conversation as resolved.
Show resolved Hide resolved
<< errorMsg
<< ". It is treated as a plain string literal without datatype "
"instead."
<< std::endl;
parser->lastParseResult_ = std::move(literal);
};

try {
if (ad_utility::contains(integerDatatypes_, type)) {
parser->parseIntegerConstant(normalizedLiteralContent);
Expand All @@ -475,13 +494,7 @@ TripleComponent TurtleParser<T>::literalAndDatatypeToTripleComponentImpl(
} else if (normalizedLiteralContent == "false") {
parser->lastParseResult_ = false;
} else {
LOG(DEBUG)
<< normalizedLiteralContent
<< " could not be parsed as a boolean object of type " << type
<< ". It is treated as a plain string literal without datatype "
"instead"
<< std::endl;
parser->lastParseResult_ = std::move(literal);
makeNormalLiteral();
}
} else if (ad_utility::contains(floatDatatypes_, type)) {
parser->parseDoubleConstant(normalizedLiteralContent);
Expand Down Expand Up @@ -513,38 +526,16 @@ TripleComponent TurtleParser<T>::literalAndDatatypeToTripleComponentImpl(
literal.addDatatype(typeIri);
parser->lastParseResult_ = std::move(literal);
}
} catch (const DateParseException&) {
LOG(DEBUG) << normalizedLiteralContent
<< " could not be parsed as a date object of type " << type
<< ". It is treated as a plain string literal without datatype "
"instead"
<< std::endl;
parser->lastParseResult_ = std::move(literal);
} catch (const DateParseException& ex) {
makeNormalLiteral(ex);
} catch (const DateOutOfRangeException& ex) {
LOG(DEBUG)
<< normalizedLiteralContent
<< " could not be parsed as a date object for the following reason: "
<< ex.what()
<< ". It is treated as a plain string literal without datatype "
"instead"
<< std::endl;
parser->lastParseResult_ = std::move(literal);
} catch (const DurationParseException&) {
LOG(DEBUG) << normalizedLiteralContent
<< " could not be parsed as a duration object of type " << type
<< ". It is treated as a plain string literal without datatype "
"instead"
<< std::endl;
parser->lastParseResult_ = std::move(literal);
makeNormalLiteral(ex);
} catch (const DurationParseException& ex) {
makeNormalLiteral(ex);
} catch (const DurationOverflowException& ex) {
LOG(DEBUG) << normalizedLiteralContent
<< " could not be parsed as duration object for the following "
"reason: "
<< ex.what()
<< ". It is treated as a plain string literal without datatype "
"instead"
<< std::endl;
parser->lastParseResult_ = std::move(literal);
makeNormalLiteral(ex);
} catch (const CoordinateOutOfRangeException& ex) {
makeNormalLiteral(ex);
} catch (const std::exception& e) {
parser->raise(e.what());
}
Expand Down
5 changes: 5 additions & 0 deletions test/RdfParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,11 @@ TEST(RdfParserTest, literalAndDatatypeToTripleComponent) {
"POLYGON(7.8 47.9, 40.0 40.5, 10.9 20.5)");
EXPECT_EQ(asStringViewUnsafe(result2.getLiteral().getDatatype()),
GEO_WKT_LITERAL);
// Invalid points should be converted to plain string literals
auto result3 = ladttc("POINT(0.0 99.9999)", fromIri(GEO_WKT_LITERAL));
ASSERT_FALSE(result3.getLiteral().hasDatatype());
ASSERT_EQ(asStringViewUnsafe(result3.getLiteral().getContent()),
"POINT(0.0 99.9999)");
}

TEST(RdfParserTest, blankNode) {
Expand Down
Loading