diff --git a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/Cardinal.java b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/Cardinal.java
index 226407d4..6dca9301 100644
--- a/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/Cardinal.java
+++ b/sparql-anything-engine/src/main/java/io/github/sparqlanything/engine/functions/Cardinal.java
@@ -24,7 +24,7 @@
import org.apache.jena.sparql.expr.NodeValue;
import org.apache.jena.sparql.function.FunctionBase1;
import org.apache.jena.sparql.util.FmtUtils;
-import org.semarglproject.vocab.XSD;
+import io.github.sparqlanything.html.org.semarglproject.vocab.XSD;
public class Cardinal extends FunctionBase1 implements FXFunction {
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NQuadsParser.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NQuadsParser.java
new file mode 100644
index 00000000..7a96542d
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NQuadsParser.java
@@ -0,0 +1,407 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.Pipe;
+import io.github.sparqlanything.html.org.semarglproject.sink.QuadSink;
+import io.github.sparqlanything.html.org.semarglproject.source.StreamProcessor;
+
+import java.util.BitSet;
+
+/**
+ * Implementation of streaming NQuads parser.
+ *
+ * List of supported options:
+ *
+ * {@link StreamProcessor#PROCESSOR_GRAPH_HANDLER_PROPERTY}
+ * {@link StreamProcessor#ENABLE_ERROR_RECOVERY}
+ *
+ */
+public final class NQuadsParser extends Pipe implements CharSink {
+
+ /**
+ * Class URI for errors produced by a parser
+ */
+ public static final String ERROR = "http://semarglproject.org/nquads/Error";
+
+ private static final short PARSING_OUTSIDE = 0;
+ private static final short PARSING_URI = 1;
+ private static final short PARSING_BNODE = 2;
+ private static final short PARSING_LITERAL = 3;
+ private static final short PARSING_AFTER_LITERAL = 4;
+ private static final short PARSING_LITERAL_TYPE = 5;
+ private static final short PARSING_COMMENT = 6;
+
+ private static final short OBJECT_NON_LITERAL = 0;
+ private static final short OBJECT_PLAIN_LITERAL = 1;
+ private static final short OBJECT_TYPED_LITERAL = 2;
+
+ private static final char SENTENCE_END = '.';
+
+ /**
+ * NQuads whitespace char checker
+ */
+ private static final BitSet WHITESPACE = new BitSet();
+
+ static {
+ WHITESPACE.set('\t');
+ WHITESPACE.set(' ');
+ WHITESPACE.set('\r');
+ WHITESPACE.set('\n');
+ }
+
+
+ private String subj = null;
+ private String pred = null;
+ private String literal = null;
+ private String literalType = null; // type or lang for non-plain literals
+ private byte quadType = -1;
+
+ private io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler processorGraphHandler = null;
+ private boolean ignoreErrors = false;
+ private boolean skipSentence = false;
+
+ private short parsingState;
+
+ private int tokenStartPos;
+ private short charsToEscape = 0;
+ private boolean waitingForSentenceEnd = false;
+ private StringBuilder addBuffer = null;
+
+ private NQuadsParser(QuadSink sink) {
+ super(sink);
+ }
+
+ /**
+ * Creates instance of NQuadsParser connected to specified sink.
+ * @param sink sink to be connected to
+ * @return instance of NQuadsParser
+ */
+ public static CharSink connect(QuadSink sink) {
+ return new NQuadsParser(sink);
+ }
+
+ private void error(String msg) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (processorGraphHandler != null) {
+ processorGraphHandler.error(ERROR, msg);
+ }
+ if (!ignoreErrors) {
+ throw new io.github.sparqlanything.html.org.semarglproject.rdf.ParseException(msg);
+ } else {
+ resetQuad();
+ skipSentence = true;
+ parsingState = PARSING_OUTSIDE;
+ }
+ }
+
+ @Override
+ public NQuadsParser process(String str) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ return process(str.toCharArray(), 0, str.length());
+ }
+
+ @Override
+ public NQuadsParser process(char ch) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ char[] buffer = new char[1];
+ buffer[0] = ch;
+ return process(buffer, 0, 1);
+ }
+
+ @Override
+ public NQuadsParser process(char[] buffer, int start, int count) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (tokenStartPos != -1) {
+ tokenStartPos = start;
+ }
+ int end = start + count;
+
+ for (int pos = start; pos < end; pos++) {
+ if (skipSentence && buffer[pos] != SENTENCE_END) {
+ continue;
+ } else {
+ skipSentence = false;
+ }
+
+ if (parsingState == PARSING_OUTSIDE) {
+ processOutsideChar(buffer, pos);
+ } else if (parsingState == PARSING_COMMENT) {
+ if (buffer[pos] == '\n' || buffer[pos] == '\r') {
+ parsingState = PARSING_OUTSIDE;
+ }
+ } else if (parsingState == PARSING_URI) {
+ if (buffer[pos] == '>') {
+ onNonLiteral(unescape(extractToken(buffer, pos, 1)));
+ parsingState = PARSING_OUTSIDE;
+ }
+ } else if (parsingState == PARSING_BNODE) {
+ if (WHITESPACE.get(buffer[pos]) || buffer[pos] == SENTENCE_END) {
+ onNonLiteral(extractToken(buffer, pos - 1, 0));
+ parsingState = PARSING_OUTSIDE;
+ }
+ } else if (parsingState == PARSING_LITERAL) {
+ processLiteralChar(buffer, pos);
+ } else if (parsingState == PARSING_AFTER_LITERAL) {
+ if (buffer[pos] == '@' || buffer[pos] == '^') {
+ tokenStartPos = pos;
+ parsingState = PARSING_LITERAL_TYPE;
+ } else if (WHITESPACE.get(buffer[pos]) || buffer[pos] == '<') {
+ onPlainLiteral(literal, null);
+ parsingState = PARSING_OUTSIDE;
+ processOutsideChar(buffer, pos);
+ } else {
+ error("Unexpected character '" + buffer[pos] + "' after literal in string '" + new String(buffer) + "'");
+ }
+ } else if (parsingState == PARSING_LITERAL_TYPE) {
+ processLiteralTypeChar(buffer, pos);
+ }
+ }
+ if (tokenStartPos != -1) {
+ if (addBuffer == null) {
+ addBuffer = new StringBuilder();
+ }
+ addBuffer.append(buffer, tokenStartPos, end - tokenStartPos);
+ }
+ return this;
+ }
+
+ private void processLiteralChar(char[] buffer, int pos) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (charsToEscape == 9 && buffer[pos] == 'u') {
+ charsToEscape -= 5;
+ } else if (charsToEscape == 9 && buffer[pos] != 'U') {
+ charsToEscape = 0;
+ } else if (charsToEscape > 0) {
+ charsToEscape--;
+ } else {
+ if (buffer[pos] == '\"') {
+ literal = unescape(extractToken(buffer, pos, 1));
+ parsingState = PARSING_AFTER_LITERAL;
+ } else if (buffer[pos] == '\\') {
+ charsToEscape = 9;
+ }
+ }
+ }
+
+ private void processLiteralTypeChar(char[] buffer, int pos) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (WHITESPACE.get(buffer[pos])) {
+ String type = extractToken(buffer, pos, 0);
+ int trimSize = type.charAt(type.length() - 1) == SENTENCE_END ? 1 : 0;
+ if (type.charAt(0) == '@') {
+ onPlainLiteral(literal, type.substring(1, type.length() - 1 - trimSize));
+ } else if (type.startsWith("^^<") && type.charAt(type.length() - 2) == '>') {
+ onTypedLiteral(literal, type.substring(3, type.length() - 2 - trimSize));
+ } else {
+ error("Literal type '" + type + "' can not be parsed");
+ }
+ parsingState = PARSING_OUTSIDE;
+ if (trimSize > 0) {
+ finishSentence();
+ }
+ }
+ }
+
+ private void processOutsideChar(char[] buffer, int pos) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ switch (buffer[pos]) {
+ case '\"':
+ parsingState = PARSING_LITERAL;
+ tokenStartPos = pos;
+ break;
+ case '<':
+ parsingState = PARSING_URI;
+ tokenStartPos = pos;
+ break;
+ case '_':
+ parsingState = PARSING_BNODE;
+ tokenStartPos = pos;
+ break;
+ case '#':
+ parsingState = PARSING_COMMENT;
+ break;
+ case SENTENCE_END:
+ finishSentence();
+ break;
+ default:
+ if (!WHITESPACE.get(buffer[pos])) {
+ error("Unexpected character '" + buffer[pos] + "'");
+ }
+ }
+ }
+
+ private void finishSentence() throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (waitingForSentenceEnd) {
+ waitingForSentenceEnd = false;
+ } else {
+ error("Unexpected end of sentence");
+ }
+ }
+
+ private void onNonLiteral(String uri) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (waitingForSentenceEnd) {
+ error("End of sentence expected");
+ }
+ if (subj == null) {
+ subj = uri;
+ } else if (pred == null) {
+ pred = uri;
+ } else if (literal == null) {
+ literal = uri;
+ quadType = OBJECT_NON_LITERAL;
+ } else {
+ onGraph(uri);
+ }
+ }
+
+ private void onPlainLiteral(String value, String lang) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ literal = value;
+ literalType = lang;
+ quadType = OBJECT_PLAIN_LITERAL;
+ }
+
+ private void onTypedLiteral(String value, String type) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ literal = value;
+ literalType = type;
+ quadType = OBJECT_TYPED_LITERAL;
+ }
+
+ private void onGraph(String value) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (quadType == OBJECT_PLAIN_LITERAL) {
+ sink.addPlainLiteral(subj, pred, literal, literalType, value);
+ } else if (quadType == OBJECT_TYPED_LITERAL) {
+ sink.addTypedLiteral(subj, pred, literal, literalType, value);
+ } else if (quadType == OBJECT_NON_LITERAL) {
+ sink.addNonLiteral(subj, pred, literal, value);
+ }
+ resetQuad();
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ }
+
+ @Override
+ protected boolean setPropertyInternal(String key, Object value) {
+ if (StreamProcessor.PROCESSOR_GRAPH_HANDLER_PROPERTY.equals(key) && value instanceof io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler) {
+ processorGraphHandler = (ProcessorGraphHandler) value;
+ } else if (StreamProcessor.ENABLE_ERROR_RECOVERY.equals(key) && value instanceof Boolean) {
+ ignoreErrors = (Boolean) value;
+ }
+ return false;
+ }
+
+ private String extractToken(char[] buffer, int tokenEndPos, int trimSize) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ String saved;
+ if (addBuffer != null) {
+ if (tokenEndPos - trimSize >= tokenStartPos) {
+ addBuffer.append(buffer, tokenStartPos, tokenEndPos - tokenStartPos - trimSize + 1);
+ }
+ addBuffer.delete(0, trimSize);
+ saved = addBuffer.toString();
+ addBuffer = null;
+ } else {
+ saved = String.valueOf(buffer, tokenStartPos + trimSize, tokenEndPos - tokenStartPos + 1 - 2 * trimSize);
+ }
+ tokenStartPos = -1;
+ return saved;
+ }
+
+ @Override
+ public void startStream() throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ super.startStream();
+ resetQuad();
+ waitingForSentenceEnd = false;
+ parsingState = PARSING_OUTSIDE;
+ }
+
+ private void resetQuad() {
+ addBuffer = null;
+ tokenStartPos = -1;
+ subj = null;
+ pred = null;
+ literal = null;
+ literalType = null;
+ quadType = -1;
+ waitingForSentenceEnd = true;
+ }
+
+ @Override
+ public void endStream() throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (tokenStartPos != -1 || waitingForSentenceEnd) {
+ error("Unexpected end of stream");
+ }
+ super.endStream();
+ }
+
+ private String unescape(String str) throws ParseException {
+ int limit = str.length();
+ StringBuilder result = new StringBuilder(limit);
+
+ for (int i = 0; i < limit; i++) {
+ char ch = str.charAt(i);
+ if (ch != '\\') {
+ result.append(ch);
+ continue;
+ }
+ i++;
+ if (i == limit) {
+ break;
+ }
+ ch = str.charAt(i);
+ switch (ch) {
+ case '\\':
+ case '\'':
+ case '\"':
+ result.append(ch);
+ break;
+ case 'b':
+ result.append('\b');
+ break;
+ case 'f':
+ result.append('\f');
+ break;
+ case 'n':
+ result.append('\n');
+ break;
+ case 'r':
+ result.append('\r');
+ break;
+ case 't':
+ result.append('\t');
+ break;
+ case 'u':
+ case 'U':
+ int sequenceLength = ch == 'u' ? 4 : 8;
+ if (i + sequenceLength >= limit) {
+ error("Error parsing escape sequence '\\" + ch + "'");
+ }
+ String code = str.substring(i + 1, i + 1 + sequenceLength);
+ i += sequenceLength;
+
+ try {
+ int value = Integer.parseInt(code, 16);
+ result.append((char) value);
+ } catch (NumberFormatException nfe) {
+ error("Error parsing escape sequence '\\" + ch + "'");
+ }
+ break;
+ default:
+ result.append(ch);
+ break;
+ }
+ }
+ return result.toString();
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NQuadsSerializer.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NQuadsSerializer.java
new file mode 100644
index 00000000..9f2ca840
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NQuadsSerializer.java
@@ -0,0 +1,90 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.NTriplesSerializer;
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.QuadSink;
+
+/**
+ * Implementation of {@link io.github.sparqlanything.html.org.semarglproject.sink.TripleSink} which serializes triples to
+ * {@link CharSink} using NTriples syntax.
+ */
+public class NQuadsSerializer extends NTriplesSerializer implements QuadSink {
+
+ private NQuadsSerializer(CharSink sink) {
+ super(sink);
+ }
+
+ /**
+ * Creates instance of TurtleSerializer connected to specified sink.
+ * @param sink sink to be connected to
+ * @return instance of TurtleSerializer
+ */
+ public static QuadSink connect(CharSink sink) {
+ return new NQuadsSerializer(sink);
+ }
+
+ @Override
+ public void addNonLiteral(String subj, String pred, String obj, String graph) {
+ try {
+ startTriple(subj, pred);
+ serializeBnodeOrUri(obj);
+ if (graph != null) {
+ serializeBnodeOrUri(graph);
+ }
+ sink.process(DOT_EOL);
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ public void addPlainLiteral(String subj, String pred, String content, String lang, String graph) {
+ try {
+ startTriple(subj, pred);
+ addContent(content);
+ if (lang != null) {
+ sink.process('@').process(lang);
+ }
+ sink.process(SPACE);
+ if (graph != null) {
+ serializeBnodeOrUri(graph);
+ }
+ sink.process(DOT_EOL);
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ public void addTypedLiteral(String subj, String pred, String content, String type, String graph) {
+ try {
+ startTriple(subj, pred);
+ addContent(content);
+ sink.process("^^");
+ serializeUri(type);
+ if (graph != null) {
+ serializeBnodeOrUri(graph);
+ }
+ sink.process(DOT_EOL);
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NTriplesParser.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NTriplesParser.java
new file mode 100644
index 00000000..2fdee049
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NTriplesParser.java
@@ -0,0 +1,397 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.Pipe;
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+import io.github.sparqlanything.html.org.semarglproject.source.StreamProcessor;
+
+import java.util.BitSet;
+
+/**
+ * Implementation of streaming NTriples parser.
+ *
+ * List of supported options:
+ *
+ * {@link StreamProcessor#PROCESSOR_GRAPH_HANDLER_PROPERTY}
+ * {@link StreamProcessor#ENABLE_ERROR_RECOVERY}
+ *
+ */
+public final class NTriplesParser extends Pipe implements CharSink {
+
+ /**
+ * Class URI for errors produced by a parser
+ */
+ public static final String ERROR = "http://semarglproject.org/ntriples/Error";
+
+ private static final short PARSING_OUTSIDE = 0;
+ private static final short PARSING_URI = 1;
+ private static final short PARSING_BNODE = 2;
+ private static final short PARSING_LITERAL = 3;
+ private static final short PARSING_AFTER_LITERAL = 4;
+ private static final short PARSING_LITERAL_TYPE = 5;
+ private static final short PARSING_COMMENT = 6;
+
+ private static final char SENTENCE_END = '.';
+
+ /**
+ * NTriples whitespace char checker
+ */
+ private static final BitSet WHITESPACE = new BitSet();
+
+ static {
+ WHITESPACE.set('\t');
+ WHITESPACE.set(' ');
+ WHITESPACE.set('\r');
+ WHITESPACE.set('\n');
+ }
+
+
+ private String subj = null;
+ private String pred = null;
+ private String literalObj = null;
+
+ private io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler processorGraphHandler = null;
+ private boolean ignoreErrors = false;
+ private boolean skipSentence = false;
+
+ private short parsingState;
+
+ private int tokenStartPos;
+ private short charsToEscape = 0;
+ private boolean waitingForSentenceEnd = false;
+ private StringBuilder addBuffer = null;
+
+ private NTriplesParser(TripleSink sink) {
+ super(sink);
+ }
+
+ /**
+ * Creates instance of NTriplesParser connected to specified sink.
+ * @param sink sink to be connected to
+ * @return instance of NTriplesParser
+ */
+ public static CharSink connect(TripleSink sink) {
+ return new NTriplesParser(sink);
+ }
+
+ private void error(String msg) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (processorGraphHandler != null) {
+ processorGraphHandler.error(ERROR, msg);
+ }
+ if (!ignoreErrors) {
+ throw new io.github.sparqlanything.html.org.semarglproject.rdf.ParseException(msg);
+ } else {
+ resetTriple();
+ skipSentence = true;
+ parsingState = PARSING_OUTSIDE;
+ }
+ }
+
+ @Override
+ public NTriplesParser process(String str) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ return process(str.toCharArray(), 0, str.length());
+ }
+
+ @Override
+ public NTriplesParser process(char ch) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ char[] buffer = new char[1];
+ buffer[0] = ch;
+ return process(buffer, 0, 1);
+ }
+
+ @Override
+ public NTriplesParser process(char[] buffer, int start, int count) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (tokenStartPos != -1) {
+ tokenStartPos = start;
+ }
+ int end = start + count;
+
+ for (int pos = start; pos < end; pos++) {
+ if (skipSentence && buffer[pos] != SENTENCE_END) {
+ continue;
+ } else {
+ skipSentence = false;
+ }
+
+ if (parsingState == PARSING_OUTSIDE) {
+ processOutsideChar(buffer, pos);
+ } else if (parsingState == PARSING_COMMENT) {
+ if (buffer[pos] == '\n' || buffer[pos] == '\r') {
+ parsingState = PARSING_OUTSIDE;
+ }
+ } else if (parsingState == PARSING_URI) {
+ if (buffer[pos] == '>') {
+ onNonLiteral(unescape(extractToken(buffer, pos, 1)));
+ parsingState = PARSING_OUTSIDE;
+ }
+ } else if (parsingState == PARSING_BNODE) {
+ if (WHITESPACE.get(buffer[pos]) || buffer[pos] == SENTENCE_END) {
+ onNonLiteral(extractToken(buffer, pos - 1, 0));
+ parsingState = PARSING_OUTSIDE;
+ }
+ } else if (parsingState == PARSING_LITERAL) {
+ processLiteralChar(buffer, pos);
+ } else if (parsingState == PARSING_AFTER_LITERAL) {
+ if (buffer[pos] == '@' || buffer[pos] == '^') {
+ tokenStartPos = pos;
+ parsingState = PARSING_LITERAL_TYPE;
+ } else if (WHITESPACE.get(buffer[pos]) || buffer[pos] == SENTENCE_END) {
+ onPlainLiteral(literalObj, null);
+ parsingState = PARSING_OUTSIDE;
+ processOutsideChar(buffer, pos);
+ } else {
+ error("Unexpected character '" + buffer[pos] + "' after literal");
+ }
+ } else if (parsingState == PARSING_LITERAL_TYPE) {
+ processLiteralTypeChar(buffer, pos);
+ }
+ }
+ if (tokenStartPos != -1) {
+ if (addBuffer == null) {
+ addBuffer = new StringBuilder();
+ }
+ addBuffer.append(buffer, tokenStartPos, end - tokenStartPos);
+ }
+ return this;
+ }
+
+ private void processLiteralChar(char[] buffer, int pos) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (charsToEscape == 9 && buffer[pos] == 'u') {
+ charsToEscape -= 5;
+ } else if (charsToEscape == 9 && buffer[pos] != 'U') {
+ charsToEscape = 0;
+ } else if (charsToEscape > 0) {
+ charsToEscape--;
+ } else {
+ if (buffer[pos] == '\"') {
+ literalObj = unescape(extractToken(buffer, pos, 1));
+ parsingState = PARSING_AFTER_LITERAL;
+ } else if (buffer[pos] == '\\') {
+ charsToEscape = 9;
+ }
+ }
+ }
+
+ private void processLiteralTypeChar(char[] buffer, int pos) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (WHITESPACE.get(buffer[pos])) {
+ String type = extractToken(buffer, pos, 0);
+ int trimSize = type.charAt(type.length() - 1) == SENTENCE_END ? 1 : 0;
+ if (type.charAt(0) == '@') {
+ onPlainLiteral(literalObj, type.substring(1, type.length() - 1 - trimSize));
+ } else if (type.startsWith("^^<") && type.charAt(type.length() - 2) == '>') {
+ onTypedLiteral(literalObj, type.substring(3, type.length() - 2 - trimSize));
+ } else {
+ error("Literal type '" + type + "' can not be parsed");
+ }
+ parsingState = PARSING_OUTSIDE;
+ if (trimSize > 0) {
+ finishSentence();
+ }
+ }
+ }
+
+ private void processOutsideChar(char[] buffer, int pos) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ switch (buffer[pos]) {
+ case '\"':
+ parsingState = PARSING_LITERAL;
+ tokenStartPos = pos;
+ break;
+ case '<':
+ parsingState = PARSING_URI;
+ tokenStartPos = pos;
+ break;
+ case '_':
+ parsingState = PARSING_BNODE;
+ tokenStartPos = pos;
+ break;
+ case '#':
+ parsingState = PARSING_COMMENT;
+ break;
+ case SENTENCE_END:
+ finishSentence();
+ break;
+ default:
+ if (!WHITESPACE.get(buffer[pos])) {
+ error("Unexpected character '" + buffer[pos] + "'");
+ }
+ }
+ }
+
+ private void finishSentence() throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (waitingForSentenceEnd) {
+ waitingForSentenceEnd = false;
+ } else {
+ error("Unexpected end of sentence");
+ }
+ }
+
+ private void onNonLiteral(String uri) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (waitingForSentenceEnd) {
+ error("End of sentence expected");
+ }
+ if (subj == null) {
+ subj = uri;
+ } else if (pred == null) {
+ pred = uri;
+ } else {
+ sink.addNonLiteral(subj, pred, uri);
+ resetTriple();
+ }
+ }
+
+ private void onPlainLiteral(String value, String lang) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (subj == null || pred == null) {
+ if (waitingForSentenceEnd) {
+ error("End of sentence expected");
+ } else {
+ error("Literal is not an object");
+ }
+ }
+ sink.addPlainLiteral(subj, pred, value, lang);
+ resetTriple();
+ }
+
+ private void onTypedLiteral(String value, String type) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (subj == null || pred == null) {
+ if (waitingForSentenceEnd) {
+ error("End of sentence expected");
+ } else {
+ error("Literal is not an object");
+ }
+ }
+ sink.addTypedLiteral(subj, pred, value, type);
+ resetTriple();
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ }
+
+ @Override
+ protected boolean setPropertyInternal(String key, Object value) {
+ if (StreamProcessor.PROCESSOR_GRAPH_HANDLER_PROPERTY.equals(key) && value instanceof io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler) {
+ processorGraphHandler = (ProcessorGraphHandler) value;
+ } else if (StreamProcessor.ENABLE_ERROR_RECOVERY.equals(key) && value instanceof Boolean) {
+ ignoreErrors = (Boolean) value;
+ }
+ return false;
+ }
+
+ private String extractToken(char[] buffer, int tokenEndPos, int trimSize) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ String saved;
+ if (addBuffer != null) {
+ if (tokenEndPos - trimSize >= tokenStartPos) {
+ addBuffer.append(buffer, tokenStartPos, tokenEndPos - tokenStartPos - trimSize + 1);
+ }
+ addBuffer.delete(0, trimSize);
+ saved = addBuffer.toString();
+ addBuffer = null;
+ } else {
+ saved = String.valueOf(buffer, tokenStartPos + trimSize, tokenEndPos - tokenStartPos + 1 - 2 * trimSize);
+ }
+ tokenStartPos = -1;
+ return saved;
+ }
+
+ @Override
+ public void startStream() throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ super.startStream();
+ resetTriple();
+ waitingForSentenceEnd = false;
+ parsingState = PARSING_OUTSIDE;
+ }
+
+ private void resetTriple() {
+ addBuffer = null;
+ tokenStartPos = -1;
+ subj = null;
+ pred = null;
+ waitingForSentenceEnd = true;
+ }
+
+ @Override
+ public void endStream() throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (tokenStartPos != -1 || waitingForSentenceEnd) {
+ error("Unexpected end of stream");
+ }
+ super.endStream();
+ }
+
+ private String unescape(String str) throws ParseException {
+ int limit = str.length();
+ StringBuilder result = new StringBuilder(limit);
+
+ for (int i = 0; i < limit; i++) {
+ char ch = str.charAt(i);
+ if (ch != '\\') {
+ result.append(ch);
+ continue;
+ }
+ i++;
+ if (i == limit) {
+ break;
+ }
+ ch = str.charAt(i);
+ switch (ch) {
+ case '\\':
+ case '\'':
+ case '\"':
+ result.append(ch);
+ break;
+ case 'b':
+ result.append('\b');
+ break;
+ case 'f':
+ result.append('\f');
+ break;
+ case 'n':
+ result.append('\n');
+ break;
+ case 'r':
+ result.append('\r');
+ break;
+ case 't':
+ result.append('\t');
+ break;
+ case 'u':
+ case 'U':
+ int sequenceLength = ch == 'u' ? 4 : 8;
+ if (i + sequenceLength >= limit) {
+ error("Error parsing escape sequence '\\" + ch + "'");
+ }
+ String code = str.substring(i + 1, i + 1 + sequenceLength);
+ i += sequenceLength;
+
+ try {
+ int value = Integer.parseInt(code, 16);
+ result.append((char) value);
+ } catch (NumberFormatException nfe) {
+ error("Error parsing escape sequence '\\" + ch + "'");
+ }
+ break;
+ default:
+ result.append(ch);
+ break;
+ }
+ }
+ return result.toString();
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NTriplesSerializer.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NTriplesSerializer.java
new file mode 100644
index 00000000..9077457b
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/NTriplesSerializer.java
@@ -0,0 +1,228 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.Pipe;
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDF;
+
+import java.util.BitSet;
+
+/**
+ * Implementation of {@link TripleSink} which serializes triples to
+ * {@link CharSink} using NTriples syntax.
+ */
+public class NTriplesSerializer extends Pipe implements TripleSink {
+
+ protected static final String DOT_EOL = ".\n";
+ protected static final char SPACE = ' ';
+
+ private static final char QUOTE = '"';
+ private static final char URI_START = '<';
+ private static final char URI_END = '>';
+
+ private static final BitSet ESCAPABLE_CONTENT_CHARS = new BitSet();
+ private static final BitSet ESCAPABLE_URI_CHARS = new BitSet();
+
+ static {
+ ESCAPABLE_CONTENT_CHARS.set('\\');
+ ESCAPABLE_CONTENT_CHARS.set('\"');
+ ESCAPABLE_CONTENT_CHARS.set('\b');
+ ESCAPABLE_CONTENT_CHARS.set('\f');
+ ESCAPABLE_CONTENT_CHARS.set('\n');
+ ESCAPABLE_CONTENT_CHARS.set('\r');
+ ESCAPABLE_CONTENT_CHARS.set('\t');
+
+ for (char ch = 0; ch <= 0x20; ch++) {
+ ESCAPABLE_URI_CHARS.set(ch);
+ }
+ ESCAPABLE_URI_CHARS.set('\\');
+ ESCAPABLE_URI_CHARS.set('<');
+ ESCAPABLE_URI_CHARS.set('>');
+ ESCAPABLE_URI_CHARS.set('{');
+ ESCAPABLE_URI_CHARS.set('}');
+ ESCAPABLE_URI_CHARS.set('"');
+ ESCAPABLE_URI_CHARS.set('`');
+ ESCAPABLE_URI_CHARS.set('|');
+ ESCAPABLE_URI_CHARS.set('^');
+ }
+
+ protected NTriplesSerializer(CharSink sink) {
+ super(sink);
+ }
+
+ /**
+ * Creates instance of TurtleSerializer connected to specified sink.
+ * @param sink sink to be connected to
+ * @return instance of TurtleSerializer
+ */
+ public static TripleSink connect(CharSink sink) {
+ return new NTriplesSerializer(sink);
+ }
+
+ @Override
+ public void addNonLiteral(String subj, String pred, String obj) {
+ try {
+ startTriple(subj, pred);
+ serializeBnodeOrUri(obj);
+ sink.process(DOT_EOL);
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ public void addPlainLiteral(String subj, String pred, String content, String lang) {
+ try {
+ startTriple(subj, pred);
+ addContent(content);
+ if (lang != null) {
+ sink.process('@').process(lang);
+ }
+ sink.process(SPACE).process(DOT_EOL);
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ public void addTypedLiteral(String subj, String pred, String content, String type) {
+ try {
+ startTriple(subj, pred);
+ addContent(content);
+ sink.process("^^");
+ serializeUri(type);
+ sink.process(DOT_EOL);
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ protected boolean setPropertyInternal(String key, Object value) {
+ return false;
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ // ignore
+ }
+
+ protected void startTriple(String subj, String pred) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ serializeBnodeOrUri(subj);
+ serializeBnodeOrUri(pred);
+ }
+
+ protected void serializeBnodeOrUri(String value) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (value.startsWith(RDF.BNODE_PREFIX)) {
+ sink.process(value).process(SPACE);
+ } else {
+ serializeUri(value);
+ }
+ }
+
+ protected void serializeUri(String uri) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ String escapedUri = escapeUri(uri);
+ sink.process(URI_START).process(escapedUri).process(URI_END).process(SPACE);
+ }
+
+ protected void addContent(String content) throws ParseException {
+ String escapedContent = escapeContent(content);
+ sink.process(QUOTE).process(escapedContent).process(QUOTE);
+ }
+
+ private static String escapeContent(String str) {
+ int limit = str.length();
+ int pos = 0;
+ for (; pos < limit; pos++) {
+ char ch = str.charAt(pos);
+ if (ch > 0x80 || ESCAPABLE_CONTENT_CHARS.get(ch)) {
+ break;
+ }
+ }
+ if (pos == limit) {
+ return str;
+ }
+ StringBuilder result = new StringBuilder(limit);
+ result.append(str.substring(0, pos));
+ for (; pos < limit; pos++) {
+ char ch = str.charAt(pos);
+ if (ch < 0x80) {
+ switch (ch) {
+ case '\\':
+ case '\"':
+ result.append('\\').append(ch);
+ break;
+ case '\b':
+ result.append("\\b");
+ break;
+ case '\f':
+ result.append("\\f");
+ break;
+ case '\n':
+ result.append("\\n");
+ break;
+ case '\r':
+ result.append("\\r");
+ break;
+ case '\t':
+ result.append("\\t");
+ break;
+ default:
+ result.append(ch);
+ }
+ } else if (ch <= 0xFFFF) {
+ result.append("\\u").append(String.format("%04X", (int) ch));
+ } else {
+ result.append("\\U").append(String.format("%08X", (int) ch));
+ }
+ }
+ return result.toString();
+ }
+
+ private static String escapeUri(String str) {
+ int limit = str.length();
+ int pos = 0;
+ for (; pos < limit; pos++) {
+ char ch = str.charAt(pos);
+ if (ch > 0x80 || ESCAPABLE_URI_CHARS.get(ch)) {
+ break;
+ }
+ }
+ if (pos == limit) {
+ return str;
+ }
+ StringBuilder result = new StringBuilder(limit);
+ result.append(str.substring(0, pos));
+ for (; pos < limit; pos++) {
+ char ch = str.charAt(pos);
+ if (ESCAPABLE_URI_CHARS.get(ch)) {
+ result.append("\\u").append(String.format("%04X", (int) ch));
+ } else if (ch < 0x80) {
+ result.append(ch);
+ } else if (ch <= 0xFFFF) {
+ result.append("\\u").append(String.format("%04X", (int) ch));
+ } else {
+ result.append("\\U").append(String.format("%08X", (int) ch));
+ }
+ }
+ return result.toString();
+ }
+
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/ParseException.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/ParseException.java
new file mode 100644
index 00000000..7d232a28
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/ParseException.java
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+public class ParseException extends Exception {
+
+ private static final long serialVersionUID = 2088926094965976520L;
+
+ public ParseException(String string) {
+ super(string);
+ }
+
+ public ParseException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public ParseException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/ProcessorGraphHandler.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/ProcessorGraphHandler.java
new file mode 100644
index 00000000..4a07385b
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/ProcessorGraphHandler.java
@@ -0,0 +1,43 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+/**
+ * Interface for handling processor graph events
+ */
+public interface ProcessorGraphHandler {
+
+ /**
+ * Callback for info events
+ * @param infoClass event class URI
+ * @param message info message
+ */
+ void info(String infoClass, String message);
+
+ /**
+ * Callback for warning events
+ * @param warningClass warning class URI
+ * @param message warning message
+ */
+ void warning(String warningClass, String message);
+
+ /**
+ * Callback for error events
+ * @param errorClass event class URI
+ * @param message error message
+ */
+ void error(String errorClass, String message);
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/RdfXmlParser.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/RdfXmlParser.java
new file mode 100644
index 00000000..b7bb3ced
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/RdfXmlParser.java
@@ -0,0 +1,730 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler;
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedIriException;
+import io.github.sparqlanything.html.org.semarglproject.ri.RIUtils;
+import io.github.sparqlanything.html.org.semarglproject.sink.Pipe;
+import io.github.sparqlanything.html.org.semarglproject.sink.XmlSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+import io.github.sparqlanything.html.org.semarglproject.source.StreamProcessor;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDF;
+import io.github.sparqlanything.html.org.semarglproject.xml.XmlUtils;
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+import javax.xml.XMLConstants;
+import java.util.*;
+
+/**
+ * Implementation of streaming RDF/XML parser.
+ *
+ * List of supported options:
+ *
+ * {@link StreamProcessor#PROCESSOR_GRAPH_HANDLER_PROPERTY}
+ * {@link StreamProcessor#ENABLE_ERROR_RECOVERY}
+ *
+ */
+public final class RdfXmlParser extends Pipe implements XmlSink {
+
+ /**
+ * Class URI for errors produced by a parser
+ */
+ public static final String ERROR = "http://semarglproject.org/ntriples/Error";
+
+ private static final String IS_NOT_ALLOWED_HERE = " is not allowed here";
+
+ // processing modes
+ private static final short INSIDE_OF_PROPERTY = 1;
+ private static final short INSIDE_OF_RESOURCE = 2;
+ private static final short PARSE_TYPE_LITERAL = 3;
+ private static final short PARSE_TYPE_COLLECTION = 4;
+ private static final short PARSE_TYPE_RESOURCE = 5;
+ private static final short ERROR_RECOVERY = 6;
+
+ private static final String ID_ATTR = "ID";
+ private static final String NODE_ID_ATTR = "nodeID";
+ private static final String ABOUT_ATTR = "about";
+
+ private static final String PARSE_LITERAL_VALUE = "Literal";
+ private static final String PARSE_RESOURCE_VALUE = "Resource";
+ private static final String PARSE_COLLECTION_VALUE = "Collection";
+
+ private short mode = 0;
+
+ private String baseUri = "";
+
+ private final Stack modeStack = new Stack();
+ private final Stack langStack = new Stack();
+ private final Stack baseStack = new Stack();
+ private final Stack subjStack = new Stack();
+ private final Stack subjLiIndexStack = new Stack();
+ private final Map nsMappings = new HashMap();
+
+ private final Set processedIDs = new HashSet();
+
+ private int bnodeId = 0;
+
+ // IRI or bnode
+ private String subjRes = null;
+
+ // tail node of parseType="Collection"
+ private String seqTailRes = null;
+
+ // predicate IRI
+ private String predIri = null;
+
+ // typed literal datatype IRI
+ private String datatypeIri = null;
+
+ private String reifyIri = null;
+ private boolean captureLiteral = false;
+
+ private int parseDepth = 0;
+ private StringBuilder parse = new StringBuilder();
+
+ private io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler processorGraphHandler = null;
+ private boolean ignoreErrors = false;
+
+ // holds data for triples which addition depends on XML node contents (blank or not)
+ private List pendingTriples = new ArrayList();
+
+ private RdfXmlParser(TripleSink sink) {
+ super(sink);
+ }
+
+ /**
+ * Creates instance of RdfXmlParser connected to specified sink.
+ * @param sink sink to be connected to
+ * @return instance of RdfXmlParser
+ */
+ public static XmlSink connect(TripleSink sink) {
+ return new RdfXmlParser(sink);
+ }
+
+ private void error(String msg) throws SAXException {
+ if (processorGraphHandler != null) {
+ processorGraphHandler.error(ERROR, msg);
+ }
+ if (ignoreErrors) {
+ modeStack.push(mode);
+ mode = ERROR_RECOVERY;
+ } else {
+ throw new SAXException(new io.github.sparqlanything.html.org.semarglproject.rdf.ParseException(msg));
+ }
+ }
+
+ @SuppressWarnings("deprecation")
+ private boolean violatesSchema(String nodeIri) {
+ return nodeIri == null || nodeIri.isEmpty() || nodeIri.equals(RDF.PARSE_TYPE)
+ || nodeIri.equals(RDF.ABOUT_EACH) || nodeIri.equals(RDF.DATATYPE)
+ || nodeIri.equals(RDF.BAG_ID) || nodeIri.equals(RDF.ABOUT)
+ || nodeIri.equals(RDF.RESOURCE) || nodeIri.equals(RDF.NODEID)
+ || nodeIri.equals(RDF.ID) || nodeIri.equals(RDF.ABOUT_EACH_PREFIX);
+ }
+
+ @Override
+ public void startElement(String nsUri, String lname, String qname, Attributes attrs) throws SAXException {
+ processPendingTriples(true);
+
+ modeStack.push(mode);
+
+ if (parseDepth > 0) {
+ parseDepth++;
+ if (mode == PARSE_TYPE_LITERAL) {
+ parse.append(XmlUtils.serializeOpenTag(nsUri, qname, nsMappings, attrs, true));
+ nsMappings.clear();
+ return;
+ }
+ }
+
+ if (mode == ERROR_RECOVERY) {
+ return;
+ }
+
+ processLangAndBase(attrs);
+
+ String iri = nsUri + lname;
+ if (subjRes == null && (nsUri == null || nsUri.isEmpty()) || iri.equals(RDF.RDF)) {
+ return;
+ }
+ if (violatesSchema(iri)) {
+ error(qname + IS_NOT_ALLOWED_HERE);
+ }
+
+ switch (mode) {
+ case PARSE_TYPE_COLLECTION:
+ case INSIDE_OF_PROPERTY: {
+ subjRes = getSubject(attrs);
+ if (subjRes == null) {
+ // error during subject processing was ignored so we need to skip next steps
+ return;
+ }
+
+ if (mode != PARSE_TYPE_COLLECTION && !subjStack.isEmpty()) {
+ processNonLiteralTriple(subjStack.peek(), predIri, subjRes);
+ }
+
+ if (!iri.equals(RDF.DESCRIPTION)) {
+ if (iri.equals(RDF.LI)) {
+ error(qname + IS_NOT_ALLOWED_HERE);
+ } else {
+ sink.addNonLiteral(subjRes, RDF.TYPE, iri);
+ }
+ }
+
+ processResourceAttrs(qname, attrs);
+
+ subjStack.push(subjRes);
+ subjLiIndexStack.push(1);
+ if (mode == INSIDE_OF_PROPERTY) {
+ mode = INSIDE_OF_RESOURCE;
+ }
+ break;
+ }
+ case PARSE_TYPE_RESOURCE:
+ case INSIDE_OF_RESOURCE: {
+ int liIndex = subjLiIndexStack.pop();
+
+ boolean correctProperty = checkPropertyForErrors(qname, iri, attrs);
+
+ if (!correctProperty) {
+ // error during property processing was ignored so we need to skip next steps
+ return;
+ }
+
+ predIri = iri;
+ if (predIri.equals(RDF.LI)) {
+ predIri = RDF.NS + "_" + liIndex++;
+ }
+ subjLiIndexStack.push(liIndex);
+
+ String nodeId = attrs.getValue(RDF.NS, ID_ATTR);
+ if (nodeId != null) {
+ reifyIri = resolveIRINoResolve(baseStack.peek(), nodeId);
+ }
+
+ captureLiteral = true;
+ mode = INSIDE_OF_PROPERTY;
+ processPropertyAttrs(nsUri, attrs);
+ if (captureLiteral) {
+ parse = new StringBuilder();
+ }
+ break;
+ }
+ default:
+ throw new IllegalStateException("Unknown mode = " + mode);
+ }
+ }
+
+ private void processPendingTriples(boolean forceNewBNode) {
+ Iterator iterator = pendingTriples.iterator();
+ while (iterator.hasNext()) {
+ String propRes = iterator.next();
+ String attr = iterator.next();
+ String value = iterator.next();
+ if (forceNewBNode || propRes == null) {
+ String bnode = newBnode();
+ processNonLiteralTriple(subjRes, predIri, bnode);
+ sink.addPlainLiteral(bnode, attr, value, langStack.peek());
+ } else {
+ sink.addPlainLiteral(propRes, attr, value, langStack.peek());
+ }
+ }
+ pendingTriples.clear();
+ }
+
+ private boolean checkPropertyForErrors(String qname, String iri, Attributes attrs) throws SAXException {
+ if (iri.equals(RDF.NIL) || iri.equals(RDF.DESCRIPTION)) {
+ error(qname + IS_NOT_ALLOWED_HERE);
+ return false;
+ }
+ if (!RIUtils.isIri(iri)) {
+ error("Invalid property IRI");
+ return false;
+ }
+
+ if (attrs.getValue(RDF.NS, "resource") != null && attrs.getValue(RDF.NS, NODE_ID_ATTR) != null) {
+ error("Both rdf:resource and rdf:nodeID are present");
+ return false;
+ }
+ if (attrs.getValue(RDF.NS, "parseType") != null && !isAttrsValidForParseType(attrs)) {
+ error("rdf:parseType conflicts with other attributes");
+ return false;
+ }
+ return true;
+ }
+
+ private void processResourceAttrs(String qname, Attributes attrs) throws SAXException {
+ for (int i = 0; i < attrs.getLength(); i++) {
+ String tag = attrs.getURI(i) + attrs.getLocalName(i);
+ if (tag.equals(RDF.NODEID) || tag.equals(RDF.ABOUT) || tag.equals(RDF.ID)
+ || attrs.getQName(i).startsWith(XMLConstants.XML_NS_PREFIX)) {
+ continue;
+ }
+ String value = attrs.getValue(i);
+ if (tag.equals(RDF.TYPE)) {
+ sink.addNonLiteral(subjRes, RDF.TYPE, value);
+ } else {
+ if (violatesSchema(tag) || tag.equals(RDF.LI)) {
+ error(qname + IS_NOT_ALLOWED_HERE);
+ } else {
+ sink.addPlainLiteral(subjRes, tag, value, langStack.peek());
+ }
+ }
+ }
+ }
+
+ private void processPropertyAttrs(String nsUri, Attributes attrs) throws SAXException {
+ // process resource first
+ int resIdx = attrs.getIndex(RDF.NS, "resource");
+ String propertyRes = null;
+ if (resIdx >= 0) {
+ propertyRes = processPropertyRes(attrs.getValue(resIdx));
+ }
+
+ for (int i = 0; i < attrs.getLength(); i++) {
+ if (i == resIdx) {
+ continue;
+ }
+ String attr = attrs.getURI(i) + attrs.getLocalName(i);
+ if (attrs.getQName(i).startsWith(XMLConstants.XML_NS_PREFIX) || attr.equals(RDF.ID)) {
+ continue;
+ }
+ processPropertyTagAttr(nsUri, attr, attrs.getValue(i), propertyRes);
+ }
+ }
+
+ private void processLangAndBase(Attributes attrs) throws SAXException {
+ String lang = langStack.peek();
+ if (attrs.getValue(XmlUtils.XML_LANG) != null) {
+ lang = attrs.getValue(XmlUtils.XML_LANG);
+ }
+ langStack.push(lang);
+
+ String base = baseStack.peek();
+ if (attrs.getValue(XmlUtils.XML_BASE) != null) {
+ base = attrs.getValue(XmlUtils.XML_BASE);
+ if (base.contains("#")) {
+ base = base.substring(0, base.lastIndexOf('#'));
+ }
+ base += '#';
+ if (!RIUtils.isAbsoluteIri(base)) {
+ error("Invalid base IRI");
+ base = baseStack.peek();
+ }
+ }
+ baseStack.push(base);
+ }
+
+ private String processPropertyRes(String value) throws SAXException {
+ String propertyRes = resolveIRI(baseStack.peek(), value);
+ if (propertyRes != null) {
+ processNonLiteralTriple(subjRes, predIri, propertyRes);
+ captureLiteral = false;
+ }
+ return propertyRes;
+ }
+
+ private void processPropertyTagAttr(String nsUri, String attr, String value,
+ String propertyRes) throws SAXException {
+ if (attr.equals(RDF.DATATYPE)) {
+ datatypeIri = resolveIRINoResolve(nsUri, value);
+ } else if (attr.equals(RDF.PARSE_TYPE)) {
+ parseDepth = 1;
+ if (value.equalsIgnoreCase(PARSE_LITERAL_VALUE)) {
+ parse = new StringBuilder();
+ mode = PARSE_TYPE_LITERAL;
+ } else if (value.equalsIgnoreCase(PARSE_RESOURCE_VALUE)) {
+ String bnode = newBnode();
+ processNonLiteralTriple(subjRes, predIri, bnode);
+ subjRes = bnode;
+ subjStack.push(subjRes);
+ subjLiIndexStack.push(1);
+ mode = PARSE_TYPE_RESOURCE;
+ } else if (value.equalsIgnoreCase(PARSE_COLLECTION_VALUE)) {
+ String bnode = newBnode();
+ sink.addNonLiteral(subjRes, predIri, bnode);
+ subjRes = bnode;
+ seqTailRes = null;
+ subjStack.push(bnode);
+ subjLiIndexStack.push(1);
+ mode = PARSE_TYPE_COLLECTION;
+ }
+ captureLiteral = false;
+ } else if (attr.equals(RDF.NODEID)) {
+ if (!XmlUtils.isValidNCName(value)) {
+ error("Invalid nodeID");
+ } else {
+ String id = RDF.BNODE_PREFIX + 'n' + value.hashCode();
+ processNonLiteralTriple(subjRes, predIri, id);
+ captureLiteral = false;
+ }
+ } else {
+ if (violatesSchema(attr) || attr.equals(RDF.NIL)) {
+ error(attr + IS_NOT_ALLOWED_HERE);
+ } else {
+ pendingTriples.add(propertyRes);
+ pendingTriples.add(attr);
+ pendingTriples.add(value);
+ captureLiteral = false;
+ }
+ }
+ }
+
+ @Override
+ public void endElement(String namespaceUri, String lname, String qname) throws SAXException {
+ processPendingTriples(false);
+ if (parseDepth > 0) {
+ parseDepth--;
+ if (mode == PARSE_TYPE_LITERAL && parseDepth > 0) {
+ parse.append("").append(qname).append(">");
+ return;
+ }
+ }
+ if (subjStack.isEmpty()) {
+ return;
+ }
+
+ switch (mode) {
+ case PARSE_TYPE_RESOURCE:
+ case INSIDE_OF_RESOURCE: {
+ subjStack.pop();
+ if (!subjStack.isEmpty()) {
+ subjRes = subjStack.peek();
+ }
+ subjLiIndexStack.pop();
+ if (mode == INSIDE_OF_RESOURCE) {
+ mode = INSIDE_OF_PROPERTY;
+ } else {
+ mode = INSIDE_OF_RESOURCE;
+ }
+ break;
+ }
+ case PARSE_TYPE_COLLECTION: {
+ subjStack.pop();
+ subjLiIndexStack.pop();
+ if (parseDepth > 0) {
+ if (seqTailRes == null) {
+ seqTailRes = subjStack.peek();
+ sink.addNonLiteral(seqTailRes, RDF.FIRST, subjRes);
+ } else {
+ String bnode = newBnode();
+ sink.addNonLiteral(seqTailRes, RDF.REST, bnode);
+ sink.addNonLiteral(bnode, RDF.FIRST, subjRes);
+ seqTailRes = bnode;
+ }
+ } else {
+ sink.addNonLiteral(seqTailRes, RDF.REST, RDF.NIL);
+ if (!subjStack.isEmpty()) {
+ subjRes = subjStack.peek();
+ }
+ mode = INSIDE_OF_RESOURCE;
+ }
+ break;
+ }
+ case INSIDE_OF_PROPERTY: {
+ if (captureLiteral) {
+ String value = parse.toString();
+ if (datatypeIri != null) {
+ processLiteralTriple(subjRes, predIri, value, datatypeIri, true);
+ } else {
+ processLiteralTriple(subjRes, predIri, value, langStack.peek(), false);
+ }
+ captureLiteral = false;
+ }
+ mode = INSIDE_OF_RESOURCE;
+ break;
+ }
+ case PARSE_TYPE_LITERAL: {
+ processLiteralTriple(subjRes, predIri, parse.toString(), RDF.XML_LITERAL, true);
+ mode = INSIDE_OF_RESOURCE;
+ break;
+ }
+ case ERROR_RECOVERY: {
+ mode = modeStack.pop();
+ return;
+ }
+ default:
+ throw new IllegalStateException("Unknown mode = " + mode);
+ }
+ langStack.pop();
+ baseStack.pop();
+ // TODO: fix modeStack
+ short savedMode = modeStack.pop();
+ if (savedMode == PARSE_TYPE_RESOURCE) {
+ mode = savedMode;
+ }
+ }
+
+ private boolean isAttrsValidForParseType(Attributes attrs) {
+ for (int i = 0; i < attrs.getLength(); i++) {
+ if (attrs.getQName(i).startsWith("xml")) {
+ continue;
+ }
+ String uri = attrs.getURI(i) + attrs.getLocalName(i);
+ if (uri.equals(RDF.PARSE_TYPE) || uri.equals(RDF.ID)) {
+ continue;
+ }
+ return false;
+ }
+ return true;
+ }
+
+ private void processNonLiteralTriple(String subj, String pred, String obj) {
+ sink.addNonLiteral(subj, pred, obj);
+ if (reifyIri != null) {
+ sink.addNonLiteral(reifyIri, RDF.TYPE, RDF.STATEMENT);
+ sink.addNonLiteral(reifyIri, RDF.SUBJECT, subj);
+ sink.addNonLiteral(reifyIri, RDF.PREDICATE, pred);
+ sink.addNonLiteral(reifyIri, RDF.OBJECT, obj);
+ reifyIri = null;
+ }
+ }
+
+ private void processLiteralTriple(String subj, String pred, String value, String langOrDt, boolean typed) {
+ if (typed) {
+ sink.addTypedLiteral(subj, pred, value, langOrDt);
+ } else {
+ sink.addPlainLiteral(subj, pred, value, langOrDt);
+ }
+ if (reifyIri != null) {
+ sink.addNonLiteral(reifyIri, RDF.TYPE, RDF.STATEMENT);
+ sink.addNonLiteral(reifyIri, RDF.SUBJECT, subj);
+ sink.addNonLiteral(reifyIri, RDF.PREDICATE, pred);
+ if (typed) {
+ sink.addTypedLiteral(reifyIri, RDF.OBJECT, value, langOrDt);
+ } else {
+ sink.addPlainLiteral(reifyIri, RDF.OBJECT, value, langOrDt);
+ }
+ reifyIri = null;
+ }
+ }
+
+ private String getSubject(Attributes attrs) throws SAXException {
+ int count = 0;
+ String result = null;
+ String attrValue = attrs.getValue(RDF.NS, ABOUT_ATTR);
+ if (attrValue != null) {
+ result = resolveIRI(baseStack.peek(), attrValue);
+ if (result != null) {
+ count++;
+ }
+ }
+ attrValue = attrs.getValue(RDF.NS, ID_ATTR);
+ if (attrValue != null) {
+ result = resolveIRINoResolve(baseStack.peek(), attrValue);
+ if (result != null) {
+ if (processedIDs.contains(result)) {
+ error("Duplicate definition for resource ID = " + result);
+ return null;
+ }
+ processedIDs.add(result);
+ count++;
+ }
+ }
+ attrValue = attrs.getValue(RDF.NS, NODE_ID_ATTR);
+ if (attrValue != null) {
+ result = RDF.BNODE_PREFIX + 'n' + attrValue.hashCode();
+ count++;
+ }
+ if (count == 0) {
+ return newBnode();
+ }
+ if (count > 1) {
+ error("Ambiguous identifier definition");
+ return null;
+ }
+ return result;
+ }
+
+ private String newBnode() {
+ bnodeId++;
+ return RDF.BNODE_PREFIX + 'n' + bnodeId;
+ }
+
+ /**
+ * Resolves specified IRI ignoring special cases
+ * @param baseIri base to resolve against
+ * @param iri IRI to resolve
+ * @return resolved IRI or null on error
+ * @throws SAXException
+ */
+ private String resolveIRINoResolve(String baseIri, String iri) throws SAXException {
+ if (RIUtils.isAbsoluteIri(iri)) {
+ return iri;
+ }
+ if (!XmlUtils.isValidNCName(iri)) {
+ error("Vocab term must be a valid NCName");
+ return null;
+ }
+ String result = baseIri + iri;
+ if (RIUtils.isAbsoluteIri(result)) {
+ return result;
+ }
+ error("Malformed IRI: " + iri);
+ return null;
+ }
+
+ /**
+ * Resolves specified IRI
+ * @param baseIri base to resolve against
+ * @param iri IRI to resolve
+ * @return resolved IRI or null on error
+ * @throws SAXException
+ */
+ private String resolveIRI(String baseIri, String iri) throws SAXException {
+ try {
+ return RIUtils.resolveIri(baseIri, iri);
+ } catch (MalformedIriException e) {
+ error(e.getMessage());
+ return null;
+ }
+ }
+
+ @Override
+ public void startDocument() throws SAXException {
+ mode = INSIDE_OF_PROPERTY;
+ sink.setBaseUri(baseUri);
+ baseStack.push(baseUri);
+ langStack.push(null);
+ captureLiteral = false;
+ subjRes = null;
+ seqTailRes = null;
+ predIri = null;
+ datatypeIri = null;
+ reifyIri = null;
+ parseDepth = 0;
+ }
+
+ @Override
+ public void endDocument() throws SAXException {
+ langStack.clear();
+ baseStack.clear();
+ subjStack.clear();
+ modeStack.clear();
+ subjLiIndexStack.clear();
+ nsMappings.clear();
+ processedIDs.clear();
+ parse = new StringBuilder();
+ pendingTriples.clear();
+ }
+
+ @Override
+ public void characters(char[] buffer, int offset, int length) throws SAXException {
+ processPendingTriples(true);
+ if (mode == PARSE_TYPE_LITERAL || captureLiteral) {
+ parse.append(String.copyValueOf(buffer, offset, length));
+ }
+ }
+
+ @Override
+ public void ignorableWhitespace(char[] buffer, int offset, int length) throws SAXException {
+ characters(buffer, offset, length);
+ }
+
+ @Override
+ public void processingInstruction(String target, String data) throws SAXException {
+ processPendingTriples(true);
+ if (parseDepth > 0 && mode == PARSE_TYPE_LITERAL) {
+ parse.append("").append(target).append(" ").append(data).append("?>");
+ }
+ }
+
+ @Override
+ public void comment(char[] buffer, int offset, int length) throws SAXException {
+ processPendingTriples(true);
+ if (parseDepth > 0 && mode == PARSE_TYPE_LITERAL) {
+ parse.append("");
+ }
+ }
+
+ @Override
+ public void startPrefixMapping(String abbr, String uri) throws SAXException {
+ if (mode == PARSE_TYPE_LITERAL) {
+ nsMappings.put(abbr, uri);
+ }
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ if (baseUri != null && !baseUri.isEmpty() && Character.isLetter(baseUri.charAt(baseUri.length() - 1))) {
+ this.baseUri = baseUri + "#";
+ } else {
+ this.baseUri = baseUri == null ? "" : baseUri;
+ }
+ }
+
+ @Override
+ public void setDocumentLocator(Locator arg0) {
+ }
+
+ @Override
+ public void skippedEntity(String arg0) throws SAXException {
+ }
+
+ @Override
+ public void endPrefixMapping(String arg0) throws SAXException {
+ }
+
+ @Override
+ public void endCDATA() throws SAXException {
+ }
+
+ @Override
+ public void endDTD() throws SAXException {
+ }
+
+ @Override
+ public void endEntity(String arg0) throws SAXException {
+ }
+
+ @Override
+ public void startCDATA() throws SAXException {
+ }
+
+ @Override
+ public void startDTD(String arg0, String arg1, String arg2) throws SAXException {
+ }
+
+ @Override
+ public void startEntity(String arg0) throws SAXException {
+ }
+
+ @Override
+ public io.github.sparqlanything.html.org.semarglproject.rdf.ParseException processException(SAXException e) {
+ Throwable cause = e.getCause();
+ if (cause instanceof io.github.sparqlanything.html.org.semarglproject.rdf.ParseException) {
+ return (ParseException) cause;
+ }
+ return new ParseException(e);
+ }
+
+ @Override
+ protected boolean setPropertyInternal(String key, Object value) {
+ if (StreamProcessor.PROCESSOR_GRAPH_HANDLER_PROPERTY.equals(key) && value instanceof io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler) {
+ processorGraphHandler = (ProcessorGraphHandler) value;
+ } else if (StreamProcessor.ENABLE_ERROR_RECOVERY.equals(key) && value instanceof Boolean) {
+ ignoreErrors = (Boolean) value;
+ }
+ return false;
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/TurtleSerializer.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/TurtleSerializer.java
new file mode 100644
index 00000000..ee45d0c9
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/TurtleSerializer.java
@@ -0,0 +1,239 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.Pipe;
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDF;
+
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Queue;
+import java.util.Set;
+
+/**
+ * Implementation of {@link TripleSink} which serializes triples to {@link CharSink} using
+ * Turtle syntax. *
+ */
+public final class TurtleSerializer extends Pipe implements TripleSink {
+
+ private static final String DOT_EOL = " .\n";
+ private static final String COMMA_EOL = " ,\n";
+ private static final String SEMICOLON_EOL = " ;\n";
+ private static final String EOL = "\n";
+
+ private static final String MULTILINE_QUOTE = "\"\"\"";
+ private static final char SINGLE_LINE_QUOTE = '"';
+ private static final char BNODE_START = '[';
+ private static final char BNODE_END = ']';
+ private static final char URI_START = '<';
+ private static final char URI_END = '>';
+
+ private static final char SPACE = ' ';
+ private static final char RDF_TYPE_ABBR = 'a';
+ private static final String INDENT = " ";
+
+ private String prevSubj;
+ private String prevPred;
+ private final Queue bnodeStack = new LinkedList();
+ private final Set namedBnodes = new HashSet();
+ private String baseUri;
+
+ private TurtleSerializer(CharSink sink) {
+ super(sink);
+ }
+
+ /**
+ * Creates instance of TurtleSerializer connected to specified sink.
+ * @param sink sink to be connected to
+ * @return instance of TurtleSerializer
+ */
+ public static TripleSink connect(CharSink sink) {
+ return new TurtleSerializer(sink);
+ }
+
+ @Override
+ public void addNonLiteral(String subj, String pred, String obj) {
+ try {
+ startTriple(subj, pred);
+ if (obj.startsWith(RDF.BNODE_PREFIX)) {
+ if (!namedBnodes.contains(obj) && obj.endsWith(RDF.SHORTENABLE_BNODE_SUFFIX)) {
+ openBnode(obj);
+ } else {
+ sink.process(obj);
+ }
+ } else {
+ serializeUri(obj);
+ }
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ public void addPlainLiteral(String subj, String pred, String content, String lang) {
+ try {
+ startTriple(subj, pred);
+ addContent(content);
+ if (lang != null) {
+ sink.process('@').process(lang);
+ }
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ public void addTypedLiteral(String subj, String pred, String content, String type) {
+ try {
+ startTriple(subj, pred);
+ addContent(content);
+ sink.process("^^");
+ serializeUri(type);
+ } catch (ParseException e) {
+ // ignore
+ }
+ }
+
+ @Override
+ public void startStream() throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ super.startStream();
+ prevSubj = null;
+ prevPred = null;
+ if (baseUri != null) {
+ sink.process("@base ").process(URI_START).process(baseUri).process(URI_END).process(DOT_EOL);
+ }
+ sink.process("@prefix rdf: ").process(URI_START).process(RDF.NS).process(URI_END).process(DOT_EOL);
+ bnodeStack.clear();
+ namedBnodes.clear();
+ }
+
+ @Override
+ public void endStream() throws ParseException {
+ while (!bnodeStack.isEmpty()) {
+ closeBnode();
+ }
+ if (prevPred != null) {
+ sink.process(DOT_EOL);
+ } else {
+ sink.process(EOL);
+ }
+ baseUri = null;
+ super.endStream();
+ }
+
+ @Override
+ protected boolean setPropertyInternal(String key, Object value) {
+ return false;
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ this.baseUri = baseUri.substring(0, baseUri.length() - 1);
+ }
+
+ private void startTriple(String subj, String pred) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (subj.equals(prevSubj)) {
+ if (pred.equals(prevPred)) {
+ sink.process(COMMA_EOL);
+ indent(2);
+ } else if (prevPred != null) {
+ sink.process(SEMICOLON_EOL);
+ indent(1);
+ serializePredicate(pred);
+ } else {
+ indent(0);
+ serializePredicate(pred);
+ }
+ } else {
+ if (!bnodeStack.isEmpty()) {
+ closeBnode();
+ startTriple(subj, pred);
+ return;
+ } else if (prevSubj != null) {
+ sink.process(DOT_EOL);
+ }
+ if (subj.startsWith(RDF.BNODE_PREFIX)) {
+ if (subj.endsWith(RDF.SHORTENABLE_BNODE_SUFFIX)) {
+ openBnode(subj);
+ } else {
+ sink.process(subj).process(SPACE);
+ namedBnodes.add(subj);
+ }
+ } else {
+ serializeUri(subj);
+ }
+ serializePredicate(pred);
+ }
+ prevSubj = subj;
+ prevPred = pred;
+ }
+
+ private void serializePredicate(String pred) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ if (RDF.TYPE.equals(pred)) {
+ sink.process(RDF_TYPE_ABBR).process(SPACE);
+ } else {
+ serializeUri(pred);
+ }
+ }
+
+ private void serializeUri(String uri) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ String escapedUri = uri.replace("\\", "\\\\").replace(">", "\\u003E");
+ if (escapedUri.startsWith(RDF.NS)) {
+ sink.process("rdf:").process(escapedUri.substring(RDF.NS.length()));
+ } else if (baseUri != null && escapedUri.startsWith(baseUri)) {
+ sink.process(URI_START).process(escapedUri.substring(baseUri.length())).process(URI_END);
+ } else {
+ sink.process(URI_START).process(escapedUri).process(URI_END);
+ }
+ sink.process(SPACE);
+ }
+
+ private void indent(int additionalIndent) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ for (int i = 0; i < bnodeStack.size() + additionalIndent; i++) {
+ sink.process(INDENT);
+ }
+ }
+
+ private void addContent(String content) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ String escapedContent = content.replace("\\", "\\\\").replace("\"", "\\\"");
+ if (escapedContent.contains(EOL)) {
+ sink.process(MULTILINE_QUOTE).process(escapedContent).process(MULTILINE_QUOTE);
+ } else {
+ sink.process(SINGLE_LINE_QUOTE).process(escapedContent).process(SINGLE_LINE_QUOTE);
+ }
+ }
+
+ private void openBnode(String obj) throws io.github.sparqlanything.html.org.semarglproject.rdf.ParseException {
+ sink.process(BNODE_START);
+ bnodeStack.offer(obj);
+ prevSubj = obj;
+ prevPred = null;
+ }
+
+ private void closeBnode() throws ParseException {
+ sink.process(BNODE_END);
+ bnodeStack.poll();
+ prevSubj = bnodeStack.peek();
+ prevPred = null;
+ if (prevSubj == null) {
+ sink.process(DOT_EOL);
+ }
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/DocumentContext.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/DocumentContext.java
new file mode 100644
index 00000000..94eaebe8
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/DocumentContext.java
@@ -0,0 +1,144 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf.rdfa;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.RdfaParser;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary;
+import io.github.sparqlanything.html.org.semarglproject.ri.RIUtils;
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedIriException;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDF;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDFa;
+
+import java.util.HashMap;
+import java.util.Map;
+
+final class DocumentContext {
+
+ static final short FORMAT_UNKNOWN = 0;
+ static final short FORMAT_HTML4 = 1;
+ static final short FORMAT_HTML5 = 2;
+ static final short FORMAT_XML = 3;
+ static final short FORMAT_SVG = 4;
+
+ private static final String RDFA_10_STRING = "rdfa 1.0";
+
+ private static final String HTML_ROOT_ELEMENT = "html";
+ private static final String HTML_BASE = "base";
+ private static final String SVG_ROOT_ELEMENT = "svg";
+
+ short documentFormat;
+ short rdfaVersion;
+
+ final io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.RdfaParser parser;
+
+ String base;
+ String originUri;
+
+ private Map bnodeMapping = new HashMap();
+ private int nextBnodeId;
+
+ DocumentContext(RdfaParser parser) {
+ this.parser = parser;
+ nextBnodeId = 0;
+ clear();
+ }
+
+ String resolveBNode(String value) {
+ if (value.startsWith(RDF.BNODE_PREFIX) || value.startsWith('[' + RDF.BNODE_PREFIX)
+ && value.charAt(value.length() - 1) == ']') {
+ String name;
+ if (value.charAt(0) == '[') {
+ name = value.substring(RDF.BNODE_PREFIX.length() + 1, value.length() - 1);
+ } else {
+ name = value.substring(RDF.BNODE_PREFIX.length());
+ }
+ if (!bnodeMapping.containsKey(name)) {
+ bnodeMapping.put(name, createBnode(false));
+ }
+ return bnodeMapping.get(name);
+ }
+ return null;
+ }
+
+ void detectFormat(String localName, String qName, String version) {
+ if (documentFormat == FORMAT_UNKNOWN) {
+ if (localName.equals(SVG_ROOT_ELEMENT)) {
+ documentFormat = FORMAT_SVG;
+ } else if (localName.equalsIgnoreCase(HTML_ROOT_ELEMENT)) {
+ documentFormat = FORMAT_HTML4;
+ } else {
+ documentFormat = FORMAT_XML;
+ }
+ }
+ if (qName.equalsIgnoreCase(HTML_ROOT_ELEMENT) && version != null
+ && version.toLowerCase().contains(RDFA_10_STRING)) {
+ rdfaVersion = RDFa.VERSION_10;
+ }
+ }
+
+ void detectBase(String qName, String xmlBase, String hRef) {
+ boolean xmlBaseF = (documentFormat == FORMAT_XML || documentFormat == FORMAT_SVG) && xmlBase != null;
+ if (xmlBaseF || qName.equalsIgnoreCase(HTML_BASE) && hRef != null) {
+ base = (xmlBaseF ? xmlBase : hRef).replaceAll("#.*", "");
+ }
+ }
+
+ String createBnode(boolean shortenable) {
+ if (shortenable) {
+ return RDF.BNODE_PREFIX + 'n' + (nextBnodeId++) + RDF.SHORTENABLE_BNODE_SUFFIX;
+ }
+ return RDF.BNODE_PREFIX + 'n' + nextBnodeId++;
+ }
+
+ void processDtd(String name, String publicId, String systemId) {
+ if (publicId == null) {
+ if (HTML_ROOT_ELEMENT.equalsIgnoreCase(name)) {
+ documentFormat = FORMAT_HTML5;
+ }
+ } else {
+ String publicIdLower = publicId.toLowerCase();
+ if (publicIdLower.contains(HTML_ROOT_ELEMENT)) {
+ documentFormat = FORMAT_HTML4;
+ }
+ if (publicIdLower.contains(RDFA_10_STRING)) {
+ rdfaVersion = RDFa.VERSION_10;
+ }
+ }
+ }
+
+ String resolveIri(String iri) throws MalformedIriException {
+ return RIUtils.resolveIri(base, iri);
+ }
+
+ void clear() {
+ rdfaVersion = parser.getRdfaVersion();
+ documentFormat = FORMAT_UNKNOWN;
+ bnodeMapping = new HashMap();
+ base = null;
+ originUri = null;
+ }
+
+ io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary loadVocabulary(String vocabUrl) {
+ return parser.loadVocabulary(vocabUrl);
+ }
+
+ void setBaseUri(String baseUri) {
+ if (base == null) {
+ originUri = baseUri;
+ }
+ this.base = baseUri;
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/EvalContext.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/EvalContext.java
new file mode 100644
index 00000000..4a1e7b22
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/EvalContext.java
@@ -0,0 +1,354 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf.rdfa;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.DocumentContext;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.RdfaParser;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary;
+import io.github.sparqlanything.html.org.semarglproject.ri.RIUtils;
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedCurieException;
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedIriException;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDFa;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.regex.Pattern;
+
+final class EvalContext {
+
+ // Initial context described in http://www.w3.org/2011/rdfa-context/rdfa-1.1.html
+ private static final Map RDFA11_INITIAL_CONTEXT = new HashMap();
+ private static final Pattern TERM_PATTERN = Pattern.compile("[a-zA-Z0-9_-]+", Pattern.DOTALL);
+
+ private static final String CAN_NOT_RESOLVE_TERM = "Can't resolve term ";
+
+ private static final String XHTML_VOCAB = "http://www.w3.org/1999/xhtml/vocab#";
+ private static final String POWDER_DESCRIBED_BY = "http://www.w3.org/2007/05/powder-s#describedby";
+
+ private static final String[] XHTML_VOCAB_PROPS = {
+ // XHTML Metainformation Vocabulary
+ "alternate", "appendix", "bookmark", "cite", "chapter", "contents",
+ "copyright", "first", "glossary", "help", "icon", "index", "itsRules",
+ "last", "license", "meta", "next", "p3pv1", "prev", "previous", "role",
+ "section", "stylesheet", "subsection", "start","top", "up",
+
+ // Items from the XHTML Role Module
+ "banner", "complementary", "contentinfo", "definition", "main",
+ "navigation", "note", "search",
+
+ // Items from the Accessible Rich Internet Applications Vocabulary
+ "alert", "alertdialog", "application", "article", "button", "checkbox",
+ "columnheader", "combobox", "dialog", "directory", "document", "form",
+ "grid", "gridcell", "group", "heading", "img", "link", "list", "listbox",
+ "listitem", "log", "marquee", "math", "menu", "menubar", "menuitem",
+ "menuitemcheckbox", "menuitemradio", "option", "presentation",
+ "progressbar", "radio", "radiogroup", "region", "row", "rowgroup",
+ "rowheader", "scrollbar", "separator", "slider", "spinbutton", "status",
+ "tab", "tablist", "tabpanel", "textbox", "timer", "toolbar", "tooltip",
+ "tree", "treegrid", "treeitem"
+ };
+
+ static {
+ // Vocabulary Prefixes of W3C Documents
+ RDFA11_INITIAL_CONTEXT.put("owl", "http://www.w3.org/2002/07/owl#");
+ RDFA11_INITIAL_CONTEXT.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+ RDFA11_INITIAL_CONTEXT.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
+ RDFA11_INITIAL_CONTEXT.put("rdfa", "http://www.w3.org/ns/rdfa#");
+ RDFA11_INITIAL_CONTEXT.put("xhv", "http://www.w3.org/1999/xhtml/vocab#");
+ RDFA11_INITIAL_CONTEXT.put("xsd", "http://www.w3.org/2001/XMLSchema#");
+ RDFA11_INITIAL_CONTEXT.put("grddl", "http://www.w3.org/2003/g/data-view#");
+ RDFA11_INITIAL_CONTEXT.put("ma", "http://www.w3.org/ns/ma-ont#");
+ RDFA11_INITIAL_CONTEXT.put("rif", "http://www.w3.org/2007/rif#");
+ RDFA11_INITIAL_CONTEXT.put("skos", "http://www.w3.org/2004/02/skos/core#");
+ RDFA11_INITIAL_CONTEXT.put("skosxl", "http://www.w3.org/2008/05/skos-xl#");
+ RDFA11_INITIAL_CONTEXT.put("wdr", "http://www.w3.org/2007/05/powder#");
+ RDFA11_INITIAL_CONTEXT.put("void", "http://rdfs.org/ns/void#");
+ RDFA11_INITIAL_CONTEXT.put("wdrs", "http://www.w3.org/2007/05/powder-s#");
+ RDFA11_INITIAL_CONTEXT.put("xml", "http://www.w3.org/XML/1998/namespace");
+
+ // Widely used Vocabulary prefixes
+ RDFA11_INITIAL_CONTEXT.put("cc", "http://creativecommons.org/ns#");
+ RDFA11_INITIAL_CONTEXT.put("ctag", "http://commontag.org/ns#");
+ RDFA11_INITIAL_CONTEXT.put("dc", "http://purl.org/dc/terms/");
+ RDFA11_INITIAL_CONTEXT.put("dcterms", "http://purl.org/dc/terms/");
+ RDFA11_INITIAL_CONTEXT.put("foaf", "http://xmlns.com/foaf/0.1/");
+ RDFA11_INITIAL_CONTEXT.put("gr", "http://purl.org/goodrelations/v1#");
+ RDFA11_INITIAL_CONTEXT.put("ical", "http://www.w3.org/2002/12/cal/icaltzd#");
+ RDFA11_INITIAL_CONTEXT.put("og", "http://ogp.me/ns#");
+ RDFA11_INITIAL_CONTEXT.put("rev", "http://purl.org/stuff/rev#");
+ RDFA11_INITIAL_CONTEXT.put("sioc", "http://rdfs.org/sioc/ns#");
+ RDFA11_INITIAL_CONTEXT.put("v", "http://rdf.data-vocabulary.org/#");
+ RDFA11_INITIAL_CONTEXT.put("vcard", "http://www.w3.org/2006/vcard/ns#");
+ RDFA11_INITIAL_CONTEXT.put("schema", "http://schema.org/");
+ }
+
+ Map iriMappings;
+ String subject;
+ String object;
+ List incomplTriples;
+ String lang;
+ String objectLit;
+ String objectLitDt;
+ List properties;
+ boolean parsingLiteral;
+ Map> listMapping;
+
+ private final DocumentContext documentContext;
+ private io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary vocab;
+ private String profile;
+
+ private EvalContext(String lang, io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary vocab, String profile, DocumentContext documentContext) {
+ this.subject = null;
+ this.object = null;
+ this.iriMappings = null;
+ this.incomplTriples = new ArrayList();
+ this.lang = lang;
+ this.objectLit = null;
+ this.objectLitDt = null;
+ this.vocab = vocab;
+ this.profile = profile;
+ this.properties = null;
+ this.parsingLiteral = false;
+ this.listMapping = null;
+ this.documentContext = documentContext;
+ }
+
+ static EvalContext createInitialContext(DocumentContext documentContext) {
+ // RDFa Core 1.0 processing sequence step 1
+ EvalContext initialContext = new EvalContext(null, null, null, documentContext);
+ initialContext.subject = documentContext.base;
+ initialContext.listMapping = new HashMap>();
+ initialContext.iriMappings = new TreeMap();
+ return initialContext;
+ }
+
+ EvalContext initChildContext(String profile, String vocab, String lang,
+ Map overwriteMappings) {
+ // RDFa Core 1.0 processing sequence step 2
+ EvalContext current = new EvalContext(this.lang, this.vocab, this.profile, documentContext);
+ current.listMapping = this.listMapping;
+ current.initPrefixMappings(iriMappings, overwriteMappings);
+
+ if (documentContext.rdfaVersion > RDFa.VERSION_10) {
+ if (profile != null) {
+ String newProfile = profile + "#";
+ if (current.profile == null) {
+ current.profile = newProfile;
+ } else {
+ current.profile = newProfile + ' ' + current.profile;
+ }
+ }
+ if (vocab != null) {
+ if (vocab.length() == 0) {
+ current.vocab = null;
+ } else {
+ current.vocab = documentContext.loadVocabulary(vocab);
+ }
+ }
+ }
+
+ // RDFa Core 1.0 processing sequence step 3
+ if (lang != null) {
+ current.lang = lang;
+ }
+ if (current.lang != null && current.lang.isEmpty()) {
+ current.lang = null;
+ }
+ return current;
+ }
+
+ private void initPrefixMappings(Map parentMappings, Map overwriteMappings) {
+ if (overwriteMappings.isEmpty()) {
+ iriMappings = parentMappings;
+ } else {
+ iriMappings = new TreeMap(parentMappings);
+ iriMappings.putAll(overwriteMappings);
+ }
+
+ if (documentContext.rdfaVersion > RDFa.VERSION_10) {
+ for (String prefix : overwriteMappings.keySet()) {
+ String standardMapping = RDFA11_INITIAL_CONTEXT.get(prefix);
+ String newMapping = overwriteMappings.get(prefix);
+ if (standardMapping != null && !standardMapping.equals(newMapping)) {
+ documentContext.parser.warning(RDFa.PREFIX_REDEFINITION, "Standard prefix "
+ + prefix + ": redefined to <" + newMapping + '>');
+ }
+ }
+ }
+ }
+
+ List getMappingForIri(String iri) {
+ if (!listMapping.containsKey(iri)) {
+ listMapping.put(iri, new ArrayList());
+ }
+ return listMapping.get(iri);
+ }
+
+ void addContent(String content) {
+ objectLit += content;
+ }
+
+ void updateBase(String oldBase, String base) {
+ if (object != null && object.equals(oldBase)) {
+ object = base;
+ }
+ if (subject != null && subject.equals(oldBase)) {
+ subject = base;
+ }
+ }
+
+ /**
+ * Resolves @predicate or @datatype according to RDFa Core 1.1 section 5
+ *
+ * @param value value of attribute
+ * @return resource IRI
+ * @throws MalformedIriException if IRI can not be resolved
+ */
+ String resolvePredOrDatatype(String value) throws MalformedIriException {
+ if (value == null || value.isEmpty()) {
+ throw new MalformedIriException("Empty predicate or datatype found");
+ }
+ if (value == io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.RdfaParser.AUTODETECT_DATE_DATATYPE) {
+ return RdfaParser.AUTODETECT_DATE_DATATYPE;
+ }
+ return resolveTermOrCurieOrAbsIri(value);
+ }
+
+ /**
+ * Resolves @about or @resource according to RDFa Core 1.1 section 5
+ *
+ * @param value value of attribute
+ * @return resource IRI
+ * @throws MalformedIriException if IRI can not be resolved
+ */
+ String resolveAboutOrResource(String value) throws MalformedIriException {
+ String result = documentContext.resolveBNode(value);
+ if (result != null) {
+ return result;
+ }
+ return resolveCurieOrIri(value, false);
+ }
+
+ /**
+ * Resolves @role according to Role Attribute 1.0 section 4
+ * @param value value of attribute
+ * @return role IRI
+ * @throws MalformedIriException if role can not be resolved
+ */
+ String resolveRole(String value) throws MalformedIriException {
+ if (TERM_PATTERN.matcher(value).matches()) {
+ return XHTML_VOCAB + value;
+ }
+ return resolveCurieOrIri(value, true);
+ }
+
+ /**
+ * Resolves TERMorCURIEorAbsIRI according to RDFa Core 1.1 section A
+ * @param value value to be resolved
+ * @return resource IRI
+ * @throws MalformedIriException if IRI can not be resolved
+ */
+ private String resolveTermOrCurieOrAbsIri(String value) throws MalformedIriException {
+ if (TERM_PATTERN.matcher(value).matches()) {
+ if (vocab == null && documentContext.rdfaVersion > RDFa.VERSION_10 && "describedby".equals(value)) {
+ return POWDER_DESCRIBED_BY;
+ }
+ String term;
+ if (vocab != null) {
+ term = vocab.resolveTerm(value);
+ } else {
+ term = resolveXhtmlTerm(value);
+ }
+ if (term == null) {
+ documentContext.parser.warning(RDFa.UNRESOLVED_TERM, CAN_NOT_RESOLVE_TERM + value);
+ throw new MalformedIriException(CAN_NOT_RESOLVE_TERM + value);
+ }
+ return term;
+ }
+ return resolveCurieOrIri(value, true);
+ }
+
+ Iterable expand(String pred) {
+ if (vocab == null) {
+ return Collections.EMPTY_LIST;
+ }
+ return vocab.expand(pred);
+ }
+
+ private String resolveCurieOrIri(String curie, boolean ignoreRelIri) throws MalformedIriException {
+ if (!ignoreRelIri && (curie == null || curie.isEmpty())) {
+ return documentContext.resolveIri(curie);
+ }
+ boolean safeSyntax = curie.startsWith("[") && curie.endsWith("]");
+ if (safeSyntax) {
+ curie = curie.substring(1, curie.length() - 1);
+ }
+
+ int delimPos = curie.indexOf(':');
+ if (delimPos == -1) {
+ if (safeSyntax || ignoreRelIri) {
+ throw new MalformedCurieException("CURIE with no prefix (" + curie + ") found");
+ }
+ return documentContext.resolveIri(curie);
+ }
+
+ String result = resolveMapping(curie, delimPos, safeSyntax);
+ if (RIUtils.isIri(result)) {
+ return result;
+ }
+ throw new MalformedIriException("Malformed IRI: " + curie);
+ }
+
+ private String resolveMapping(String curie, int delimPos, boolean safeSyntax) throws MalformedCurieException {
+ String localName = curie.substring(delimPos + 1);
+ String prefix = curie.substring(0, delimPos);
+
+ if (prefix.equals("_")) {
+ throw new MalformedCurieException("CURIE with invalid prefix (" + curie + ") found");
+ }
+
+ if (!iriMappings.containsKey(prefix)) {
+ if (documentContext.rdfaVersion > RDFa.VERSION_10 && RDFA11_INITIAL_CONTEXT.containsKey(prefix)) {
+ String nsUri = RDFA11_INITIAL_CONTEXT.get(prefix);
+ iriMappings.put(prefix, nsUri);
+ String result = nsUri + localName;
+ if (RIUtils.isIri(result)) {
+ return result;
+ }
+ throw new MalformedCurieException("Malformed CURIE (" + curie + ")");
+ }
+ if (!safeSyntax && RIUtils.isIri(curie)) {
+ return curie;
+ }
+ throw new MalformedCurieException("CURIE with unresolvable prefix found (" + curie + ")");
+ }
+ return iriMappings.get(prefix) + localName;
+ }
+
+ private static String resolveXhtmlTerm(String predicate) {
+ for (String link : XHTML_VOCAB_PROPS) {
+ if (link.equalsIgnoreCase(predicate)) {
+ return XHTML_VOCAB + link;
+ }
+ }
+ return null;
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/RdfaParser.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/RdfaParser.java
new file mode 100644
index 00000000..0c33c95d
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/RdfaParser.java
@@ -0,0 +1,1359 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf.rdfa;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler;
+import io.github.sparqlanything.html.org.semarglproject.rdf.RdfXmlParser;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.DocumentContext;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.EvalContext;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.VocabManager;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary;
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedCurieException;
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedIriException;
+import io.github.sparqlanything.html.org.semarglproject.ri.RIUtils;
+import io.github.sparqlanything.html.org.semarglproject.sink.Pipe;
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.XmlSink;
+import io.github.sparqlanything.html.org.semarglproject.source.StreamProcessor;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDF;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDFa;
+import io.github.sparqlanything.html.org.semarglproject.vocab.XSD;
+import io.github.sparqlanything.html.org.semarglproject.xml.XmlUtils;
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+import javax.xml.bind.DatatypeConverter;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+/**
+ * Implementation of streaming RDFa (1.0 and
+ * 1.1 ) parser. Supports HTML4, HTML5, XHTML1,
+ * XHTML5, XML and SVG inputs. Provides RDFa version and document syntax autodetection.
+ *
+ *
+ * List of supported options:
+ *
+ * {@link #RDFA_VERSION_PROPERTY}
+ * {@link StreamProcessor#PROCESSOR_GRAPH_HANDLER_PROPERTY}
+ * {@link #ENABLE_OUTPUT_GRAPH}
+ * {@link #ENABLE_PROCESSOR_GRAPH}
+ * {@link #ENABLE_VOCAB_EXPANSION}
+ *
+ */
+public final class RdfaParser extends Pipe implements XmlSink, TripleSink, ProcessorGraphHandler {
+
+ /**
+ * Used as a key with {@link #setProperty(String, Object)} method.
+ * RDFa version compatibility. Allowed values are {@link RDFa#VERSION_10} and {@link RDFa#VERSION_11}.
+ */
+ public static final String RDFA_VERSION_PROPERTY =
+ "http://semarglproject.org/rdfa/properties/version";
+
+ /**
+ * Used as a key with {@link #setProperty(String, Object)} method.
+ * Enables or disables generation of triples from output graph.
+ */
+ public static final String ENABLE_OUTPUT_GRAPH =
+ "http://semarglproject.org/rdfa/properties/enable-output-graph";
+
+ /**
+ * Used as a key with {@link #setProperty(String, Object)} method.
+ * Enables or disables generation of triples from processor graph.
+ * ProcessorGraphHandler will receive events regardless of this option.
+ */
+ public static final String ENABLE_PROCESSOR_GRAPH =
+ "http://semarglproject.org/rdfa/properties/enable-processor-graph";
+
+ /**
+ * Used as a key with {@link #setProperty(String, Object)} method.
+ * Enables or disables vocabulary
+ * expansion feature.
+ */
+ public static final String ENABLE_VOCAB_EXPANSION =
+ "http://semarglproject.org/rdfa/properties/enable-vocab-expansion";
+
+ static final String AUTODETECT_DATE_DATATYPE = "AUTODETECT_DATE_DATATYPE";
+
+ private static final ThreadLocal VOCAB_MANAGER = new ThreadLocal() {
+ @Override
+ protected VocabManager initialValue() {
+ return new VocabManager();
+ }
+ };
+
+ // flag used in incomplTriple list to indicate that following element should be
+ // treated as having @rev relation instead of @rel
+ private static final String REVERSED_TRIPLE_FLAG = null;
+ // flag used in listMapping list to indicate that following two elements represent literal object
+ // that allows to save some GC time and avoid creating literal objects hierarchy with following instanceof checks
+ private static final String LITERAL_OBJECT_FLAG = null;
+
+ private static final String BODY = "body";
+ private static final String HEAD = "head";
+ private static final String VERSION = "version";
+ private static final String METADATA = "metadata";
+
+ private static final String PLAIN_LITERAL = "";
+ private static final String XHTML_DEFAULT_XMLNS = "http://www.w3.org/1999/xhtml";
+
+ private static final String XHTML_VOCAB = "http://www.w3.org/1999/xhtml/vocab#";
+
+ // html5 support
+ private static final String DATETIME_ATTR = "datetime";
+ private static final String TIME_QNAME = "time";
+ private static final String VALUE_ATTR = "value";
+ private static final String DATA_ATTR = "data";
+ private static final String XML_BASE = "xml:base";
+
+ // keys for coalesce method
+ private static final String BASE_IF_HEAD_OR_BODY = "bihob";
+ private static final String BASE_IF_ROOT_NODE = "birn";
+ private static final String PARENT_OBJECT = "poie";
+ private static final String BNODE_IF_TYPEOF = RDFa.TYPEOF_ATTR;
+
+ private Deque contextStack = null;
+
+ private StringBuilder xmlString = null;
+ private List xmlStringPred = null;
+ private String xmlStringSubj = null;
+
+ private Short forcedRdfaVersion = null;
+ private boolean sinkOutputGraph;
+ private boolean sinkProcessorGraph;
+
+ private boolean expandVocab;
+ private final DocumentContext dh;
+ private final Splitter splitter;
+ private Locator locator = null;
+
+ private ProcessorGraphHandler processorGraphHandler = null;
+
+ private boolean rdfXmlInline = false;
+ private XmlSink rdfXmlParser = null;
+
+ private Map> patternProps = new HashMap>();
+ private List copyingPairs = new ArrayList();
+
+ private final Map overwriteMappings = new HashMap();
+
+ private RdfaParser(TripleSink sink) {
+ super(sink);
+ contextStack = new LinkedList();
+ dh = new DocumentContext(this);
+ splitter = new Splitter();
+ sinkProcessorGraph = true;
+ sinkOutputGraph = true;
+ expandVocab = false;
+ }
+
+ /**
+ * Creates instance of RdfaParser connected to specified sink
+ * @param sink sink to be connected to
+ * @return instance of RdfaParser
+ */
+ public static XmlSink connect(TripleSink sink) {
+ return new RdfaParser(sink);
+ }
+
+ @Override
+ public void startDocument() {
+ EvalContext initialContext = EvalContext.createInitialContext(dh);
+ initialContext.iriMappings.put("", XHTML_VOCAB);
+ contextStack.push(initialContext);
+
+ xmlString = null;
+ xmlStringPred = null;
+ xmlStringSubj = null;
+
+ rdfXmlInline = false;
+ rdfXmlParser = null;
+ }
+
+ @Override
+ public void endDocument() throws SAXException {
+ if (sinkOutputGraph) {
+ Iterator iterator = copyingPairs.iterator();
+ while (iterator.hasNext()) {
+ String subj = iterator.next();
+ String pattern = iterator.next();
+ if (patternProps.containsKey(pattern)) {
+ copyProps(subj, patternProps.get(pattern));
+ }
+ }
+
+ iterator = copyingPairs.iterator();
+ while (iterator.hasNext()) {
+ iterator.next();
+ String pattern = iterator.next();
+ patternProps.remove(pattern);
+ }
+ for (String pattern : patternProps.keySet()) {
+ addNonLiteralInternal(pattern, RDF.TYPE, RDFa.PATTERN);
+ copyProps(pattern, patternProps.get(pattern));
+ }
+ }
+
+ dh.clear();
+ contextStack.clear();
+ patternProps.clear();
+ copyingPairs.clear();
+ }
+
+ @Override
+ public void startElement(String nsUri, String localName, String qName, Attributes attrs) throws SAXException {
+ if (rdfXmlInline) {
+ rdfXmlParser.startElement(nsUri, localName, qName, attrs);
+ return;
+ } else if (dh.documentFormat == DocumentContext.FORMAT_SVG && localName.equals(METADATA)) {
+ if (rdfXmlParser == null) {
+ rdfXmlParser = RdfXmlParser.connect(this);
+ rdfXmlParser.setBaseUri(dh.base);
+ rdfXmlParser.startDocument();
+ }
+ rdfXmlInline = true;
+ return;
+ }
+
+ if (contextStack.size() < 4) {
+ String oldBase = dh.base;
+ dh.detectFormat(localName, qName, attrs.getValue(VERSION));
+ dh.detectBase(qName, attrs.getValue(XML_BASE), attrs.getValue(RDFa.HREF_ATTR));
+ if (!dh.base.equals(oldBase)) {
+ for (EvalContext ctx : contextStack) {
+ ctx.updateBase(oldBase, dh.base);
+ }
+ }
+ }
+
+ EvalContext parent = contextStack.peek();
+ if (parent.parsingLiteral) {
+ xmlString.append(XmlUtils.serializeOpenTag(nsUri, qName, parent.iriMappings, attrs, false));
+ }
+
+ if (dh.rdfaVersion > RDFa.VERSION_10 && attrs.getValue(RDFa.PREFIX_ATTR) != null) {
+ for (Iterator iterator = splitter.split(attrs.getValue(RDFa.PREFIX_ATTR)); iterator.hasNext(); ) {
+ String prefix = iterator.next();
+ int prefixLength = prefix.length();
+ if (prefixLength < 2 || prefix.charAt(prefixLength - 1) != ':' || !iterator.hasNext()) {
+ continue;
+ }
+ String uri = iterator.next();
+ startPrefixMapping(prefix.substring(0, prefixLength - 1), uri);
+ }
+ }
+
+ String lang = attrs.getValue(XmlUtils.XML_LANG);
+ if (lang == null) {
+ lang = attrs.getValue(XmlUtils.LANG);
+ }
+ EvalContext current = parent.initChildContext(attrs.getValue(RDFa.PROFILE_ATTR),
+ attrs.getValue(RDFa.VOCAB_ATTR), lang, overwriteMappings);
+ overwriteMappings.clear();
+
+ boolean skipTerms = dh.rdfaVersion > RDFa.VERSION_10 && attrs.getValue(RDFa.PROPERTY_ATTR) != null
+ && (dh.documentFormat == DocumentContext.FORMAT_HTML4
+ || dh.documentFormat == DocumentContext.FORMAT_HTML5);
+ List rels = convertRelRevToList(attrs.getValue(RDFa.REL_ATTR), skipTerms);
+ List revs = convertRelRevToList(attrs.getValue(RDFa.REV_ATTR), skipTerms);
+ boolean noRelsAndRevs = rels == null && revs == null;
+
+ boolean skipElement = findSubjectAndObject(qName, attrs, noRelsAndRevs, current, parent);
+
+ // don't fill parent list if subject was changed at this
+ // or previous step by current.parentObject
+ if (dh.rdfaVersion > RDFa.VERSION_10 && current.subject != null && (!current.subject.equals(parent.object)
+ || parent.subject != null && !parent.subject.equals(parent.object))) {
+ // RDFa Core 1.1 processing sequence step 8
+ current.listMapping = new HashMap>();
+ }
+
+ processRels(attrs, rels, current);
+ processRevs(revs, current);
+
+ if (current.object == null && !noRelsAndRevs) {
+ current.object = dh.createBnode(false);
+ }
+
+ processPropertyAttr(qName, attrs, current, parent, noRelsAndRevs);
+
+ if (dh.rdfaVersion > RDFa.VERSION_10) {
+ processRoleAttribute(attrs.getValue(RDFa.ID_ATTR), attrs.getValue(RDFa.ROLE_ATTR), current);
+ }
+
+ if (!skipElement) {
+ // RDFa Core 1.0 processing sequence step 10
+ // RDFa Core 1.1 processing sequence step 12
+ processIncompleteTriples(current, parent);
+ }
+
+ // RDFa Core 1.0 processing sequence step 11
+ // RDFa Core 1.1 processing sequence step 13
+ pushContext(current, parent, skipElement);
+ }
+
+ /**
+ * Splits @rel or @rev attribute value to list of predicates. Terms can be optionally ignored.
+ * @param propertyVal value of @rel or @rev attribute
+ * @param skipTerms is terms should be skipped
+ * @return list of predicates
+ */
+ private List convertRelRevToList(String propertyVal, boolean skipTerms) {
+ if (propertyVal == null) {
+ return null;
+ }
+ List result = new ArrayList();
+ Iterator iterator = splitter.split(propertyVal);
+ while (splitter.hasNext()) {
+ String pred = iterator.next();
+ if (skipTerms && pred.indexOf(':') == -1) {
+ continue;
+ }
+ result.add(pred);
+ }
+ if (skipTerms && result.isEmpty()) {
+ result = null;
+ }
+ return result;
+ }
+
+ /**
+ * Generates triples related to @role attribute
+ * @param id value of @id attribute
+ * @param roleVal value of @role attribute
+ * @param current current context
+ */
+ private void processRoleAttribute(String id, String roleVal, EvalContext current) {
+ if (roleVal == null) {
+ return;
+ }
+ String subject;
+ if (id != null) {
+ subject = dh.base + '#' + id;
+ } else {
+ subject = dh.createBnode(true);
+ }
+ Iterator iterator = splitter.split(roleVal);
+ while (splitter.hasNext()) {
+ try {
+ String role = current.resolveRole(iterator.next());
+ addNonLiteral(subject, XHTML_VOCAB + "role", role);
+ } catch (MalformedIriException e) {
+ // do nothing
+ }
+ }
+ }
+
+ /**
+ * Determines object and subject for current context
+ * @param qName node's qName
+ * @param attrs node's attributes
+ * @param noRelAndRev is no @rel and @rev attributes specified
+ * @param current current context
+ * @param parent parent context
+ * @return skip element flag
+ */
+ private boolean findSubjectAndObject(String qName, Attributes attrs, boolean noRelAndRev, EvalContext current,
+ EvalContext parent) {
+ String newSubject = null;
+ try {
+ if (dh.rdfaVersion > RDFa.VERSION_10) {
+ if (noRelAndRev) {
+ // RDFa Core 1.1 processing sequence step 5
+ if (attrs.getValue(RDFa.PROPERTY_ATTR) != null && attrs.getValue(RDFa.CONTENT_ATTR) == null
+ && attrs.getValue(VALUE_ATTR) == null && attrs.getValue(RDFa.DATATYPE_ATTR) == null) {
+ // RDFa Core 1.1 processing sequence step 5.1
+ current.subject = coalesce(qName, attrs, parent, current, RDFa.ABOUT_ATTR,
+ BASE_IF_ROOT_NODE, PARENT_OBJECT);
+
+ if (attrs.getValue(RDFa.TYPEOF_ATTR) != null) {
+ current.object = coalesce(qName, attrs, parent, current, RDFa.ABOUT_ATTR, BASE_IF_ROOT_NODE,
+ RDFa.RESOURCE_ATTR, DATA_ATTR, RDFa.HREF_ATTR, RDFa.SRC_ATTR, BNODE_IF_TYPEOF);
+ newSubject = current.object;
+ }
+ } else {
+ // RDFa Core 1.1 processing sequence step 5.2
+ current.subject = coalesce(qName, attrs, parent, current, RDFa.ABOUT_ATTR,
+ RDFa.RESOURCE_ATTR, DATA_ATTR, RDFa.HREF_ATTR, RDFa.SRC_ATTR, BASE_IF_ROOT_NODE,
+ BNODE_IF_TYPEOF, PARENT_OBJECT);
+ if (attrs.getValue(RDFa.TYPEOF_ATTR) != null) {
+ newSubject = current.subject;
+ }
+ }
+ } else {
+ // RDFa Core 1.1 processing sequence step 6
+ current.object = coalesce(qName, attrs, parent, current, RDFa.RESOURCE_ATTR, DATA_ATTR,
+ RDFa.HREF_ATTR, RDFa.SRC_ATTR);
+ current.subject = coalesce(qName, attrs, parent, current, RDFa.ABOUT_ATTR,
+ BASE_IF_ROOT_NODE, PARENT_OBJECT);
+ if (attrs.getValue(RDFa.TYPEOF_ATTR) != null) {
+ if (attrs.getValue(RDFa.ABOUT_ATTR) != null) {
+ newSubject = current.subject;
+ } else {
+ if (current.object == null) {
+ current.object = dh.createBnode(noRelAndRev);
+ }
+ newSubject = current.object;
+ }
+ }
+ }
+ } else {
+ if (noRelAndRev) {
+ // RDFa Core 1.0 processing sequence step 4
+ current.subject = coalesce(qName, attrs, parent, current, RDFa.ABOUT_ATTR, RDFa.SRC_ATTR,
+ RDFa.RESOURCE_ATTR, RDFa.HREF_ATTR, BASE_IF_HEAD_OR_BODY, BNODE_IF_TYPEOF, PARENT_OBJECT);
+ } else {
+ // RDFa Core 1.0 processing sequence step 5
+ current.subject = coalesce(qName, attrs, parent, current, RDFa.ABOUT_ATTR, RDFa.SRC_ATTR,
+ BASE_IF_HEAD_OR_BODY, BNODE_IF_TYPEOF, PARENT_OBJECT);
+ current.object = coalesce(qName, attrs, parent, current, RDFa.RESOURCE_ATTR, RDFa.HREF_ATTR);
+ }
+ if (attrs.getValue(RDFa.TYPEOF_ATTR) != null) {
+ newSubject = current.subject;
+ }
+ }
+ } catch (MalformedIriException e) {
+ warning(RDFa.WARNING, e.getMessage());
+ pushContextNoLiteral(current, parent);
+ }
+
+ if (newSubject != null) {
+ // RDFa Core 1.0 processing sequence step 6
+ // RDFa Core 1.1 processing sequence step 7
+ Iterator iterator = splitter.split(attrs.getValue(RDFa.TYPEOF_ATTR));
+ while (splitter.hasNext()) {
+ try {
+ String iri = current.resolvePredOrDatatype(iterator.next());
+ addNonLiteral(newSubject, RDF.TYPE, iri);
+ } catch (MalformedIriException e) {
+ // do nothing
+ }
+ }
+ }
+ return noRelAndRev && attrs.getValue(RDFa.PROPERTY_ATTR) == null
+ && (current.subject == null && parent.object == null || current.subject.equals(parent.object));
+ }
+
+ /**
+ * Iterates through attribute names list and returns first not null
+ * value of attribute with such name. Also processes special cases
+ * if no such attributes found:
+ *
+ * {@link #BNODE_IF_TYPEOF} - returns new bnode if typeof attr found
+ * {@link #PARENT_OBJECT} - returns parent.object
+ * {@link #BASE_IF_HEAD_OR_BODY} - returns base if processing head or body node in HTML
+ *
+ *
+ * @param tagName name of processed element
+ * @param attrs attribute list
+ * @param parent parent context
+ * @param current current context
+ * @param attrNames prioritized list of attributes
+ * @throws MalformedIriException
+ */
+ private String coalesce(String tagName, Attributes attrs, EvalContext parent,
+ EvalContext current, String... attrNames) throws MalformedIriException {
+ for (String attr : attrNames) {
+ if (attrs.getValue(attr) != null) {
+ if (attr.equals(RDFa.ABOUT_ATTR) || attr.equals(RDFa.RESOURCE_ATTR)) {
+ String val = attrs.getValue(attr);
+ if (val.equals("[]")) {
+ continue;
+ }
+ try {
+ return current.resolveAboutOrResource(val);
+ } catch (MalformedCurieException e) {
+ warning(RDFa.UNRESOLVED_CURIE, e.getMessage());
+ return null;
+ }
+ } else if (attr.equals(RDFa.HREF_ATTR) || attr.equals(RDFa.SRC_ATTR) || attr.equals(DATA_ATTR)) {
+ return dh.resolveIri(attrs.getValue(attr));
+ } else if (attr.equals(BNODE_IF_TYPEOF)) {
+ return dh.createBnode(false);
+ }
+ } else if (attr.equals(PARENT_OBJECT) && parent.object != null) {
+ return parent.object;
+ } else {
+ boolean isHeadOrBody = tagName.equals(HEAD) || tagName.equals(BODY);
+ boolean isRoot = contextStack.size() == 1 || attrs.getValue(RDFa.TYPEOF_ATTR) != null && isHeadOrBody;
+ if (isHeadOrBody && attr.equals(BASE_IF_HEAD_OR_BODY) || isRoot && attr.equals(BASE_IF_ROOT_NODE)) {
+ return dh.base;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Generates [incompleted] triples with predicates from @rel attribute
+ * @param attrs node's attributes
+ * @param rels list of predicates from @rel attribute
+ * @param current current context
+ */
+ private void processRels(Attributes attrs, List rels, EvalContext current) {
+ if (rels != null) {
+ boolean inList = dh.rdfaVersion > RDFa.VERSION_10 && attrs.getValue(RDFa.INLIST_ATTR) != null;
+ // RDFa Core 1.1 processing sequence steps 9 and 10
+ // RDFa Core 1.0 processing sequence steps 7 and 8
+ for (String predicate : rels) {
+ String iri;
+ try {
+ iri = current.resolvePredOrDatatype(predicate);
+ } catch (MalformedIriException e) {
+ continue;
+ }
+ if (inList) {
+ List list = current.getMappingForIri(iri);
+ if (current.object != null) {
+ list.add(current.object);
+ } else {
+ current.incomplTriples.add(list);
+ }
+ } else {
+ if (current.object != null) {
+ addNonLiteral(current.subject, iri, current.object);
+ } else {
+ current.incomplTriples.add(iri);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Generates [incompleted] triples with predicates from @rev attribute
+ * @param revs list of predicates from @rev attribute
+ * @param current current context
+ */
+ private void processRevs(List revs, EvalContext current) {
+ if (revs != null) {
+ for (String predicate : revs) {
+ // RDFa Core 1.1 processing sequence steps 9 and 10
+ try {
+ String iri = current.resolvePredOrDatatype(predicate);
+ if (current.object != null) {
+ addNonLiteral(current.object, iri, current.subject);
+ } else {
+ current.incomplTriples.add(REVERSED_TRIPLE_FLAG);
+ current.incomplTriples.add(iri);
+ }
+ } catch (MalformedIriException e) {
+ // do nothing
+ }
+ }
+ }
+ }
+
+ /**
+ * Processes @property attribute of specified node
+ * @param qName node's QName
+ * @param attrs node's attributes
+ * @param current current context
+ * @param parent parent context
+ * @param noRelsAndRevs are on @rel and @rev attributes specified
+ */
+ private void processPropertyAttr(String qName, Attributes attrs, EvalContext current,
+ EvalContext parent, boolean noRelsAndRevs) {
+ if (attrs.getValue(RDFa.PROPERTY_ATTR) == null) {
+ current.parsingLiteral = false;
+ return;
+ }
+
+ // RDFa Core 1.0 processing sequence step 9
+ // RDFa Core 1.1 processing sequence step 11
+ parseLiteralObject(qName, attrs, current, parent, noRelsAndRevs);
+
+ // noinspection StringEquality
+ current.parsingLiteral = current.objectLitDt == RDF.XML_LITERAL;
+ if (current.properties == null) {
+ current.objectLitDt = null;
+ current.parsingLiteral = false;
+ }
+ }
+
+ /**
+ * Determines literal object for specified node. Can change objectLitDt in current context
+ * @param qName node's QName
+ * @param attrs node's attributes
+ * @param current current context
+ * @param parent parent context
+ * @param noRelsAndRevs are on @rel and @rev attributes specified
+ */
+ private void parseLiteralObject(String qName, Attributes attrs, EvalContext current,
+ EvalContext parent, boolean noRelsAndRevs) {
+ String content = parseContent(attrs);
+ String langOrDt = parseDatatype(qName, attrs, current);
+
+ if (langOrDt != null && !RDF.XML_LITERAL.equals(langOrDt)) {
+ // RDFa Core 1.0 processing sequence step 9, typed literal case
+ // RDFa Core 1.1 processing sequence step 11, typed literal case
+ if (content != null) {
+ langOrDt = resolveLangOrDt(content, langOrDt, current);
+ } else {
+ current.objectLitDt = langOrDt;
+ langOrDt = null;
+ }
+ } else if (content != null) {
+ // RDFa Core 1.0 processing sequence step 9, plain literal case
+ // RDFa Core 1.1 processing sequence step 11, plain literal using @content case
+ langOrDt = current.lang;
+ } else if (langOrDt == null && dh.rdfaVersion > RDFa.VERSION_10) {
+ if (attrs.getValue(RDFa.CONTENT_ATTR) == null && attrs.getValue(VALUE_ATTR) == null && noRelsAndRevs) {
+ // RDFa Core 1.1 processing sequence step 11, no rel or rev or content case
+ try {
+ langOrDt = coalesce(qName, attrs, parent, current,
+ RDFa.RESOURCE_ATTR, DATA_ATTR, RDFa.HREF_ATTR, RDFa.SRC_ATTR);
+ } catch (MalformedIriException e) {
+ warning(RDFa.WARNING, e.getMessage());
+ pushContextNoLiteral(current, parent);
+ }
+ }
+ if (langOrDt == null) {
+ if (attrs.getValue(RDFa.ABOUT_ATTR) == null && attrs.getValue(RDFa.TYPEOF_ATTR) != null) {
+ // RDFa Core 1.1 processing sequence step 11, @typeof present and @about is not case
+ langOrDt = current.object;
+ if (current.object == null) {
+ // RDFa Core 1.1 processing sequence step 11, last plain literal case
+ current.objectLitDt = PLAIN_LITERAL;
+ }
+ } else {
+ // RDFa Core 1.1 processing sequence step 11, last plain literal case
+ current.objectLitDt = PLAIN_LITERAL;
+ }
+ }
+ } else {
+ if (langOrDt == null || langOrDt.length() > 0) {
+ // RDFa Core 1.0 processing sequence step 9, xml literal case
+ // RDFa Core 1.1 processing sequence step 11, xml literal case
+ current.objectLitDt = RDF.XML_LITERAL;
+ } else {
+ // RDFa Core 1.0 processing sequence step 9, plain literal case
+ // RDFa Core 1.1 processing sequence step 11, plain literal case
+ current.objectLitDt = PLAIN_LITERAL;
+ }
+ langOrDt = null;
+ }
+ boolean inList = attrs.getValue(RDFa.INLIST_ATTR) != null;
+ processPropertyPredicate(attrs, content, langOrDt, current, inList);
+ }
+
+ /**
+ * Extracts content for specified node with respect of HTML5 attributes
+ * @param attrs node's attributes
+ * @return content
+ */
+ private String parseContent(Attributes attrs) {
+ String content = attrs.getValue(RDFa.CONTENT_ATTR);
+ if (content == null && dh.documentFormat == DocumentContext.FORMAT_HTML5) {
+ if (attrs.getValue(VALUE_ATTR) != null) {
+ content = attrs.getValue(VALUE_ATTR);
+ }
+ if (attrs.getValue(DATETIME_ATTR) != null) {
+ content = attrs.getValue(DATETIME_ATTR);
+ }
+ }
+ return content;
+ }
+
+ /**
+ * Extracts datatype uri for specified node
+ * @param qName node's QName
+ * @param attrs node's attributes
+ * @param current current context
+ * @return datatype URI or {@link #AUTODETECT_DATE_DATATYPE} if datatype should be detected at validation phase
+ */
+ private String parseDatatype(String qName, Attributes attrs, EvalContext current) {
+ String datatype = attrs.getValue(RDFa.DATATYPE_ATTR);
+ if (dh.documentFormat == DocumentContext.FORMAT_HTML5) {
+ if (attrs.getValue(DATETIME_ATTR) != null) {
+ if (datatype == null) {
+ datatype = AUTODETECT_DATE_DATATYPE;
+ }
+ } else if (qName.equals(TIME_QNAME) && datatype == null) {
+ datatype = AUTODETECT_DATE_DATATYPE;
+ }
+ }
+ try {
+ if (datatype != null && datatype.length() > 0) {
+ datatype = current.resolvePredOrDatatype(datatype);
+ }
+ } catch (MalformedIriException e) {
+ datatype = null;
+ }
+ return datatype;
+ }
+
+ /**
+ * Generates triples corresponding to specified object and predicates from @property attribute
+ * @param attrs node's attributes
+ * @param content objects's content
+ * @param langOrUri object's content lang or datatype (if literal) or object's URI
+ * @param current current context
+ * @param inList is inlist property presented
+ */
+ private void processPropertyPredicate(Attributes attrs, String content, String langOrUri,
+ EvalContext current, boolean inList) {
+ Iterator iterator = splitter.split(attrs.getValue(RDFa.PROPERTY_ATTR));
+ while (splitter.hasNext()) {
+ String iri;
+ try {
+ iri = current.resolvePredOrDatatype(iterator.next());
+ } catch (MalformedIriException e) {
+ continue;
+ }
+ if (content != null || langOrUri != null) {
+ if (dh.rdfaVersion > RDFa.VERSION_10 && inList) {
+ List list = current.getMappingForIri(iri);
+ if (content != null) {
+ list.add(LITERAL_OBJECT_FLAG);
+ list.add(content);
+ list.add(langOrUri);
+ } else {
+ list.add(langOrUri);
+ }
+ } else {
+ if (content != null) {
+ addLiteralTriple(current.subject, iri, content, langOrUri);
+ } else {
+ addNonLiteral(current.subject, iri, langOrUri);
+ }
+ }
+ } else if (current.properties == null) {
+ current.properties = new ArrayList();
+ if (dh.rdfaVersion > RDFa.VERSION_10 && inList) {
+ current.properties.add(RDFa.INLIST_ATTR);
+ }
+ current.properties.add(iri);
+ } else {
+ current.properties.add(iri);
+ }
+ }
+ }
+
+ private String resolveLangOrDt(String content, String dt, EvalContext current) {
+ if (dt == null) {
+ return current.lang;
+ }
+ if (dt.equals(RdfaParser.AUTODETECT_DATE_DATATYPE)) {
+ try {
+ if (content.matches("-?P\\d+Y\\d+M\\d+DT\\d+H\\d+M\\d+(\\.\\d+)?S")) {
+ return XSD.DURATION;
+ }
+ if (content.indexOf(':') != -1) {
+ if (content.indexOf('T') != -1) {
+ DatatypeConverter.parseDateTime(content);
+ return XSD.DATE_TIME;
+ }
+ DatatypeConverter.parseTime(content);
+ return XSD.TIME;
+ }
+ if (content.matches("-?\\d{4,}")) {
+ return XSD.G_YEAR;
+ }
+ if (content.matches("-?\\d{4,}-(0[1-9]|1[0-2])")) {
+ return XSD.G_YEAR_MONTH;
+ }
+ DatatypeConverter.parseDate(content);
+ return XSD.DATE;
+ } catch (IllegalArgumentException e) {
+ return current.lang;
+ }
+ }
+ if (dt.indexOf(':') == -1) {
+ return current.lang;
+ }
+ return dt;
+ }
+
+ /**
+ * Generates triples from parent's incompleted triples list
+ * @param current current context
+ * @param parent parent context
+ */
+ private void processIncompleteTriples(EvalContext current, EvalContext parent) {
+ if (current.subject == null) {
+ return;
+ }
+ String subject = parent.subject;
+ for (Iterator iti = parent.incomplTriples.iterator(); iti.hasNext(); ) {
+ Object predicateOrList = iti.next();
+ if (predicateOrList == REVERSED_TRIPLE_FLAG) {
+ addNonLiteral(current.subject, (String) iti.next(), subject);
+ } else if (predicateOrList instanceof String) {
+ addNonLiteral(subject, (String) predicateOrList, current.subject);
+ } else {
+ @SuppressWarnings("unchecked")
+ Collection list = (Collection) predicateOrList;
+ list.add(current.subject);
+ }
+ }
+ }
+
+ /**
+ * Pushes current context to stack before processing child nodes
+ * @param current current context
+ * @param parent parent context
+ */
+ private void pushContext(EvalContext current, EvalContext parent, boolean skipElement) {
+ if (current.parsingLiteral) {
+ xmlString = new StringBuilder();
+ xmlStringPred = current.properties;
+ xmlStringSubj = current.subject == null ? parent.subject : current.subject;
+ }
+ if (current.parsingLiteral || skipElement) {
+ current.subject = parent.subject;
+ current.object = parent.object;
+ current.incomplTriples = parent.incomplTriples;
+ current.objectLit = null;
+ current.objectLitDt = parent.objectLitDt;
+ if (current.objectLitDt != null) {
+ current.objectLit = "";
+ }
+ current.properties = null;
+ contextStack.push(current);
+ } else {
+ pushContextNoLiteral(current, parent);
+ }
+ }
+
+ /**
+ * Pushes current context to stack before processing child nodes when no literals are parsed
+ * @param current current context
+ * @param parent parent context
+ */
+ private void pushContextNoLiteral(EvalContext current, EvalContext parent) {
+ if (current.subject == null) {
+ current.subject = parent.subject;
+ }
+ if (current.object == null) {
+ current.object = current.subject;
+ }
+ if (current.objectLitDt != null || parent.objectLitDt != null) {
+ current.objectLit = "";
+ }
+ contextStack.push(current);
+ }
+
+ @Override
+ public void endElement(String nsUri, String localName, String qName) throws SAXException {
+ if (rdfXmlInline) {
+ // delegate parsing to RDF/XML parser
+ if (dh.documentFormat == DocumentContext.FORMAT_SVG && localName.equals(METADATA)) {
+ rdfXmlParser.endDocument();
+ rdfXmlParser = null;
+ rdfXmlInline = false;
+ } else {
+ rdfXmlParser.endElement(nsUri, localName, qName);
+ }
+ return;
+ }
+
+ EvalContext current = contextStack.pop();
+ processXmlString(current);
+
+ // serialize close tag if parsing literal
+ if (xmlString != null) {
+ xmlString.append("").append(qName).append('>');
+ }
+
+ if (contextStack.isEmpty()) {
+ return;
+ }
+
+ EvalContext parent = contextStack.peek();
+ processContent(current, parent);
+
+ // noinspection ObjectEquality
+ if (parent.listMapping != current.listMapping) {
+ // current mapping isn't inherited from parent
+ // RDFa Core 1.0 processing sequence step 14
+ processListMappings(current);
+ }
+ }
+
+ /**
+ * Generates triples for parsed literal if it present
+ * @param current current context
+ */
+ private void processXmlString(EvalContext current) {
+ if (current.parsingLiteral && xmlString != null) {
+ String content = xmlString.toString();
+ xmlString = null;
+ if (dh.rdfaVersion == RDFa.VERSION_10 && content.indexOf('<') == -1) {
+ for (String pred : xmlStringPred) {
+ addPlainLiteral(xmlStringSubj, pred, content, current.lang);
+ }
+ } else {
+ for (String pred : xmlStringPred) {
+ addTypedLiteral(xmlStringSubj, pred, content, RDF.XML_LITERAL);
+ }
+ }
+ }
+ }
+
+ /**
+ * Generates triples for node content
+ * @param current current context
+ * @param parent parent context
+ */
+ private void processContent(EvalContext current, EvalContext parent) {
+ String content = current.objectLit;
+ if (content == null) {
+ return;
+ }
+ if (!parent.parsingLiteral && parent.objectLit != null) {
+ parent.objectLit += content;
+ }
+ if (current.properties == null) {
+ return;
+ }
+
+ String dt = current.objectLitDt;
+ boolean inlist = RDFa.INLIST_ATTR.equals(current.properties.get(0));
+
+ if (inlist) {
+ String langOrDt = resolveLangOrDt(content, dt, current);
+ current.properties.remove(0);
+ for (String predIri : current.properties) {
+ List mappingForIri = current.getMappingForIri(predIri);
+ mappingForIri.add(LITERAL_OBJECT_FLAG);
+ mappingForIri.add(content);
+ mappingForIri.add(langOrDt);
+ }
+ } else {
+ for (String predIri : current.properties) {
+ dt = resolveLangOrDt(content, dt, current);
+ addLiteralTriple(current.subject, predIri, content, dt);
+ }
+ }
+ }
+
+ /**
+ * Generates triples from list mappings on node close event
+ * @param current current context
+ */
+ private void processListMappings(EvalContext current) {
+ Map> list = current.listMapping;
+ for (String pred : list.keySet()) {
+ String prev = null;
+ String start = null;
+ for (Iterator iterator = list.get(pred).iterator(); iterator.hasNext(); ) {
+ String res = iterator.next();
+ String child = dh.createBnode(false);
+ // noinspection StringEquality
+ if (res == LITERAL_OBJECT_FLAG) {
+ String content = iterator.next();
+ String langOrDt = iterator.next();
+ addLiteralTriple(child, RDF.FIRST, content, langOrDt);
+ } else {
+ addNonLiteral(child, RDF.FIRST, res);
+ }
+ if (prev == null) {
+ start = child;
+ } else {
+ addNonLiteral(prev, RDF.REST, child);
+ }
+ prev = child;
+ }
+ if (start == null) {
+ addNonLiteral(current.subject, pred, RDF.NIL);
+ } else {
+ addNonLiteral(prev, RDF.REST, RDF.NIL);
+ addNonLiteral(current.subject, pred, start);
+ }
+ }
+ list.clear();
+ }
+
+ @Override
+ public void characters(char[] buffer, int start, int length) throws SAXException {
+ if (rdfXmlInline) {
+ rdfXmlParser.characters(buffer, start, length);
+ return;
+ }
+ EvalContext parent = contextStack.peek();
+ if (xmlString != null) {
+ xmlString.append(buffer, start, length);
+ }
+ if (parent.objectLit != null) {
+ parent.addContent(String.copyValueOf(buffer, start, length));
+ }
+ }
+
+ @Override
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+ if (rdfXmlInline) {
+ rdfXmlParser.ignorableWhitespace(ch, start, length);
+ }
+ }
+
+ @Override
+ public void startDTD(String name, String publicId, String systemId) throws SAXException {
+ dh.processDtd(name, publicId, systemId);
+ }
+
+ @Override
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
+ if (rdfXmlInline) {
+ rdfXmlParser.startPrefixMapping(prefix, uri);
+ return;
+ }
+ // TODO: check for valid prefix
+ if (prefix.length() == 0 && XHTML_DEFAULT_XMLNS.equalsIgnoreCase(uri)) {
+ overwriteMappings.put(prefix, XHTML_VOCAB);
+ } else {
+ try {
+ overwriteMappings.put(prefix, RIUtils.resolveIri(dh.originUri, uri));
+ } catch (MalformedIriException e) {
+ // do nothing
+ }
+ }
+ }
+
+ @Override
+ public void endPrefixMapping(String prefix) throws SAXException {
+ if (rdfXmlInline) {
+ rdfXmlParser.endPrefixMapping(prefix);
+ }
+ }
+
+ @Override
+ public boolean setPropertyInternal(String key, Object value) {
+ if (ENABLE_OUTPUT_GRAPH.equals(key) && value instanceof Boolean) {
+ sinkOutputGraph = (Boolean) value;
+ } else if (getRdfaVersion() != RDFa.VERSION_10 && ENABLE_PROCESSOR_GRAPH.equals(key)
+ && value instanceof Boolean) {
+ sinkProcessorGraph = (Boolean) value;
+ forcedRdfaVersion = RDFa.VERSION_11;
+ } else if (getRdfaVersion() != RDFa.VERSION_10 && ENABLE_VOCAB_EXPANSION.equals(key)
+ && value instanceof Boolean) {
+ expandVocab = (Boolean) value;
+ forcedRdfaVersion = RDFa.VERSION_11;
+// } else if (sinkProcessorGraph || expandVocab) {
+// forcedRdfaVersion = RDFa.VERSION_11;
+ } else if (RDFA_VERSION_PROPERTY.equals(key) && value instanceof Short) {
+ short rdfaVersion = (Short) value;
+ if (rdfaVersion < RDFa.VERSION_10 || rdfaVersion > RDFa.VERSION_11) {
+ throw new IllegalArgumentException("Unsupported RDFa version");
+ }
+ forcedRdfaVersion = rdfaVersion;
+ dh.rdfaVersion = forcedRdfaVersion;
+ if (rdfaVersion < RDFa.VERSION_11) {
+ sinkProcessorGraph = false;
+ expandVocab = false;
+ } else {
+ sinkProcessorGraph = true;
+ expandVocab = true;
+ }
+ } else if (StreamProcessor.PROCESSOR_GRAPH_HANDLER_PROPERTY.equals(key)
+ && value instanceof ProcessorGraphHandler) {
+ processorGraphHandler = (ProcessorGraphHandler) value;
+ return false;
+ } else {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ dh.setBaseUri(baseUri);
+ }
+
+ /**
+ * Loads vocabulary from specified URL. Vocabulary will not contain terms in case when
+ * vocabulary expansion is disabled.
+ *
+ * @param vocabUrl URL to load from
+ * @return loaded vocabulary (can be cached)
+ */
+ Vocabulary loadVocabulary(String vocabUrl) {
+ if (sinkOutputGraph) {
+ sink.addNonLiteral(dh.base, RDFa.USES_VOCABULARY, vocabUrl);
+ }
+ return VOCAB_MANAGER.get().findVocab(vocabUrl, expandVocab);
+ }
+
+ // error handling
+
+ @Override
+ public void info(String infoClass, String message) {
+ addProcessorGraphRecord(infoClass, message);
+ if (processorGraphHandler != null) {
+ processorGraphHandler.info(infoClass, message);
+ }
+ }
+
+ @Override
+ public void warning(String warningClass, String message) {
+ addProcessorGraphRecord(warningClass, message);
+ if (processorGraphHandler != null) {
+ processorGraphHandler.warning(warningClass, message);
+ }
+ }
+
+ @Override
+ public void error(String errorClass, String message) {
+ addProcessorGraphRecord(errorClass, message);
+ if (processorGraphHandler != null) {
+ processorGraphHandler.error(errorClass, message);
+ }
+ }
+
+ private void addProcessorGraphRecord(String recordClass, String recordContext) {
+ if (dh.rdfaVersion > RDFa.VERSION_10 && sinkProcessorGraph) {
+ String errorNode = dh.createBnode(true);
+ String location = "";
+ if (locator != null) {
+ location = " at " + locator.getLineNumber() + ':' + locator.getColumnNumber();
+ }
+ sink.addNonLiteral(errorNode, RDF.TYPE, recordClass);
+ sink.addPlainLiteral(errorNode, RDFa.CONTEXT, recordContext + location, null);
+ }
+ }
+
+ @Override
+ public ParseException processException(SAXException e) {
+ Throwable cause = e.getCause();
+ if (cause instanceof ParseException) {
+ error(RDFa.ERROR, cause.getMessage());
+ return (ParseException) cause;
+ }
+ error(RDFa.ERROR, e.getMessage());
+ return new ParseException(e);
+ }
+
+ private void copyProps(String subj, List props) {
+ Iterator iterator = props.iterator();
+ while (iterator.hasNext()) {
+ String type = iterator.next();
+ if (type == null) {
+ addNonLiteralInternal(subj, iterator.next(), iterator.next());
+ } else if (type.isEmpty()) {
+ addPlainLiteralInternal(subj, iterator.next(), iterator.next(), iterator.next());
+ } else {
+ addTypedLiteralInternal(subj, iterator.next(), iterator.next(), type);
+ }
+ }
+ }
+
+ // proxying TripleSink calls to filter output graph
+
+ private void addLiteralTriple(String subject, String pred, String content, String langOrDt) {
+ if (langOrDt == null || langOrDt.length() < 6 || langOrDt.indexOf(':') == -1) {
+ addPlainLiteral(subject, pred, content, langOrDt);
+ } else {
+ addTypedLiteral(subject, pred, content, langOrDt);
+ }
+ }
+
+ @Override
+ public void addNonLiteral(String subj, String pred, String obj) {
+ if (!sinkOutputGraph) {
+ return;
+ }
+ if (obj.equals(RDFa.PATTERN)) {
+ if (!patternProps.containsKey(subj)) {
+ patternProps.put(subj, new ArrayList());
+ }
+ return;
+ // TODO: check vocab expansion
+ } else if (pred.equals(RDFa.COPY)) {
+ if (patternProps.containsKey(obj)) {
+ copyProps(subj, patternProps.get(obj));
+ } else {
+ copyingPairs.add(subj);
+ copyingPairs.add(obj);
+ }
+ return;
+ } else if (patternProps.containsKey(subj)) {
+ List props = patternProps.get(subj);
+ props.add(null);
+ props.add(pred);
+ props.add(obj);
+ return;
+ }
+ addNonLiteralInternal(subj, pred, obj);
+ }
+
+ private void addNonLiteralInternal(String subj, String pred, String obj) {
+ if (!expandVocab) {
+ sink.addNonLiteral(subj, pred, obj);
+ return;
+ }
+ addNonLiteralWithObjExpansion(subj, pred, obj);
+ for (String predSynonym : contextStack.peek().expand(pred)) {
+ addNonLiteralWithObjExpansion(subj, predSynonym, obj);
+ }
+ }
+
+ private void addNonLiteralWithObjExpansion(String subj, String pred, String obj) {
+ if (obj.startsWith(RDF.BNODE_PREFIX)) {
+ sink.addNonLiteral(subj, pred, obj);
+ return;
+ }
+ sink.addNonLiteral(subj, pred, obj);
+ for (String objSynonym : contextStack.peek().expand(obj)) {
+ sink.addNonLiteral(subj, pred, objSynonym);
+ }
+ }
+
+ @Override
+ public void addPlainLiteral(String subj, String pred, String content, String lang) {
+ if (!sinkOutputGraph) {
+ return;
+ }
+ if (patternProps.containsKey(subj)) {
+ List props = patternProps.get(subj);
+ props.add("");
+ props.add(pred);
+ props.add(content);
+ props.add(lang);
+ return;
+ }
+ addPlainLiteralInternal(subj, pred, content, lang);
+ }
+
+ private void addPlainLiteralInternal(String subj, String pred, String content, String lang) {
+ sink.addPlainLiteral(subj, pred, content, lang);
+ for (String predSynonym : contextStack.peek().expand(pred)) {
+ sink.addPlainLiteral(subj, predSynonym, content, lang);
+ }
+ }
+
+ @Override
+ public void addTypedLiteral(String subj, String pred, String content, String type) {
+ if (!sinkOutputGraph) {
+ return;
+ }
+ if (patternProps.containsKey(subj)) {
+ List props = patternProps.get(subj);
+ props.add(type);
+ props.add(pred);
+ props.add(content);
+ return;
+ }
+ addTypedLiteralInternal(subj, pred, content, type);
+ }
+
+ private void addTypedLiteralInternal(String subj, String pred, String content, String type) {
+ sink.addTypedLiteral(subj, pred, content, type);
+ for (String predSynonym : contextStack.peek().expand(pred)) {
+ sink.addTypedLiteral(subj, predSynonym, content, type);
+ }
+ }
+
+ @Override
+ public void setDocumentLocator(Locator locator) {
+ this.locator = locator;
+ }
+
+ // ignored events
+
+ @Override
+ public void processingInstruction(String target, String data) throws SAXException {
+ }
+
+ @Override
+ public void skippedEntity(String name) throws SAXException {
+ }
+
+ @Override
+ public void startEntity(String s) throws SAXException {
+ }
+
+ @Override
+ public void endEntity(String s) throws SAXException {
+ }
+
+ @Override
+ public void startCDATA() throws SAXException {
+ }
+
+ @Override
+ public void endCDATA() throws SAXException {
+ }
+
+ @Override
+ public void comment(char[] chars, int i, int i1) throws SAXException {
+ }
+
+ @Override
+ public void endDTD() throws SAXException {
+ }
+
+ short getRdfaVersion() {
+ if (forcedRdfaVersion == null) {
+ return RDFa.VERSION_11;
+ }
+ return forcedRdfaVersion;
+ }
+
+ private static final class Splitter implements Iterator {
+ private int pos = -1;
+ private int length = -1;
+ private String string = null;
+
+ private Iterator split(String string) {
+ this.string = string;
+ length = string.length();
+ pos = 0;
+ while (pos < length && XmlUtils.WHITESPACE.get(string.charAt(pos))) {
+ pos++;
+ }
+ return this;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return pos < length;
+ }
+
+ @Override
+ public String next() {
+ int start = pos;
+ while (pos < length && !XmlUtils.WHITESPACE.get(string.charAt(pos))) {
+ pos++;
+ }
+ if (start == pos) {
+ throw new NoSuchElementException();
+ }
+ String result = string.substring(start, pos);
+ while (pos < length && XmlUtils.WHITESPACE.get(string.charAt(pos))) {
+ pos++;
+ }
+ if (pos == length) {
+ string = null;
+ }
+ return result;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/VocabManager.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/VocabManager.java
new file mode 100644
index 00000000..6425b865
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/VocabManager.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf.rdfa;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary;
+
+import java.util.HashMap;
+import java.util.Map;
+
+final class VocabManager {
+
+ private final Map vocabCache = new HashMap();
+
+ io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary findVocab(String vocabUrl, boolean expandVocab) {
+ io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary vocab = vocabCache.get(vocabUrl);
+ if (vocab != null) {
+ return vocab;
+ }
+ vocab = new io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.Vocabulary(vocabUrl);
+ vocabCache.put(vocabUrl, vocab);
+ if (expandVocab) {
+ vocab.load();
+ }
+ return vocab;
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/Vocabulary.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/Vocabulary.java
new file mode 100644
index 00000000..1176cc22
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/rdf/rdfa/Vocabulary.java
@@ -0,0 +1,172 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.rdf.rdfa;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.rdf.RdfXmlParser;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.RdfaParser;
+import io.github.sparqlanything.html.org.semarglproject.ri.RIUtils;
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+import io.github.sparqlanything.html.org.semarglproject.source.StreamProcessor;
+import io.github.sparqlanything.html.org.semarglproject.vocab.OWL;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDF;
+import io.github.sparqlanything.html.org.semarglproject.vocab.RDFS;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+final class Vocabulary {
+
+ private final String url;
+ private Map> expansions = null;
+ private Collection terms = null;
+
+ Vocabulary(String url) {
+ this.url = url;
+ }
+
+ private void addExpansion(String pred, String expansion) {
+ if (!expansions.containsKey(pred)) {
+ expansions.put(pred, new HashSet());
+ }
+ expansions.get(pred).add(expansion);
+ }
+
+ void load() {
+ VocabParser vocabParser = new VocabParser();
+
+ URL vocabUrl;
+ try {
+ vocabUrl = new URL(url);
+ } catch (MalformedURLException e) {
+ return;
+ }
+
+ if (expansions == null) {
+ expansions = new HashMap>();
+ terms = new HashSet();
+ }
+
+ StreamProcessor rdfaSp = new StreamProcessor(RdfaParser.connect(vocabParser));
+ rdfaSp.setProperty(RdfaParser.ENABLE_VOCAB_EXPANSION, false);
+ parseVocabWithDp(vocabUrl, rdfaSp);
+
+ if (!terms.isEmpty() || !expansions.isEmpty()) {
+ return;
+ }
+
+ // TODO: add format detection
+ StreamProcessor rdfXmlSp = new StreamProcessor(RdfXmlParser.connect(vocabParser));
+ rdfaSp.setProperty(RdfaParser.ENABLE_VOCAB_EXPANSION, false);
+ parseVocabWithDp(vocabUrl, rdfXmlSp);
+
+ if (terms.isEmpty() && expansions.isEmpty()) {
+ terms = null;
+ expansions = null;
+ }
+ }
+
+ private void parseVocabWithDp(URL vocabUrl, StreamProcessor streamProcessor) {
+ InputStream inputStream;
+ try {
+ inputStream = vocabUrl.openStream();
+ } catch (IOException e) {
+ return;
+ }
+ InputStreamReader reader = new InputStreamReader(inputStream);
+ try {
+ streamProcessor.process(reader, url);
+ } catch (ParseException e) {
+ // do nothing
+ } finally {
+ try {
+ reader.close();
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+ }
+
+ Collection expand(String uri) {
+ if (expansions == null || !expansions.containsKey(uri)) {
+ return Collections.EMPTY_LIST;
+ }
+ return expansions.get(uri);
+ }
+
+ String resolveTerm(String term) {
+ String termUri = url + term;
+ if (terms == null && RIUtils.isAbsoluteIri(termUri) || terms != null && terms.contains(termUri)) {
+ return termUri;
+ }
+ return null;
+ }
+
+ private final class VocabParser implements TripleSink {
+ @Override
+ public void addNonLiteral(String subj, String pred, String obj) {
+ if (subj.startsWith(RDF.BNODE_PREFIX) || obj.startsWith(RDF.BNODE_PREFIX)) {
+ return;
+ }
+ if (pred.equals(OWL.EQUIVALENT_PROPERTY) || pred.equals(OWL.EQUIVALENT_CLASS)) {
+ addExpansion(subj, obj);
+ addExpansion(obj, subj);
+ terms.add(obj);
+ terms.add(subj);
+ } else if (pred.equals(RDFS.SUB_CLASS_OF) || pred.equals(RDFS.SUB_PROPERTY_OF)) {
+ addExpansion(subj, obj);
+ terms.add(obj);
+ terms.add(subj);
+ }
+ if (pred.equals(RDF.TYPE) && (obj.equals(RDF.PROPERTY) || obj.equals(RDFS.CLASS))) {
+ terms.add(subj);
+ }
+ }
+
+ @Override
+ public void addPlainLiteral(String subj, String pred, String content, String lang) {
+ }
+
+ @Override
+ public void addTypedLiteral(String subj, String pred, String content, String type) {
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ }
+
+ @Override
+ public void startStream() throws ParseException {
+ }
+
+ @Override
+ public void endStream() throws ParseException {
+ }
+
+ @Override
+ public boolean setProperty(String key, Object value) {
+ return false;
+ }
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/MalformedCurieException.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/MalformedCurieException.java
new file mode 100644
index 00000000..35baac81
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/MalformedCurieException.java
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.ri;
+
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedIriException;
+
+public final class MalformedCurieException extends MalformedIriException {
+
+ private static final long serialVersionUID = -1077691754818847298L;
+
+ public MalformedCurieException(String message) {
+ super(message);
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/MalformedIriException.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/MalformedIriException.java
new file mode 100644
index 00000000..bf6f6f5e
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/MalformedIriException.java
@@ -0,0 +1,25 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.ri;
+
+public class MalformedIriException extends Exception {
+
+ private static final long serialVersionUID = -8791044111458438579L;
+
+ public MalformedIriException(String message) {
+ super(message);
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/RIUtils.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/RIUtils.java
new file mode 100644
index 00000000..52ab8eea
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/ri/RIUtils.java
@@ -0,0 +1,119 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.ri;
+
+import io.github.sparqlanything.html.org.semarglproject.ri.MalformedIriException;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.regex.Pattern;
+
+/**
+ * Utility class. Provides methods related to resource identifiers.
+ */
+public final class RIUtils {
+
+ private static final Pattern ABS_OPAQUE_IRI_PATTERN = Pattern.compile(
+ // scheme
+ "[a-zA-Z][a-zA-Z0-9+.-]*:"
+ // opaque part
+ + "[^#/][^#]*",
+ Pattern.DOTALL);
+
+ private static final Pattern ABS_HIER_IRI_PATTERN = Pattern.compile(
+ // scheme
+ "[a-zA-Z][a-zA-Z0-9+.-]*:"
+ // user
+ + "/{1,3}(([^/?#@]*)@)?"
+ // host
+ + "(\\[[^@/?#]+\\]|([^@/?#:]+))"
+ // port
+ + "(:([^/?#]*))?"
+ // path
+ + "([^#?]*)?"
+ // query
+ + "(\\?([^#]*))?"
+ // fragment
+ + "(#[^#]*)?",
+ Pattern.DOTALL);
+
+ private static final Pattern URN_PATTERN = Pattern.compile("urn:[a-zA-Z0-9][a-zA-Z0-9-]{1,31}:.+");
+
+ private RIUtils() {
+ }
+
+ /**
+ * Resolves specified IRI. Absolute IRI are returned unmodified
+ * @param base base to resolve against
+ * @param iri IRI to be resolved
+ * @return resolved absolute IRI
+ * @throws MalformedIriException
+ */
+ public static String resolveIri(String base, String iri) throws MalformedIriException {
+ if (iri == null) {
+ return null;
+ }
+ if (isIri(iri) || isUrn(iri)) {
+ return iri;
+ } else {
+ if (iri.startsWith("?") || iri.isEmpty()) {
+ if (base.endsWith("#")) {
+ return base.substring(0, base.length() - 1) + iri;
+ }
+ return base + iri;
+ }
+ String result;
+ try {
+ URL basePart = new URL(base);
+ result = new URL(basePart, iri).toString();
+ } catch (MalformedURLException e) {
+ result = base + iri;
+ }
+ if (isIri(result)) {
+ return result;
+ }
+ throw new MalformedIriException("Malformed IRI: " + iri);
+ }
+ }
+
+ /**
+ * Checks if specified string is IRI
+ * @param value value to check
+ * @return true if value is IRI
+ */
+ public static boolean isIri(String value) {
+ return ABS_HIER_IRI_PATTERN.matcher(value).matches() || ABS_OPAQUE_IRI_PATTERN.matcher(value).matches();
+ }
+
+ /**
+ * Checks if specified string is absolute IRI
+ * @param value value to check
+ * @return true if value is absolute IRI
+ */
+ public static boolean isAbsoluteIri(String value) {
+ return ABS_HIER_IRI_PATTERN.matcher(value).matches();
+ }
+
+ /**
+ * Checks if specified string is URN
+ * @param value value to check
+ * @return true if value is URN
+ */
+ public static boolean isUrn(String value) {
+ return URN_PATTERN.matcher(value).matches();
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/CharOutputSink.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/CharOutputSink.java
new file mode 100644
index 00000000..c766c5f4
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/CharOutputSink.java
@@ -0,0 +1,199 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.sink;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+
+/**
+ * Implementation of {@link io.github.sparqlanything.html.org.semarglproject.sink.CharSink}. Provides bridging to Java IO APIs
+ * ({@link Writer}, {@link OutputStream}, {@link File}).
+ */
+public final class CharOutputSink implements CharSink {
+
+ private File file;
+ private Writer writer;
+ private OutputStream outputStream;
+ private boolean closeOnEndStream;
+ private final Charset charset;
+
+ private static final short BATCH_SIZE = 256;
+ private StringBuilder buffer;
+ private short bufferSize;
+
+ /**
+ * Creates class instance with default charset encoding..
+ */
+ public CharOutputSink() {
+ this(Charset.defaultCharset());
+ }
+
+ /**
+ * Creates class instance with specified charset encoding.
+ * @param charset charset
+ */
+ public CharOutputSink(Charset charset) {
+ this.charset = charset;
+ }
+
+ /**
+ * Creates class instance with specified charset name.
+ * @param charsetName charset name
+ */
+ public CharOutputSink(String charsetName) {
+ this.charset = Charset.forName(charsetName);
+ }
+
+ /**
+ * Redirects output to specified file
+ * @param file output file
+ */
+ public void connect(File file) {
+ this.file = file;
+ this.writer = null;
+ this.outputStream = null;
+ this.closeOnEndStream = true;
+ }
+
+ /**
+ * Redirects output to specified writer
+ * @param writer output writer
+ */
+ public void connect(Writer writer) {
+ this.file = null;
+ this.writer = writer;
+ this.outputStream = null;
+ this.closeOnEndStream = false;
+ }
+
+ /**
+ * Redirects output to specified stream
+ * @param outputStream output stream
+ */
+ public void connect(OutputStream outputStream) {
+ this.file = null;
+ this.writer = null;
+ this.outputStream = outputStream;
+ this.closeOnEndStream = false;
+ }
+
+ @Override
+ public CharOutputSink process(String str) throws ParseException {
+ buffer.append(str);
+ bufferSize += str.length();
+ writeBuffer();
+ return this;
+ }
+
+ @Override
+ public CharOutputSink process(char ch) throws ParseException {
+ buffer.append(ch);
+ bufferSize++;
+ writeBuffer();
+ return this;
+ }
+
+ @Override
+ public CharOutputSink process(char[] buffer, int start, int count) throws ParseException {
+ this.buffer.append(buffer, start, count);
+ bufferSize += count;
+ writeBuffer();
+ return this;
+ }
+
+ private void writeBuffer() {
+ if (bufferSize >= BATCH_SIZE) {
+ try {
+ try {
+ writer.write(buffer.toString());
+ } catch (IOException e) {
+ throw new ParseException(e);
+ }
+ } catch (ParseException e) {
+ // do nothing
+ }
+ buffer = new StringBuilder(BATCH_SIZE);
+ bufferSize = 0;
+ }
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ }
+
+ @Override
+ public void startStream() throws ParseException {
+ buffer = new StringBuilder();
+ bufferSize = 0;
+ if (writer == null) {
+ if (file != null) {
+ try {
+ writer = new OutputStreamWriter(new FileOutputStream(file), charset);
+ } catch (FileNotFoundException e) {
+ throw new ParseException(e);
+ }
+ } else if (outputStream != null) {
+ writer = new OutputStreamWriter(outputStream, charset);
+ }
+ }
+ }
+
+ @Override
+ public void endStream() throws ParseException {
+ buffer.append("\n");
+ bufferSize = BATCH_SIZE;
+ writeBuffer();
+ try {
+ writer.flush();
+ } catch (IOException e) {
+ throw new ParseException(e);
+ }
+ if (closeOnEndStream) {
+ if (writer != null) {
+ closeQuietly(writer);
+ writer = null;
+ } else if (outputStream != null) {
+ closeQuietly(outputStream);
+ outputStream = null;
+ }
+ }
+ }
+
+ @Override
+ public boolean setProperty(String key, Object value) {
+ return false;
+ }
+
+ private static void closeQuietly(Closeable closeable) {
+ try {
+ if (closeable != null) {
+ closeable.close();
+ }
+ } catch (IOException ioe) {
+ // ignore
+ }
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/CharSink.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/CharSink.java
new file mode 100644
index 00000000..904ed5d8
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/CharSink.java
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.sink;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.DataSink;
+
+/**
+ * Interface for handling events from CharSource
+ */
+public interface CharSink extends DataSink {
+
+ /**
+ * Callback for string processing
+ *
+ * @param str string for processing
+ * @throws ParseException
+ */
+ CharSink process(String str) throws ParseException;
+
+ /**
+ * Callback for char processing
+ *
+ * @param ch char for processing
+ * @throws ParseException
+ */
+ CharSink process(char ch) throws ParseException;
+
+ /**
+ * Callback for buffer processing
+ *
+ * @param buffer char buffer for processing
+ * @param start position to start
+ * @param count count of chars to process
+ * @throws ParseException
+ */
+ CharSink process(char[] buffer, int start, int count) throws ParseException;
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/DataSink.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/DataSink.java
new file mode 100644
index 00000000..475d5a53
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/DataSink.java
@@ -0,0 +1,50 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.sink;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+
+/**
+ * Base sink interface.
+ */
+public interface DataSink {
+
+ /**
+ * Sets document base URI. Must be called befor start stream event.
+ * @param baseUri base URI
+ */
+ void setBaseUri(String baseUri);
+
+ /**
+ * Callback for start stream event.
+ * @throws ParseException
+ */
+ void startStream() throws ParseException;
+
+ /**
+ * Callback for end stream event.
+ * @throws ParseException
+ */
+ void endStream() throws ParseException;
+
+ /**
+ * Key-value based settings. Property settings are passed to child sinks.
+ * @param key property key
+ * @param value property value
+ * @return true if at least one sink understands specified property, false otherwise
+ */
+ boolean setProperty(String key, Object value);
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/Pipe.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/Pipe.java
new file mode 100644
index 00000000..6ab7ae6c
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/Pipe.java
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.sink;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.DataSink;
+
+/**
+ * Base class for pipeline procecessing blocks with one source and one sink.
+ * @param class of output sink
+ */
+public abstract class Pipe implements DataSink {
+
+ protected final S sink;
+
+ protected Pipe(S sink) {
+ this.sink = sink;
+ }
+
+ @Override
+ public void startStream() throws ParseException {
+ sink.startStream();
+ }
+
+ @Override
+ public void endStream() throws ParseException {
+ sink.endStream();
+ }
+
+ @Override
+ public final boolean setProperty(String key, Object value) {
+ boolean sinkResult = false;
+ if (sink != null) {
+ sinkResult = sink.setProperty(key, value);
+ }
+ return setPropertyInternal(key, value) || sinkResult;
+ }
+
+ protected abstract boolean setPropertyInternal(String key, Object value);
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/QuadSink.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/QuadSink.java
new file mode 100644
index 00000000..4cf04161
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/QuadSink.java
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.sparqlanything.html.org.semarglproject.sink;
+
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+
+/**
+ * Interface for quad consuming
+ */
+public interface QuadSink extends TripleSink {
+
+ /**
+ * Callback for handling triples with non literal object
+ * @param subj subject's IRI or BNode name
+ * @param pred predicate's IRI
+ * @param obj object's IRI or BNode name
+ * @param graph graph's IRI
+ */
+ void addNonLiteral(String subj, String pred, String obj, String graph);
+
+ /**
+ * Callback for handling triples with plain literal objects
+ * @param subj subject's IRI or BNode name
+ * @param pred predicate's IRI
+ * @param content unescaped string representation of content
+ * @param lang content's lang, can be null if no language specified
+ * @param graph graph's IRI
+ */
+ void addPlainLiteral(String subj, String pred, String content, String lang, String graph);
+
+ /**
+ * Callback for handling triples with typed literal objects
+ * @param subj subject's IRI or BNode name
+ * @param pred predicate's IRI
+ * @param content unescaped string representation of content
+ * @param type literal datatype's IRI
+ * @param graph graph's IRI
+ */
+ void addTypedLiteral(String subj, String pred, String content, String type, String graph);
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/TripleSink.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/TripleSink.java
new file mode 100644
index 00000000..73ca79e8
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/TripleSink.java
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.sink;
+
+import io.github.sparqlanything.html.org.semarglproject.sink.DataSink;
+
+/**
+ * Interface for triple consuming.
+ */
+public interface TripleSink extends DataSink {
+
+ /**
+ * Callback for handling triples with non literal object
+ * @param subj subject's IRI or BNode name
+ * @param pred predicate's IRI
+ * @param obj object's IRI or BNode name
+ */
+ void addNonLiteral(String subj, String pred, String obj);
+
+ /**
+ * Callback for handling triples with plain literal objects
+ * @param subj subject's IRI or BNode name
+ * @param pred predicate's IRI
+ * @param content unescaped string representation of content
+ * @param lang content's lang, can be null if no language specified
+ */
+ void addPlainLiteral(String subj, String pred, String content, String lang);
+
+ /**
+ * Callback for handling triples with typed literal objects
+ * @param subj subject's IRI or BNode name
+ * @param pred predicate's IRI
+ * @param content unescaped string representation of content
+ * @param type literal datatype's IRI
+ */
+ void addTypedLiteral(String subj, String pred, String content, String type);
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/XmlSink.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/XmlSink.java
new file mode 100644
index 00000000..f27e1827
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/sink/XmlSink.java
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.sink;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.DataSink;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Sink interface for streaming XML processors.
+ */
+public interface XmlSink extends DataSink, ContentHandler, LexicalHandler {
+
+ /**
+ * Unwraps underlying ParseException from SAXException or
+ * wraps generic SAXException with ParseException.
+ */
+ ParseException processException(SAXException e);
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/AbstractSource.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/AbstractSource.java
new file mode 100644
index 00000000..3f304dd2
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/AbstractSource.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.source;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.DataSink;
+
+import java.io.InputStream;
+import java.io.Reader;
+
+abstract class AbstractSource {
+
+ protected final S sink;
+
+ protected AbstractSource(S sink) {
+ this.sink = sink;
+ }
+
+ protected abstract void process(Reader reader, String mimeType, String baseUri) throws ParseException;
+
+ protected abstract void process(InputStream inputStream, String mimeType, String baseUri) throws ParseException;
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/BaseStreamProcessor.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/BaseStreamProcessor.java
new file mode 100644
index 00000000..6864e399
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/BaseStreamProcessor.java
@@ -0,0 +1,200 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.source;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.DataSink;
+import io.github.sparqlanything.html.org.semarglproject.sink.XmlSink;
+import io.github.sparqlanything.html.org.semarglproject.source.AbstractSource;
+import io.github.sparqlanything.html.org.semarglproject.source.CharSource;
+import io.github.sparqlanything.html.org.semarglproject.source.XmlSource;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+
+/**
+ * Pipeline managing class to subclass from.
+ */
+public abstract class BaseStreamProcessor {
+
+ protected abstract void startStream() throws ParseException;
+
+ protected abstract void endStream() throws ParseException;
+
+ protected abstract void processInternal(Reader reader, String mimeType, String baseUri) throws ParseException;
+
+ protected abstract void processInternal(InputStream inputStream, String mimeType,
+ String baseUri) throws ParseException;
+
+ /**
+ * Key-value based settings. Property settings are passed to child sinks.
+ * @param key property key
+ * @param value property value
+ * @return true if at least one sink understands specified property, false otherwise
+ */
+ public abstract boolean setProperty(String key, Object value);
+
+ /**
+ * Processes specified document's file using file path as base URI
+ * @param file document's file
+ * @throws ParseException
+ */
+ public final void process(File file) throws ParseException {
+ String baseUri = "file://" + file.getAbsolutePath();
+ process(file, baseUri);
+ }
+
+ /**
+ * Processes specified document's file
+ * @param file document's file
+ * @param baseUri document's URI
+ * @throws ParseException
+ */
+ public final void process(File file, String baseUri) throws ParseException {
+ FileReader reader;
+ try {
+ reader = new FileReader(file);
+ } catch (FileNotFoundException e) {
+ throw new ParseException(e);
+ }
+ try {
+ process(reader, null, baseUri);
+ } finally {
+ closeQuietly(reader);
+ }
+ }
+
+ /**
+ * Processes document pointed by specified URI
+ * @param uri document's URI
+ * @throws ParseException
+ */
+ public final void process(String uri) throws ParseException {
+ process(uri, uri);
+ }
+
+ /**
+ * Processes document pointed by specified URI. Uses specified URI as document's base.
+ * @param uri document's URI
+ * @param baseUri document's URI
+ * @throws ParseException
+ */
+ public final void process(String uri, String baseUri) throws ParseException {
+ URL url;
+ try {
+ url = new URL(uri);
+ } catch (MalformedURLException e) {
+ throw new ParseException(e);
+ }
+ try {
+ URLConnection urlConnection = url.openConnection();
+ String mimeType = urlConnection.getContentType();
+ InputStream inputStream = urlConnection.getInputStream();
+ try {
+ process(inputStream, mimeType, baseUri);
+ } finally {
+ closeQuietly(inputStream);
+ }
+ } catch (IOException e) {
+ throw new ParseException(e);
+ }
+ }
+
+ /**
+ * Processes stream input for document
+ * @param inputStream document's input stream
+ * @param baseUri document's base URI
+ * @throws ParseException
+ */
+ public void process(InputStream inputStream, String baseUri) throws ParseException {
+ process(inputStream, null, baseUri);
+ }
+
+ /**
+ * Processes stream input for document
+ * @param inputStream document's input stream
+ * @param mimeType document's MIME type
+ * @param baseUri document's base URI
+ * @throws ParseException
+ */
+ public final void process(InputStream inputStream, String mimeType, String baseUri) throws ParseException {
+ startStream();
+ try {
+ processInternal(inputStream, mimeType, baseUri);
+ } finally {
+ endStream();
+ }
+ }
+
+ /**
+ * Processes reader input for document's
+ * @param reader document's reader
+ * @throws ParseException
+ */
+ public void process(Reader reader, String baseUri) throws ParseException {
+ process(reader, null, baseUri);
+ }
+
+ /**
+ * Processes reader input for document's
+ * @param reader document's reader
+ * @param mimeType document's MIME type
+ * @param baseUri document's base URI
+ * @throws ParseException
+ */
+ public final void process(Reader reader, String mimeType, String baseUri) throws ParseException {
+ startStream();
+ try {
+ processInternal(reader, mimeType, baseUri);
+ } finally {
+ endStream();
+ }
+ }
+
+ /**
+ * Creates source appropriate for specified sink.
+ * @param sink sink to create source for
+ * @return new instance of source which can stream to sink
+ */
+ protected static io.github.sparqlanything.html.org.semarglproject.source.AbstractSource createSourceForSink(DataSink sink) {
+ if (sink instanceof CharSink) {
+ return new io.github.sparqlanything.html.org.semarglproject.source.CharSource((CharSink) sink);
+ } else if (sink instanceof XmlSink) {
+ return new io.github.sparqlanything.html.org.semarglproject.source.XmlSource((XmlSink) sink);
+ }
+ return null;
+ }
+
+ static void closeQuietly(Closeable closeable) {
+ try {
+ if (closeable != null) {
+ closeable.close();
+ }
+ } catch (IOException ioe) {
+ // ignore
+ }
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/CharSource.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/CharSource.java
new file mode 100644
index 00000000..efc6392c
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/CharSource.java
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.source;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.CharSink;
+import io.github.sparqlanything.html.org.semarglproject.source.AbstractSource;
+import io.github.sparqlanything.html.org.semarglproject.source.BaseStreamProcessor;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+
+final class CharSource extends AbstractSource {
+
+ CharSource(CharSink sink) {
+ super(sink);
+ }
+
+ @Override
+ public void process(Reader reader, String mimeType, String baseUri) throws ParseException {
+ BufferedReader bufferedReader = new BufferedReader(reader);
+ try {
+ sink.setBaseUri(baseUri);
+ char[] buffer = new char[512];
+ int read;
+ while ((read = bufferedReader.read(buffer)) != -1) {
+ sink.process(buffer, 0, read);
+ }
+ } catch (IOException e) {
+ throw new ParseException(e);
+ } finally {
+ BaseStreamProcessor.closeQuietly(bufferedReader);
+ }
+ }
+
+ @Override
+ public void process(InputStream inputStream, String mimeType, String baseUri) throws ParseException {
+ Reader reader = new InputStreamReader(inputStream, Charset.forName("UTF-8"));
+ try {
+ process(reader, mimeType, baseUri);
+ } finally {
+ BaseStreamProcessor.closeQuietly(reader);
+ }
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/StreamProcessor.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/StreamProcessor.java
new file mode 100644
index 00000000..08bb428b
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/StreamProcessor.java
@@ -0,0 +1,109 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.source;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.DataSink;
+import io.github.sparqlanything.html.org.semarglproject.source.AbstractSource;
+import io.github.sparqlanything.html.org.semarglproject.source.BaseStreamProcessor;
+import io.github.sparqlanything.html.org.semarglproject.source.XmlSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import java.io.InputStream;
+import java.io.Reader;
+
+/**
+ * Simple pipeline managing wrapper. Automatically instantiates source appropriate for specified sink.
+ * Provides processing and setup methods.
+ *
+ * List of supported properties:
+ *
+ * {@link #XML_READER_PROPERTY}
+ *
+ */
+public final class StreamProcessor extends BaseStreamProcessor {
+
+ /**
+ * Used as a key with {@link #setProperty(String, Object)} method.
+ * Allows to specify custom {@link XMLReader} used with SAX parsers.
+ */
+ public static final String XML_READER_PROPERTY = "http://semarglproject.org/core/properties/xml-parser";
+
+ /**
+ * Used as a key with {@link #setProperty(String, Object)} method.
+ * Enables or disables error recovery mechanism.
+ */
+ public static final String ENABLE_ERROR_RECOVERY =
+ "http://semarglproject.org/core/properties/enable-error-recovery";
+
+ /**
+ * Used as a key with {@link #setProperty(String, Object)} method.
+ * Allows to specify handler for processor events.
+ * Subclass of {@link io.github.sparqlanything.html.org.semarglproject.rdf.ProcessorGraphHandler} must be passed as a value.
+ */
+ public static final String PROCESSOR_GRAPH_HANDLER_PROPERTY =
+ "http://semarglproject.org/core/properties/processor-graph-handler";
+
+ private final DataSink sink;
+ private final io.github.sparqlanything.html.org.semarglproject.source.AbstractSource source;
+
+ /**
+ * Instantiates stream processor for pipe starting with specified sink.
+ * @param sink pipe's input
+ */
+ public StreamProcessor(DataSink sink) {
+ this.sink = sink;
+ this.source = createSourceForSink(sink);
+ }
+
+ @Override
+ public void processInternal(InputStream inputStream, String mimeType, String baseUri) throws ParseException {
+ source.process(inputStream, mimeType, baseUri);
+ }
+
+ @Override
+ protected void startStream() throws ParseException {
+ sink.startStream();
+ }
+
+ @Override
+ protected void endStream() throws ParseException {
+ sink.endStream();
+ }
+
+ @Override
+ public void processInternal(Reader reader, String mimeType, String baseUri) throws ParseException {
+ source.process(reader, mimeType, baseUri);
+ }
+
+ @Override
+ public boolean setProperty(String key, Object value) {
+ boolean result = false;
+ if (XML_READER_PROPERTY.equals(key) && value instanceof XMLReader && source instanceof io.github.sparqlanything.html.org.semarglproject.source.XmlSource) {
+ try {
+ if (value != null) {
+ ((XmlSource) source).setXmlReader((XMLReader) value);
+ result = true;
+ }
+ } catch(SAXException e) {
+ throw new IllegalArgumentException("XMLReader was not able to be initialized", e);
+ }
+ }
+ return sink.setProperty(key, value) || result;
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/XmlSource.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/XmlSource.java
new file mode 100644
index 00000000..14cac2b4
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/source/XmlSource.java
@@ -0,0 +1,95 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.source;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.sink.XmlSink;
+import io.github.sparqlanything.html.org.semarglproject.source.AbstractSource;
+import io.github.sparqlanything.html.org.semarglproject.source.BaseStreamProcessor;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.XMLReaderFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+
+final class XmlSource extends AbstractSource {
+
+ private XMLReader xmlReader = null;
+
+ XmlSource(XmlSink sink) {
+ super(sink);
+ }
+
+ @Override
+ public void process(Reader reader, String mimeType, String baseUri) throws ParseException {
+ try {
+ initXmlReader();
+ } catch (SAXException e) {
+ throw new ParseException("Can not instantinate XMLReader", e);
+ }
+ try {
+ sink.setBaseUri(baseUri);
+ xmlReader.parse(new InputSource(reader));
+ } catch (SAXException e) {
+ ParseException wrappedException = sink.processException(e);
+ try {
+ sink.endDocument();
+ } catch (SAXException e2) {
+ // do nothing
+ }
+ throw wrappedException;
+ } catch (IOException e) {
+ throw new ParseException(e);
+ }
+ }
+
+ @Override
+ public void process(InputStream inputStream, String mimeType, String baseUri) throws ParseException {
+ Reader reader = new InputStreamReader(inputStream, Charset.forName("UTF-8"));
+ try {
+ process(reader, mimeType, baseUri);
+ } finally {
+ BaseStreamProcessor.closeQuietly(reader);
+ }
+ }
+
+ private void initXmlReader() throws SAXException {
+ if (xmlReader == null) {
+ xmlReader = getDefaultXmlReader();
+ }
+ xmlReader.setContentHandler(sink);
+ xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", sink);
+ }
+
+ public void setXmlReader(XMLReader xmlReader) throws SAXException {
+ if(xmlReader == null) {
+ this.xmlReader = getDefaultXmlReader();
+ } else {
+ this.xmlReader = xmlReader;
+ }
+ }
+
+ public static XMLReader getDefaultXmlReader() throws SAXException {
+ XMLReader result = XMLReaderFactory.createXMLReader();
+ result.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+ return result;
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/OWL.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/OWL.java
new file mode 100644
index 00000000..fcdef073
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/OWL.java
@@ -0,0 +1,111 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.vocab;
+
+/**
+ * Defines URIs for the OWL vocabulary terms.
+ */
+public final class OWL {
+ public static final String NS = "http://www.w3.org/2002/07/owl#";
+
+ // OWL 2 RDF-Based Vocabulary
+
+ public static final String ALL_DIFFERENT = NS + "AllDifferent";
+ public static final String ALL_DISJOINT_CLASSES = NS + "AllDisjointClasses";
+ public static final String ALL_DISJOINT_PROPERTIES = NS + "AllDisjointProperties";
+ public static final String ALL_VALUES_FROM = NS + "allValuesFrom";
+ public static final String ANNOTATED_PROPERTY = NS + "annotatedProperty";
+ public static final String ANNOTATED_SOURCE = NS + "annotatedSource";
+ public static final String ANNOTATED_TARGET = NS + "annotatedTarget";
+ public static final String ANNOTATION = NS + "Annotation";
+ public static final String ANNOTATION_PROPERTY = NS + "AnnotationProperty";
+ public static final String ASSERTION_PROPERTY = NS + "assertionProperty";
+ public static final String ASYMMETRIC_PROPERTY = NS + "AsymmetricProperty";
+ public static final String AXIOM = NS + "Axiom";
+ public static final String BACKWARD_COMPATIBLE_WITH = NS + "backwardCompatibleWith";
+ public static final String BOTTOM_DATA_PROPERTY = NS + "bottomDataProperty";
+ public static final String BOTTOM_OBJECT_PROPERTY = NS + "bottomObjectProperty";
+ public static final String CARDINALITY = NS + "cardinality";
+ public static final String CLASS = NS + "Class";
+ public static final String COMPLEMENT_OF = NS + "complementOf";
+ public static final String DATA_RANGE = NS + "DataRange";
+ public static final String DATATYPE_COMPLEMENT_OF = NS + "datatypeComplementOf";
+ public static final String DATATYPE_PROPERTY = NS + "DatatypeProperty";
+ public static final String DEPRECATED = NS + "deprecated";
+ public static final String DEPRECATED_CLASS = NS + "DeprecatedClass";
+ public static final String DEPRECATED_PROPERTY = NS + "DeprecatedProperty";
+ public static final String DIFFERENT_FROM = NS + "differentFrom";
+ public static final String DISJOINT_UNION_OF = NS + "disjointUnionOf";
+ public static final String DISJOINT_WITH = NS + "disjointWith";
+ public static final String DISTINCT_MEMBERS = NS + "distinctMembers";
+ public static final String EQUIVALENT_CLASS = NS + "equivalentClass";
+ public static final String EQUIVALENT_PROPERTY = NS + "equivalentProperty";
+ public static final String FUNCTIONAL_PROPERTY = NS + "FunctionalProperty";
+ public static final String HAS_KEY = NS + "hasKey";
+ public static final String HAS_SELF = NS + "hasSelf";
+ public static final String HAS_VALUE = NS + "hasValue";
+ public static final String IMPORTS = NS + "imports";
+ public static final String INCOMPATIBLE_WITH = NS + "incompatibleWith";
+ public static final String INTERSECTION_OF = NS + "intersectionOf";
+ public static final String INVERSE_FUNCTIONAL_PROPERTY = NS + "InverseFunctionalProperty";
+ public static final String INVERSE_OF = NS + "inverseOf";
+ public static final String IRREFLEXIVE_PROPERTY = NS + "IrreflexiveProperty";
+ public static final String MAX_CARDINALITY = NS + "maxCardinality";
+ public static final String MAX_QUALIFIED_CARDINALITY = NS + "maxQualifiedCardinality";
+ public static final String MEMBERS = NS + "members";
+ public static final String MIN_CARDINALITY = NS + "minCardinality";
+ public static final String MIN_QUALIFIED_CARDINALITY = NS + "minQualifiedCardinality";
+ public static final String NAMED_INDIVIDUAL = NS + "NamedIndividual";
+ public static final String NEGATIVE_PROPERTY_ASSERTION = NS + "NegativePropertyAssertion";
+ public static final String NOTHING = NS + "Nothing";
+ public static final String OBJECT_PROPERTY = NS + "ObjectProperty";
+ public static final String ON_CLASS = NS + "onClass";
+ public static final String ON_DATA_RANGE = NS + "onDataRange";
+ public static final String ON_DATATYPE = NS + "onDatatype";
+ public static final String ONE_OF = NS + "oneOf";
+ public static final String ON_PROPERTY = NS + "onProperty";
+ public static final String ON_PROPERTIES = NS + "onProperties";
+ public static final String ONTOLOGY = NS + "Ontology";
+ public static final String ONTOLOGY_PROPERTY = NS + "OntologyProperty";
+ public static final String PRIOR_VERSION = NS + "priorVersion";
+ public static final String PROPERTY_CHAIN_AXIOM = NS + "propertyChainAxiom";
+ public static final String PROPERTY_DISJOINT_WITH = NS + "propertyDisjointWith";
+ public static final String QUALIFIED_CARDINALITY = NS + "qualifiedCardinality";
+ public static final String REFLEXIVE_PROPERTY = NS + "ReflexiveProperty";
+ public static final String RESTRICTION = NS + "Restriction";
+ public static final String SAME_AS = NS + "sameAs";
+ public static final String SOME_VALUES_FROM = NS + "someValuesFrom";
+ public static final String SOURCE_INDIVIDUAL = NS + "sourceIndividual";
+ public static final String SYMMETRIC_PROPERTY = NS + "SymmetricProperty";
+ public static final String TARGET_INDIVIDUAL = NS + "targetIndividual";
+ public static final String TARGET_VALUE = NS + "targetValue";
+ public static final String THING = NS + "Thing";
+ public static final String TOP_DATA_PROPERTY = NS + "topDataProperty";
+ public static final String TOP_OBJECT_PROPERTY = NS + "topObjectProperty";
+ public static final String TRANSITIVE_PROPERTY = NS + "TransitiveProperty";
+ public static final String UNION_OF = NS + "unionOf";
+ public static final String VERSION_INFO = NS + "versionInfo";
+ public static final String VERSION_IRI = NS + "versionIRI";
+ public static final String WITH_RESTRICTIONS = NS + "withRestrictions";
+
+ // Datatypes of the OWL 2 RDF-Based Semantics
+
+ public static final String RATIONAL = NS + "rational";
+ public static final String REAL = NS + "real";
+
+ private OWL() {
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDF.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDF.java
new file mode 100644
index 00000000..27aaee0a
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDF.java
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.vocab;
+
+/**
+ * Defines URIs for the RDF vocabulary terms and bnode constans used by framework.
+ */
+public final class RDF {
+
+ public static final String BNODE_PREFIX = "_:";
+
+ // indicates that short bnode syntax shouldn't be used for this node
+ public static final String SHORTENABLE_BNODE_SUFFIX = "sbl";
+
+ public static final String NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+
+ // Basic classes and properties
+
+ public static final String PROPERTY = NS + "Property";
+ public static final String XML_LITERAL = NS + "XMLLiteral";
+
+ public static final String TYPE = NS + "type";
+ public static final String VALUE = NS + "value";
+
+ // Container and collection classes and properties
+
+ public static final String ALT = NS + "Alt";
+ public static final String BAG = NS + "Bag";
+ public static final String SEQ = NS + "Seq";
+ public static final String LIST = NS + "List";
+
+ public static final String FIRST = NS + "first";
+ public static final String NIL = NS + "nil";
+ public static final String REST = NS + "rest";
+
+ // Reification
+
+ public static final String STATEMENT = NS + "Statement";
+
+ public static final String OBJECT = NS + "object";
+ public static final String PREDICATE = NS + "predicate";
+ public static final String SUBJECT = NS + "subject";
+
+ // Syntax names
+
+ public static final String DESCRIPTION = NS + "Description";
+ public static final String ID = NS + "ID";
+ public static final String RDF = NS + "RDF";
+
+ public static final String ABOUT = NS + "about";
+ public static final String DATATYPE = NS + "datatype";
+ public static final String LI = NS + "li";
+ public static final String NODEID = NS + "nodeID";
+ public static final String PARSE_TYPE = NS + "parseType";
+ public static final String RESOURCE = NS + "resource";
+
+ // Deprecated
+
+ @Deprecated
+ public static final String ABOUT_EACH = NS + "aboutEach";
+ @Deprecated
+ public static final String ABOUT_EACH_PREFIX = NS + "aboutEachPrefix";
+ @Deprecated
+ public static final String BAG_ID = NS + "bagID";
+
+ private RDF() {
+ }
+
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDFS.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDFS.java
new file mode 100644
index 00000000..66784fc4
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDFS.java
@@ -0,0 +1,43 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.vocab;
+
+/**
+ * Defines URIs for the RDFS vocabulary terms.
+ */
+public final class RDFS {
+
+ public static final String NS = "http://www.w3.org/2000/01/rdf-schema#";
+
+ public static final String DOMAIN = NS + "domain";
+ public static final String RANGE = NS + "range";
+ public static final String RESOURCE = NS + "Resource";
+ public static final String LITERAL = NS + "Literal";
+ public static final String DATATYPE = NS + "Datatype";
+ public static final String CLASS = NS + "Class";
+ public static final String SUB_CLASS_OF = NS + "subClassOf";
+ public static final String SUB_PROPERTY_OF = NS + "subPropertyOf";
+ public static final String MEMBER = NS + "member";
+ public static final String CONTAINER = NS + "Container";
+ public static final String CONTAINER_MEMBERSHIP_PROPERTY = NS + "ContainerMembershipProperty";
+ public static final String COMMENT = NS + "comment";
+ public static final String SEE_ALSO = NS + "seeAlso";
+ public static final String IS_DEFINED_BY = NS + "isDefinedBy";
+ public static final String LABEL = NS + "label";
+
+ private RDFS() {
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDFa.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDFa.java
new file mode 100644
index 00000000..f3e384f7
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/RDFa.java
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.vocab;
+
+/**
+ * Defines URIs for the RDFa vocabulary terms and RDFa attributes and versions.
+ */
+public final class RDFa {
+
+ public static final String NS = "http://www.w3.org/ns/rdfa#";
+
+ public static final short VERSION_10 = 1;
+ public static final short VERSION_11 = 2;
+
+ public static final String ABOUT_ATTR = "about";
+ public static final String CONTENT_ATTR = "content";
+ public static final String DATATYPE_ATTR = "datatype";
+ public static final String HREF_ATTR = "href";
+ public static final String ID_ATTR = "id";
+ public static final String INLIST_ATTR = "inlist";
+ public static final String PREFIX_ATTR = "prefix";
+ public static final String PROFILE_ATTR = "profile";
+ public static final String PROPERTY_ATTR = "property";
+ public static final String REL_ATTR = "rel";
+ public static final String RESOURCE_ATTR = "resource";
+ public static final String REV_ATTR = "rev";
+ public static final String ROLE_ATTR = "role";
+ public static final String SRC_ATTR = "src";
+ public static final String TYPEOF_ATTR = "typeof";
+ public static final String VOCAB_ATTR = "vocab";
+
+ public static final String CONTEXT = NS + "context";
+ public static final String WARNING = NS + "Warning";
+ public static final String PREFIX_REDEFINITION = NS + "PrefixRedefinition";
+ public static final String UNRESOLVED_CURIE = NS + "UnresolvedCURIE";
+ public static final String UNRESOLVED_TERM = NS + "UnresolvedTerm";
+ public static final String ERROR = NS + "Error";
+ public static final String USES_VOCABULARY = NS + "usesVocabulary";
+
+ public static final String COPY = NS + "copy";
+ public static final String PATTERN = NS + "Pattern";
+
+ private RDFa() {
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/XSD.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/XSD.java
new file mode 100644
index 00000000..12eb9e42
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/vocab/XSD.java
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.vocab;
+
+/**
+ * Defines URIs for the XSD primitive datatypes.
+ */
+public final class XSD {
+
+ public static final String NS = "http://www.w3.org/2001/XMLSchema#";
+
+ public static final String ANY_URI = NS + "anyURI";
+ public static final String BASE64_BINARY = NS + "base64Binary";
+ public static final String DATE = NS + "date";
+ public static final String DATE_TIME = NS + "dateTime";
+ public static final String DECIMAL = NS + "decimal";
+ public static final String DURATION = NS + "duration";
+ public static final String ENTITIES = NS + "ENTITIES";
+ public static final String ENTITY = NS + "ENTITY";
+ public static final String G_DAY = NS + "gDay";
+ public static final String G_MONTH = NS + "gMonth";
+ public static final String G_MONTH_DAY = NS + "gMonthDay";
+ public static final String G_YEAR = NS + "gYear";
+ public static final String G_YEAR_MONTH = NS + "gYearMonth";
+ public static final String HEX_BINARY = NS + "hexBinary";
+ public static final String ID = NS + "ID";
+ public static final String IDREF = NS + "IDREF";
+ public static final String IDREFS = NS + "IDREFS";
+ public static final String INTEGER = NS + "integer";
+ public static final String LANGUAGE = NS + "language";
+ public static final String NAME = NS + "Name";
+ public static final String NC_NAME = NS + "NCName";
+ public static final String NEGATIVE_INTEGER = NS + "negativeInteger";
+ public static final String NMTOKEN = NS + "NMTOKEN";
+ public static final String NMTOKENS = NS + "NMTOKENS";
+ public static final String NON_NEGATIVE_INTEGER = NS + "nonNegativeInteger";
+ public static final String NON_POSITIVE_INTEGER = NS + "nonPositiveInteger";
+ public static final String NORMALIZED_STRING = NS + "normalizedString";
+ public static final String NOTATION = NS + "NOTATION";
+ public static final String POSITIVE_INTEGER = NS + "positiveInteger";
+ public static final String QNAME = NS + "QName";
+ public static final String TIME = NS + "time";
+ public static final String TOKEN = NS + "token";
+ public static final String UNSIGNED_BYTE = NS + "unsignedByte";
+ public static final String UNSIGNED_INT = NS + "unsignedInt";
+ public static final String UNSIGNED_LONG = NS + "unsignedLong";
+ public static final String UNSIGNED_SHORT = NS + "unsignedShort";
+ public static final String BOOLEAN = NS + "boolean";
+ public static final String BYTE = NS + "byte";
+ public static final String DOUBLE = NS + "double";
+ public static final String FLOAT = NS + "float";
+ public static final String INT = NS + "int";
+ public static final String LONG = NS + "long";
+ public static final String SHORT = NS + "short";
+ public static final String STRING = NS + "string";
+
+ private XSD() {
+ }
+}
diff --git a/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/xml/XmlUtils.java b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/xml/XmlUtils.java
new file mode 100644
index 00000000..65e96a6b
--- /dev/null
+++ b/sparql-anything-html/src/main/java/io/github/sparqlanything/html/org/semarglproject/xml/XmlUtils.java
@@ -0,0 +1,186 @@
+/**
+ * Copyright 2012-2013 the Semargl contributors. See AUTHORS for more details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.github.sparqlanything.html.org.semarglproject.xml;
+
+import org.xml.sax.Attributes;
+
+import java.util.BitSet;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Lightweight XML utils for internal usage
+ */
+public final class XmlUtils {
+
+ /**
+ * XML identifier start char checker
+ */
+ public static final BitSet ID_START = new BitSet();
+
+ /**
+ * XML identifier char checker
+ */
+ public static final BitSet ID = new BitSet();
+
+ /**
+ * XML whitespace char checker
+ */
+ public static final BitSet WHITESPACE = new BitSet();
+
+ /**
+ * XML quote char checker
+ */
+ public static final BitSet QUOTE = new BitSet();
+
+ /**
+ * XML greater char checker
+ */
+ public static final BitSet GT = new BitSet();
+
+ /**
+ * XML right square bracket char checker
+ */
+ public static final BitSet RIGHT_SQ_BRACKET = new BitSet();
+
+ /**
+ * XML lang attribute name
+ */
+ public static final String XML_LANG = "xml:lang";
+
+ /**
+ * XML base attribute name
+ */
+ public static final String XML_BASE = "xml:base";
+
+ /**
+ * Lang attribute name
+ */
+ public static final String LANG = "lang";
+
+ private static final String NC_NAME_START_CHAR = "A-Za-z_\u00C0-\u00D6\u00D8-\u00F6"
+ + "\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F"
+ + "\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD";
+ // \u10000-\uEFFFF
+ private static final String NC_NAME_CHAR = "-.0-9\u00B7\u0300-\u036F\u203F-\u2040";
+ private static final Pattern XML_NAME_PATTERN = Pattern.compile("[" + NC_NAME_START_CHAR + "]"
+ + "[" + NC_NAME_START_CHAR + NC_NAME_CHAR + "]*");
+
+ private static final String ID_START_STR = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
+ private static final String ID_OTHER_STR = "-0123456789:";
+ private static final String WHITESPACE_STR = " \t\r\n\f\u000B\u001C\u001D\u001E\u00A0\u2007\u202F";
+
+ private static final BitSet ID_START_OR_GT = new BitSet();
+ private static final BitSet ID_START_OR_EQUAL = new BitSet();
+ private static final BitSet ID_OR_QUOTE_OR_APOS = new BitSet();
+ private static final BitSet APOS = new BitSet();
+ private static final BitSet LT = new BitSet();
+
+ static {
+ LT.set('<');
+ GT.set('>');
+ APOS.set('\'');
+ QUOTE.set('\"');
+ RIGHT_SQ_BRACKET.set(']');
+ for (int i = 0; i < ID_START_STR.length(); i++) {
+ char c = ID_START_STR.charAt(i);
+ ID_START.set(c);
+ ID_START_OR_EQUAL.set(c);
+ ID_START_OR_GT.set(c);
+ ID.set(c);
+ ID_OR_QUOTE_OR_APOS.set(c);
+ }
+ for (int i = 0; i < ID_OTHER_STR.length(); i++) {
+ char c = ID_OTHER_STR.charAt(i);
+ ID.set(c);
+ ID_OR_QUOTE_OR_APOS.set(c);
+ }
+ ID_START_OR_GT.set('>');
+ ID_START_OR_EQUAL.set('=');
+ ID_OR_QUOTE_OR_APOS.set('\'');
+ ID_OR_QUOTE_OR_APOS.set('\"');
+ for (int i = 0; i < WHITESPACE_STR.length(); i++) {
+ char c = WHITESPACE_STR.charAt(i);
+ WHITESPACE.set(c);
+ }
+ }
+
+ private XmlUtils() {
+ }
+
+
+ /**
+ * Checks if specified value is valid XML name
+ * @param value value to check
+ * @return true if value is valid XML name
+ */
+ public static boolean isValidNCName(String value) {
+ return XML_NAME_PATTERN.matcher(value).matches();
+ }
+
+ /**
+ * Serializes node open tag
+ * @param nsUri node's NS URI
+ * @param qname node's QName
+ * @param nsMappings node's namespace mappings
+ * @param attrs node's attributes
+ * @param optimizeNs should unused namespaces be skipped
+ * @return string representation of open tag
+ */
+ public static String serializeOpenTag(String nsUri, String qname, Map nsMappings,
+ Attributes attrs, boolean optimizeNs) {
+ String result = "<" + qname;
+ if (nsUri != null && nsUri.length() > 0) {
+ int idx = Math.max(qname.indexOf(':'), 0);
+ nsMappings.put(qname.substring(0, idx), nsUri);
+ }
+ for (int i = 0; i < attrs.getLength(); i++) {
+ result += " " + attrs.getQName(i) + "=\"" + attrs.getValue(i) + "\"";
+ }
+ for (String key : nsMappings.keySet()) {
+ if (optimizeNs && isPrefixIgnorable(key, qname, attrs)) {
+ continue;
+ }
+
+ if (key.isEmpty()) {
+ String value = nsMappings.get(key);
+ result += " xmlns=\"" + value + "\"";
+ } else {
+ result += " xmlns:" + key + "=\"" + nsMappings.get(key) + "\"";
+ }
+ }
+ result += ">";
+ return result;
+ }
+
+ private static boolean isPrefixIgnorable(String key, String qname, Attributes attrs) {
+ boolean usagesFound = key.isEmpty() && qname.indexOf(':') == -1 || key.length() > 0
+ && qname.startsWith(key + ":");
+ for (int i = 0; i < attrs.getLength(); i++) {
+ String aqn = attrs.getQName(i);
+ if (aqn.startsWith("xml")) {
+ continue;
+ }
+ if (key.isEmpty() && aqn.indexOf(':') == -1 || key.length() > 0
+ && aqn.startsWith(key + ":")) {
+ usagesFound = true;
+ break;
+ }
+ }
+ return !usagesFound;
+ }
+
+}
diff --git a/sparql-anything-html/src/test/java/io/github/sparqlanything/html/RDFaSandbox.java b/sparql-anything-html/src/test/java/io/github/sparqlanything/html/RDFaSandbox.java
new file mode 100644
index 00000000..9cfa8a2c
--- /dev/null
+++ b/sparql-anything-html/src/test/java/io/github/sparqlanything/html/RDFaSandbox.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2024 SPARQL Anything Contributors @ http://github.com/sparql-anything
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.sparqlanything.html;
+
+import io.github.sparqlanything.html.org.semarglproject.rdf.ParseException;
+import io.github.sparqlanything.html.org.semarglproject.rdf.rdfa.RdfaParser;
+import io.github.sparqlanything.html.org.semarglproject.sink.TripleSink;
+import io.github.sparqlanything.html.org.semarglproject.source.StreamProcessor;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+
+public class RDFaSandbox {
+ private static final Logger logger = LoggerFactory.getLogger(TestRDFaParser.class);
+ @Test
+ public void RDFa() throws ParseException {
+
+ TripleSink ts = new TripleSink() {
+ @Override
+ public void addNonLiteral(String subj, String pred, String obj) {
+ logger.trace("add non literal {} {} {}", subj, pred, obj);
+ }
+
+ @Override
+ public void addPlainLiteral(String subj, String pred, String content, String lang) {
+ logger.trace("addPlainLiteral {} {} {} {}", subj, pred, content, lang);
+ }
+
+ @Override
+ public void addTypedLiteral(String subj, String pred, String content, String type) {
+ logger.trace("addTypedLiteral {} {} {} {}", subj, pred, content, type);
+ }
+
+ @Override
+ public void setBaseUri(String baseUri) {
+ logger.trace("setBaseURI {}", baseUri);
+
+ }
+
+ @Override
+ public void startStream() throws ParseException {
+ logger.trace("start stream");
+ }
+
+ @Override
+ public void endStream() throws ParseException {
+ logger.trace("end stream");
+ }
+
+ @Override
+ public boolean setProperty(String key, Object value) {
+ logger.trace("set property {} {}", key,value);
+ return false;
+ }
+ };
+
+ logger.trace("test logger");
+
+ StreamProcessor streamProcessor = new StreamProcessor(RdfaParser.connect(ts));
+ streamProcessor.process(new File("/Users/lgu/workspace/SPARQLAnything/sparql.anything/sparql-anything-html/src/test/resources/RDFa.html"));
+ }
+}
diff --git a/sparql-anything-html/src/test/java/io/github/sparqlanything/html/TestRDFaParser.java b/sparql-anything-html/src/test/java/io/github/sparqlanything/html/TestRDFaParser.java
new file mode 100644
index 00000000..3ff7a270
--- /dev/null
+++ b/sparql-anything-html/src/test/java/io/github/sparqlanything/html/TestRDFaParser.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2024 SPARQL Anything Contributors @ http://github.com/sparql-anything
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.sparqlanything.html;
+
+import io.github.sparqlanything.testutils.AbstractTriplifierTester;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.Properties;
+
+
+public class TestRDFaParser extends AbstractTriplifierTester {
+
+ public TestRDFaParser() {
+ super(new HTMLTriplifier(), new Properties(), "html", "nq");
+ this.printWholeGraph = true;
+ }
+
+ @Ignore
+ @Test
+ public void testRDFa() {
+ this.assertResultIsIsomorphicWithExpected();
+ }
+
+ protected void properties(Properties properties) {
+ properties.setProperty(HTMLTriplifier.PROPERTY_METADATA.toString(), "true");
+ }
+
+
+
+
+}
diff --git a/sparql-anything-html/src/test/resources/RDFa.html b/sparql-anything-html/src/test/resources/RDFa.html
new file mode 100644
index 00000000..692d2df7
--- /dev/null
+++ b/sparql-anything-html/src/test/resources/RDFa.html
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/sparql-anything-html/src/test/resources/RDFa.nq b/sparql-anything-html/src/test/resources/RDFa.nq
new file mode 100644
index 00000000..07288c54
--- /dev/null
+++ b/sparql-anything-html/src/test/resources/RDFa.nq
@@ -0,0 +1,60 @@
+ "Director: James Cameron (born August 16, 1954)" .
+ "Director: James Cameron (born August 16, 1954)" .
+ .
+ "Director: James Cameron (born August 16, 1954)" .
+ "Avatar Director: James Cameron (born August 16, 1954) " .
+ "Avatar Director: James Cameron (born August 16, 1954)" .
+ .
+ "" .
+ "https://schema.org/Movie" .
+ .
+ .
+ .
+ "\n\n \n
Avatar Director: James Cameron (born August 16, 1954) \n \n" .
+ "Avatar Director: James Cameron (born August 16, 1954)" .
+ .
+ .
+ .
+_:Bnode1hn5vtssbx2 .
+_:Bnode1hn5vtssbx2 "Avatar" .
+ "Avatar" .
+ "Avatar" .
+ .
+ "name" .
+ "Avatar" .
+ .
+ _:Bnode1hn5vtssbx2 .
+ "\n
Avatar Director: James Cameron (born August 16, 1954) \n" .
+ "Avatar Director: James Cameron (born August 16, 1954)" .
+ .
+ .
+ "Director: James Cameron (born August 16, 1954)" .
+ .
+ "Director: James Cameron (born August 16, 1954)" .
+ "Director: James Cameron (born August 16, 1954)" .
+ .
+ .
+ "https://schema.org/Movie" .
+ "" .
+ .
+ "Avatar Director: James Cameron (born August 16, 1954)" .
+ "Avatar Director: James Cameron (born August 16, 1954) " .
+ .
+ .
+ .
+ "Avatar Director: James Cameron (born August 16, 1954)" .
+ "\n\n \n
Avatar Director: James Cameron (born August 16, 1954) \n \n" .
+ .
+_:Bnode1hn5vtssbx2 "Avatar" .
+_:Bnode1hn5vtssbx2 .
+ "Avatar" .
+ "name" .
+ .
+ "Avatar" .
+ "Avatar" .
+