Skip to content

Commit

Permalink
Added OWL/XML and Manchester syntax as possible output formats.
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelRoeder committed Aug 25, 2023
1 parent 7e60e3b commit 0c53058
Show file tree
Hide file tree
Showing 11 changed files with 815 additions and 362 deletions.
28 changes: 22 additions & 6 deletions module.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,50 @@
<parameter/outputMediaType> a alg:Parameter ;
rdfs:label "Output format" ;
rdfs:comment "The format of the output dataset." ;
rdfs:range <StreamableRDFSerialization> .
rdfs:range <KGSerialization> .

<result/output> a alg:Result ;
rdfs:label "Output dataset" ;
rdfs:comment "The output dataset that is created as result of the transformation." ;
rdfs:range prov:Entity .

<KGSerialization> a owl:Class ;
rdfs:subClassOf <KGSerialization> ;
rdfs:label "knowledge graph serializations" ;
rdfs:comment "The class of knowledge graph serializations supported by the ENEXA transformator module." .

<StreamableRDFSerialization> a owl:Class ;
rdfs:subClassOf <KGSerialization> ;
rdfs:label "stream-able RDF serializations" ;
rdfs:comment "The class of RDF serializations that can be streamed and, hence, are supported as output format by the ENEXA transformator module." .
rdfs:comment "The class of RDF serializations that can be streamed and, hence, are easier to handle as output format by the ENEXA transformator module." .

iana-a:n-quads a <StreamableRDFSerialization> ;
iana-a:n-quads a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "N-Quads" ;
rdfs:comment "N-Quads is a line-based, plain text format for encoding an RDF dataset." ;
rdfs:isDefinedBy <https://www.w3.org/TR/n-quads/> .

iana-a:n-triples a <StreamableRDFSerialization> ;
iana-a:n-triples a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "N-Triples" ;
rdfs:comment "N-Triples is a line-based, plain text format for encoding an RDF graph." ;
rdfs:isDefinedBy <https://www.w3.org/TR/n-triples/> .

iana-a:trig a <StreamableRDFSerialization> ;
iana-a:trig a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "TriG" ;
rdfs:comment "TriG is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax document, and an extension of Turtle to support representing a complete RDF Dataset." ;
rdfs:isDefinedBy <https://www.w3.org/TR/trig/> .

iana-t:turtle a <StreamableRDFSerialization> ;
iana-t:turtle a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "Turtle" ;
rdfs:comment "The terse RDF Triple Language (Turtle) is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax W3C Recommendation." ;
rdfs:isDefinedBy <https://www.w3.org/TeamSubmission/turtle/> .

iana-a:owl+xml a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "OWL/XML" ;
rdfs:comment "The XML serialization for OWL 2 Web Ontology Language that mirrors its structural specification." ;
rdfs:isDefinedBy <http://www.w3.org/TR/owl2-xml-serialization/> .

iana-t:owl-manchester a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "Manchester Syntax" ;
rdfs:comment "The Manchester syntax is a user-friendly compact syntax for OWL 2 ontologies." ;
rdfs:isDefinedBy <https://www.w3.org/TR/owl2-manchester-syntax/> .

28 changes: 22 additions & 6 deletions module.ttl.template
Original file line number Diff line number Diff line change
Expand Up @@ -24,34 +24,50 @@
<parameter/outputMediaType> a alg:Parameter ;
rdfs:label "Output format" ;
rdfs:comment "The format of the output dataset." ;
rdfs:range <StreamableRDFSerialization> .
rdfs:range <KGSerialization> .

<result/output> a alg:Result ;
rdfs:label "Output dataset" ;
rdfs:comment "The output dataset that is created as result of the transformation." ;
rdfs:range prov:Entity .

<KGSerialization> a owl:Class ;
rdfs:subClassOf <KGSerialization> ;
rdfs:label "knowledge graph serializations" ;
rdfs:comment "The class of knowledge graph serializations supported by the ENEXA transformator module." .

<StreamableRDFSerialization> a owl:Class ;
rdfs:subClassOf <KGSerialization> ;
rdfs:label "stream-able RDF serializations" ;
rdfs:comment "The class of RDF serializations that can be streamed and, hence, are supported as output format by the ENEXA transformator module." .
rdfs:comment "The class of RDF serializations that can be streamed and, hence, are easier to handle as output format by the ENEXA transformator module." .

iana-a:n-quads a <StreamableRDFSerialization> ;
iana-a:n-quads a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "N-Quads" ;
rdfs:comment "N-Quads is a line-based, plain text format for encoding an RDF dataset." ;
rdfs:isDefinedBy <https://www.w3.org/TR/n-quads/> .

iana-a:n-triples a <StreamableRDFSerialization> ;
iana-a:n-triples a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "N-Triples" ;
rdfs:comment "N-Triples is a line-based, plain text format for encoding an RDF graph." ;
rdfs:isDefinedBy <https://www.w3.org/TR/n-triples/> .

iana-a:trig a <StreamableRDFSerialization> ;
iana-a:trig a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "TriG" ;
rdfs:comment "TriG is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax document, and an extension of Turtle to support representing a complete RDF Dataset." ;
rdfs:isDefinedBy <https://www.w3.org/TR/trig/> .

iana-t:turtle a <StreamableRDFSerialization> ;
iana-t:turtle a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "Turtle" ;
rdfs:comment "The terse RDF Triple Language (Turtle) is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax W3C Recommendation." ;
rdfs:isDefinedBy <https://www.w3.org/TeamSubmission/turtle/> .

iana-a:owl+xml a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "OWL/XML" ;
rdfs:comment "The XML serialization for OWL 2 Web Ontology Language that mirrors its structural specification." ;
rdfs:isDefinedBy <http://www.w3.org/TR/owl2-xml-serialization/> .

iana-t:owl-manchester a <KGSerialization>, <StreamableRDFSerialization> ;
rdfs:label "Manchester Syntax" ;
rdfs:comment "The Manchester syntax is a user-friendly compact syntax for OWL 2 ontologies." ;
rdfs:isDefinedBy <https://www.w3.org/TR/owl2-manchester-syntax/> .

6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@

<!-- DEPENDENCIES -->
<dependencies>
<!-- This is the version that is used by Protege -->
<dependency>
<groupId>net.sourceforge.owlapi</groupId>
<artifactId>owlapi-osgidistribution</artifactId>
<version>4.5.25</version>
</dependency>
<!-- ENEXA Java utils -->
<dependency>
<groupId>org.dice-research</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.dice_research.enexa.vocab.IANAMediaType;
import org.dice_research.rdf.RdfHelper;
import org.dice_research.sparql.SparqlQueryUtils;
import org.semanticweb.owlapi.formats.OWLXMLDocumentFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -55,6 +56,9 @@ public static void main(String[] args) {
String sharedDir = getEnvVariable("ENEXA_SHARED_DIRECTORY");
String outputDir = getEnvVariable("ENEXA_MODULE_INSTANCE_DIRECTORY");
String enexaServiceUrl = getEnvVariable("ENEXA_SERVICE_URL");
if (!enexaServiceUrl.endsWith("/")) {
enexaServiceUrl += "/";
}

Resource moduleInsResource = ResourceFactory.createResource(moduleInstance);
Resource experimentResource = ResourceFactory.createResource(experimentIri);
Expand All @@ -70,17 +74,20 @@ public static void main(String[] args) {
}
Resource targetMediaResource = RdfHelper.getObjectResource(parameterModel, moduleInsResource,
TransformVocab.outputMediaType);
if (sourceFiles.size() == 0) {
if (targetMediaResource == null) {
LOGGER.error("The output media type has not been defined. Aborting.");
return;
}
Lang outputLang = getOutputLang(targetMediaResource);
// 2. create transformer
if (!targetMediaResource.isURIResource()) {
LOGGER.error("The output media type is not an IRI. Aborting.");
return;
}
// 3. create transformer
if (!sharedDir.endsWith(File.separator)) {
sharedDir += File.separator;
}
File outputFile = null;
try (StreamingTransformator transformator = StreamingTransformator.builder().setOutputFormat(outputLang)
try (Transformator transformator = new TransformatorBuilder().setOutputFormat(targetMediaResource.getURI())
// .setCompression(compression)
// .setOutputFileName(outputFile.getName())
.setOutputDirectory(new File(outputDir)).build();) {
Expand Down Expand Up @@ -114,7 +121,7 @@ public static void main(String[] args) {
// module instance
metadata.add(moduleInsResource, TransformVocab.output, fileResource);

if (sendRequest(enexaServiceUrl, metadata) != null) {
if (sendRequest(enexaServiceUrl + "add-resource", metadata) != null) {
LOGGER.info("This module seems to have been successful.");
}
}
Expand Down Expand Up @@ -171,7 +178,7 @@ private static String getEnvVariable(String key) {
* @throws IOException in case of an IO error while reading the file
*/
protected static void addFile(Resource sourceFile, Model parameterModel, String sharedDir,
StreamingTransformator transformator) throws IOException {
Transformator transformator) throws IOException {
String enexaPath = RdfHelper.getStringValue(parameterModel, sourceFile, ENEXA.location);
Resource mediaTypeResource = RdfHelper.getObjectResource(parameterModel, sourceFile, DCAT.mediaType);
transformator.addFile2Stream(new File(EnexaPathUtils.translateEnexa2LocalPath(enexaPath, sharedDir)),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package org.dice_research.enexa.transform;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.formats.ManchesterSyntaxDocumentFormat;
import org.semanticweb.owlapi.formats.OWLXMLDocumentFormat;
import org.semanticweb.owlapi.io.StreamDocumentSource;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLDocumentFormat;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.model.OWLOntologyStorageException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link Transformator} that merges the axioms of all added files into a
 * single in-memory OWL ontology and serializes this ontology to the given
 * output stream when the transformator is closed.
 *
 * <p>
 * Nothing is written before {@link #close()} is called, i.e., the complete
 * merged ontology is held in memory until then.
 * </p>
 */
public class RDF2OntologyTransformator implements Transformator {

    private static final Logger LOGGER = LoggerFactory.getLogger(RDF2OntologyTransformator.class);

    /** The ontology that collects the axioms of all added files. */
    protected OWLOntology ontology;
    /** The manager that is used to load the input files and that owns {@link #ontology}. */
    protected OWLOntologyManager manager;
    /** The format in which the merged ontology is serialized on {@link #close()}. */
    protected OWLDocumentFormat outputFormat;
    /** The file to which the output belongs; only used for its name and as ontology IRI. */
    protected File outputFile;
    /** The stream to which the merged ontology is written on {@link #close()}. */
    protected OutputStream outputStream;

    /**
     * Constructor.
     *
     * @param ontology     the (typically empty) ontology that collects all axioms
     * @param manager      the manager used to load input documents
     * @param outputFormat the serialization format of the output
     * @param outputFile   the file that represents the output
     * @param outputStream the stream to which the output is written; it is closed
     *                     by {@link #close()}
     */
    protected RDF2OntologyTransformator(OWLOntology ontology, OWLOntologyManager manager,
            OWLDocumentFormat outputFormat, File outputFile, OutputStream outputStream) {
        super();
        this.ontology = ontology;
        this.manager = manager;
        this.outputFormat = outputFormat;
        this.outputFile = outputFile;
        this.outputStream = outputStream;
    }

    /**
     * Returns the OWL document format that belongs to the given media type.
     *
     * @param mediaType the media type name, e.g., {@code "application/owl+xml"}
     * @return a new format instance or {@code null} if the media type is
     *         {@code null} or not one of the ontology formats known to this class
     */
    public static OWLDocumentFormat getFormatForMediaType(String mediaType) {
        // A switch on a null String would throw a NullPointerException. Treat null
        // like any other unknown media type.
        if (mediaType == null) {
            return null;
        }
        switch (mediaType) {
        case "application/owl+xml":
            return new OWLXMLDocumentFormat();
        case "text/owl-manchester":
            return new ManchesterSyntaxDocumentFormat();
        default:
            return null;
        }
    }

    /**
     * Serializes the merged ontology in the configured output format and closes
     * the output stream. The stream is closed even if the serialization fails.
     *
     * @throws Exception an {@link IOException} wrapping the storage exception if
     *                   the ontology cannot be written
     */
    @Override
    public void close() throws Exception {
        try {
            if (ontology != null) {
                // Use the format that has been requested by the caller instead of always
                // writing OWL/XML. OWL/XML is only the fallback if no format is available.
                OWLDocumentFormat format = outputFormat;
                if (format == null) {
                    LOGGER.warn("No output format has been defined. Falling back to OWL/XML.");
                    format = new OWLXMLDocumentFormat();
                }
                ontology.saveOntology(format, outputStream);
            }
        } catch (OWLOntologyStorageException e) {
            throw new IOException("Error while writing the ontology.", e);
        } finally {
            IOUtils.closeQuietly(outputStream);
        }
    }

    /**
     * Reads the given file as ontology document and adds all its axioms to the
     * merged ontology. Files with a name ending with {@code .gz} or {@code .bz2}
     * are decompressed on the fly.
     *
     * @param file        the file that should be read
     * @param contentType the content type of the file; it is not used by this
     *                    implementation, the document is handed to the OWL API
     *                    without a format hint
     * @throws IOException if the file cannot be read or parsed
     */
    @Override
    public void addFile2Stream(File file, String contentType) throws IOException {
        String fileName = file.getName();
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            InputStream ins = in;
            if (fileName.endsWith(".gz")) {
                ins = new GzipCompressorInputStream(ins);
            } else if (fileName.endsWith(".bz2")) {
                ins = new BZip2CompressorInputStream(ins);
            }

            LOGGER.info("Adding file {} ...", file);
            OWLOntology readOnt = manager.loadOntologyFromOntologyDocument(new StreamDocumentSource(ins));
            try {
                manager.addAxioms(ontology, readOnt.getAxioms());
            } finally {
                // The axioms have been copied. Release the source ontology so that it
                // neither stays in memory nor blocks a later file that declares the
                // same ontology IRI.
                manager.removeOntology(readOnt);
            }
        } catch (OWLOntologyCreationException e) {
            throw new IOException("Error while reading the ontology \"" + file + "\".", e);
        }
    }

    /**
     * Creates a transformator with a fresh ontology manager and an empty ontology
     * whose IRI is derived from the given output file.
     *
     * @param outputFormat the serialization format of the output
     * @param outputFile   the file that represents the output
     * @param outputStream the stream to which the output is written
     * @return the newly created transformator
     * @throws OWLOntologyCreationException if the empty ontology cannot be created
     */
    public static RDF2OntologyTransformator create(OWLDocumentFormat outputFormat, File outputFile,
            OutputStream outputStream) throws OWLOntologyCreationException {
        OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
        OWLOntology ontology = manager.createOntology(IRI.create(outputFile));
        return new RDF2OntologyTransformator(ontology, manager, outputFormat, outputFile, outputStream);
    }

    /**
     * @return the name (without directory) of the output file
     */
    public String getOutputFileName() {
        return outputFile.getName();
    }

    /**
     * @return the output file
     */
    public File getOutputFile() {
        return outputFile;
    }
}
Loading

0 comments on commit 0c53058

Please sign in to comment.