From 0c53058b1077b9c2cf833379d64b775d92994d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20R=C3=B6der?= Date: Fri, 25 Aug 2023 08:49:15 +0200 Subject: [PATCH] Added OWL/XML and Manchester syntax as possible output formats. --- module.ttl | 28 ++- module.ttl.template | 28 ++- pom.xml | 6 + .../enexa/transform/EnexaTransformator.java | 19 +- .../transform/RDF2OntologyTransformator.java | 106 +++++++++ .../transform/StreamingTransformator.java | 182 +-------------- .../enexa/transform/Transformator.java | 84 +++++++ .../enexa/transform/TransformatorBuilder.java | 216 ++++++++++++++++++ .../transform/AbstractTransformatorTest.java | 196 ++++++++++++++++ .../RDF2OntologyTransformatorTest.java | 133 +++++++++++ .../transform/StreamingTransformatorTest.java | 179 ++------------- 11 files changed, 815 insertions(+), 362 deletions(-) create mode 100644 src/main/java/org/dice_research/enexa/transform/RDF2OntologyTransformator.java create mode 100644 src/main/java/org/dice_research/enexa/transform/Transformator.java create mode 100644 src/main/java/org/dice_research/enexa/transform/TransformatorBuilder.java create mode 100644 src/test/java/org/dice_research/enexa/transform/AbstractTransformatorTest.java create mode 100644 src/test/java/org/dice_research/enexa/transform/RDF2OntologyTransformatorTest.java diff --git a/module.ttl b/module.ttl index d1c46d3..4e32041 100644 --- a/module.ttl +++ b/module.ttl @@ -25,34 +25,50 @@ a alg:Parameter ; rdfs:label "Output format" ; rdfs:comment "The format of the output dataset." ; - rdfs:range . + rdfs:range . a alg:Result ; rdfs:label "Output dataset" ; rdfs:comment "The output dataset that is created as result of the transformation." ; rdfs:range prov:Entity . + a owl:Class ; + rdfs:subClassOf ; + rdfs:label "knowledge graph serializations" ; + rdfs:comment "The class of knowledge graph serializations supported by the ENEXA transformator module." . + a owl:Class ; + rdfs:subClassOf ; rdfs:label "stream-able RDF serializations" ; - rdfs:comment "The class of RDF serializations that can be streamed and, hence, are supported as output format by the ENEXA transformator module." . + rdfs:comment "The class of RDF serializations that can be streamed and, hence, are easier to handle as output format by the ENEXA transformator module." . -iana-a:n-quads a ; +iana-a:n-quads a , ; rdfs:label "N-Quads" ; rdfs:comment "N-Quads is a line-based, plain text format for encoding an RDF dataset." ; rdfs:isDefinedBy . -iana-a:n-triples a ; +iana-a:n-triples a , ; rdfs:label "N-Triples" ; rdfs:comment "N-Triples is a line-based, plain text format for encoding an RDF graph." ; rdfs:isDefinedBy . -iana-a:trig a ; +iana-a:trig a , ; rdfs:label "TriG" ; rdfs:comment "TriG is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax document, and an extension of Turtle to support representing a complete RDF Dataset." ; rdfs:isDefinedBy . -iana-t:turtle a ; +iana-t:turtle a , ; rdfs:label "Turtle" ; rdfs:comment "The terse RDF Triple Language (Turtle) is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax W3C Recommendation." ; rdfs:isDefinedBy . +iana-a:owl+xml a , ; + rdfs:label "OWL/XML" ; + rdfs:comment "The XML serialization for OWL 2 Web Ontology Language that mirrors its structural specification." ; + rdfs:isDefinedBy . + +iana-t:owl-manchester a , ; + rdfs:label "Manchester Syntax" ; + rdfs:comment "The Manchester syntax is a user-friendly compact syntax for OWL 2 ontologies." ; + rdfs:isDefinedBy . 
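With the two new entries above, module.ttl now advertises six serializations. A consumer can enumerate them at runtime by reading the module description with Jena. The following is a minimal sketch, assuming Jena is on the classpath and using a placeholder IRI for the serialization class declared above (substitute the actual class IRI from module.ttl):

```java
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.vocabulary.RDF;

public class ListSupportedFormats {
    public static void main(String[] args) {
        // Read the module description shipped with the module
        Model module = RDFDataMgr.loadModel("module.ttl");
        // Placeholder IRI: replace with the knowledge graph serialization class IRI from module.ttl
        Resource serializationClass = module.createResource("http://example.org/KnowledgeGraphSerialization");
        // Every individual typed with that class is a media type the module can write
        ResIterator it = module.listResourcesWithProperty(RDF.type, serializationClass);
        while (it.hasNext()) {
            System.out.println(it.next().getURI());
        }
    }
}
```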
+ diff --git a/module.ttl.template b/module.ttl.template index ef79b3d..4b3541c 100644 --- a/module.ttl.template +++ b/module.ttl.template @@ -24,34 +24,50 @@ a alg:Parameter ; rdfs:label "Output format" ; rdfs:comment "The format of the output dataset." ; - rdfs:range . + rdfs:range . a alg:Result ; rdfs:label "Output dataset" ; rdfs:comment "The output dataset that is created as result of the transformation." ; rdfs:range prov:Entity . + a owl:Class ; + rdfs:subClassOf ; + rdfs:label "knowledge graph serializations" ; + rdfs:comment "The class of knowledge graph serializations supported by the ENEXA transformator module." . + a owl:Class ; + rdfs:subClassOf ; rdfs:label "stream-able RDF serializations" ; - rdfs:comment "The class of RDF serializations that can be streamed and, hence, are supported as output format by the ENEXA transformator module." . + rdfs:comment "The class of RDF serializations that can be streamed and, hence, are easier to handle as output format by the ENEXA transformator module." . -iana-a:n-quads a ; +iana-a:n-quads a , ; rdfs:label "N-Quads" ; rdfs:comment "N-Quads is a line-based, plain text format for encoding an RDF dataset." ; rdfs:isDefinedBy . -iana-a:n-triples a ; +iana-a:n-triples a , ; rdfs:label "N-Triples" ; rdfs:comment "N-Triples is a line-based, plain text format for encoding an RDF graph." ; rdfs:isDefinedBy . -iana-a:trig a ; +iana-a:trig a , ; rdfs:label "TriG" ; rdfs:comment "TriG is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax document, and an extension of Turtle to support representing a complete RDF Dataset." ; rdfs:isDefinedBy . -iana-t:turtle a ; +iana-t:turtle a , ; rdfs:label "Turtle" ; rdfs:comment "The terse RDF Triple Language (Turtle) is a concrete syntax for RDF as defined in the RDF Concepts and Abstract Syntax W3C Recommendation." ; rdfs:isDefinedBy . +iana-a:owl+xml a , ; + rdfs:label "OWL/XML" ; + rdfs:comment "The XML serialization for OWL 2 Web Ontology Language that mirrors its structural specification." ; + rdfs:isDefinedBy . + +iana-t:owl-manchester a , ; + rdfs:label "Manchester Syntax" ; + rdfs:comment "The Manchester syntax is a user-friendly compact syntax for OWL 2 ontologies." ; + rdfs:isDefinedBy . 
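The pom.xml change below pulls in the OWL API, which supplies the document formats for these two new media types. The core round trip the new transformator performs looks roughly like this; a sketch only, with hypothetical file names, against the OWL API 4.x line declared in the pom:

```java
import java.io.File;

import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.formats.ManchesterSyntaxDocumentFormat;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyManager;

public class OwlRoundTrip {
    public static void main(String[] args) throws Exception {
        OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
        // Parse an RDF file into an ontology (the input file name is hypothetical)
        OWLOntology ontology = manager.loadOntologyFromOntologyDocument(new File("input.ttl"));
        // Write the ontology back in Manchester syntax, one of the two new output formats
        manager.saveOntology(ontology, new ManchesterSyntaxDocumentFormat(),
                IRI.create(new File("output.omn").toURI()));
    }
}
```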
+ diff --git a/pom.xml b/pom.xml index 125d1df..f50b534 100644 --- a/pom.xml +++ b/pom.xml @@ -32,6 +32,12 @@ + + <dependency> + <groupId>net.sourceforge.owlapi</groupId> + <artifactId>owlapi-osgidistribution</artifactId> + <version>4.5.25</version> + </dependency> <groupId>org.dice-research</groupId> diff --git a/src/main/java/org/dice_research/enexa/transform/EnexaTransformator.java b/src/main/java/org/dice_research/enexa/transform/EnexaTransformator.java index a260db8..ec97a8e 100644 --- a/src/main/java/org/dice_research/enexa/transform/EnexaTransformator.java +++ b/src/main/java/org/dice_research/enexa/transform/EnexaTransformator.java @@ -32,6 +32,7 @@ import org.dice_research.enexa.vocab.IANAMediaType; import org.dice_research.rdf.RdfHelper; import org.dice_research.sparql.SparqlQueryUtils; +import org.semanticweb.owlapi.formats.OWLXMLDocumentFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,6 +56,9 @@ public static void main(String[] args) { String sharedDir = getEnvVariable("ENEXA_SHARED_DIRECTORY"); String outputDir = getEnvVariable("ENEXA_MODULE_INSTANCE_DIRECTORY"); String enexaServiceUrl = getEnvVariable("ENEXA_SERVICE_URL"); + if (!enexaServiceUrl.endsWith("/")) { + enexaServiceUrl += "/"; + } Resource moduleInsResource = ResourceFactory.createResource(moduleInstance); Resource experimentResource = ResourceFactory.createResource(experimentIri); @@ -70,17 +74,20 @@ public static void main(String[] args) { } Resource targetMediaResource = RdfHelper.getObjectResource(parameterModel, moduleInsResource, TransformVocab.outputMediaType); - if (sourceFiles.size() == 0) { + if (targetMediaResource == null) { LOGGER.error("The output media type has not been defined. Aborting."); return; } - Lang outputLang = getOutputLang(targetMediaResource); - // 2. create transformer + if (!targetMediaResource.isURIResource()) { + LOGGER.error("The output media type is not an IRI. Aborting."); + return; + } + // 3.
create transformer if (!sharedDir.endsWith(File.separator)) { sharedDir += File.separator; } File outputFile = null; - try (StreamingTransformator transformator = StreamingTransformator.builder().setOutputFormat(outputLang) + try (Transformator transformator = new TransformatorBuilder().setOutputFormat(targetMediaResource.getURI()) // .setCompression(compression) // .setOutputFileName(outputFile.getName()) .setOutputDirectory(new File(outputDir)).build();) { @@ -114,7 +121,7 @@ public static void main(String[] args) { // module instance metadata.add(moduleInsResource, TransformVocab.output, fileResource); - if (sendRequest(enexaServiceUrl, metadata) != null) { + if (sendRequest(enexaServiceUrl + "add-resource", metadata) != null) { LOGGER.info("This module seems to have been successful."); } } @@ -171,7 +178,7 @@ private static String getEnvVariable(String key) { * @throws IOException in case of an IO error while reading the file */ protected static void addFile(Resource sourceFile, Model parameterModel, String sharedDir, - StreamingTransformator transformator) throws IOException { + Transformator transformator) throws IOException { String enexaPath = RdfHelper.getStringValue(parameterModel, sourceFile, ENEXA.location); Resource mediaTypeResource = RdfHelper.getObjectResource(parameterModel, sourceFile, DCAT.mediaType); transformator.addFile2Stream(new File(EnexaPathUtils.translateEnexa2LocalPath(enexaPath, sharedDir)), diff --git a/src/main/java/org/dice_research/enexa/transform/RDF2OntologyTransformator.java b/src/main/java/org/dice_research/enexa/transform/RDF2OntologyTransformator.java new file mode 100644 index 0000000..21a268a --- /dev/null +++ b/src/main/java/org/dice_research/enexa/transform/RDF2OntologyTransformator.java @@ -0,0 +1,106 @@ +package org.dice_research.enexa.transform; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.formats.ManchesterSyntaxDocumentFormat; +import org.semanticweb.owlapi.formats.OWLXMLDocumentFormat; +import org.semanticweb.owlapi.io.StreamDocumentSource; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLDocumentFormat; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.model.OWLOntologyStorageException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RDF2OntologyTransformator implements Transformator { + + private static final Logger LOGGER = LoggerFactory.getLogger(RDF2OntologyTransformator.class); + + protected OWLOntology ontology; + protected OWLOntologyManager manager; + protected OWLDocumentFormat outputFormat; + protected File outputFile; + protected OutputStream outputStream; + + protected RDF2OntologyTransformator(OWLOntology ontology, OWLOntologyManager manager, + OWLDocumentFormat outputFormat, File outputFile, OutputStream outputStream) { + super(); + this.ontology = ontology; + this.manager = manager; + this.outputFormat = outputFormat; + this.outputFile = outputFile; + this.outputStream = outputStream; + 
} + + public static OWLDocumentFormat getFormatForMediaType(String mediaType) { + switch (mediaType) { + case "application/owl+xml": + return new OWLXMLDocumentFormat(); + case "text/owl-manchester": + return new ManchesterSyntaxDocumentFormat(); + default: + return null; + } + } + + @Override + public void close() throws Exception { + try { + if (ontology != null) { + ontology.saveOntology(outputFormat, outputStream); + } + } catch (OWLOntologyStorageException e) { + throw new IOException("Error while writing the ontology.", e); + } finally { + IOUtils.closeQuietly(outputStream); + } + } + + @Override + public void addFile2Stream(File file, String contentType) throws IOException { + try { + String fileName = file.getName(); + try (InputStream in = new BufferedInputStream(new FileInputStream(file))) { + InputStream ins = in; + if (fileName.endsWith(".gz")) { + ins = new GzipCompressorInputStream(ins); + } else if (fileName.endsWith(".bz2")) { + ins = new BZip2CompressorInputStream(ins); + } + + LOGGER.info("Adding file {} ...", file.toString()); + OWLOntology readOnt = manager.loadOntologyFromOntologyDocument(new StreamDocumentSource(ins)); + manager.addAxioms(ontology, readOnt.getAxioms()); + } + } catch (OWLOntologyCreationException e) { + throw new IOException( + "Error while reading the ontology \"" + (file == null ? "null" : file.toString()) + "\".", e); + } + } + + public static RDF2OntologyTransformator create(OWLDocumentFormat outputFormat, File outputFile, + OutputStream outputStream) throws OWLOntologyCreationException { + OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); + OWLOntology ontology = manager.createOntology(IRI.create(outputFile)); + return new RDF2OntologyTransformator(ontology, manager, outputFormat, outputFile, outputStream); + } + + public String getOutputFileName() { + return outputFile.getName(); + } + + public File getOutputFile() { + return outputFile; + } +} diff --git a/src/main/java/org/dice_research/enexa/transform/StreamingTransformator.java b/src/main/java/org/dice_research/enexa/transform/StreamingTransformator.java index b379568..6706131 100644 --- a/src/main/java/org/dice_research/enexa/transform/StreamingTransformator.java +++ b/src/main/java/org/dice_research/enexa/transform/StreamingTransformator.java @@ -54,7 +54,7 @@ * @author Michael Röder (michael.roeder@uni-paderborn.de) * */ -public class StreamingTransformator implements AutoCloseable { +public class StreamingTransformator implements Transformator { private static final Logger LOGGER = LoggerFactory.getLogger(StreamingTransformator.class); @@ -185,184 +185,4 @@ public void close() throws Exception { out.close(); } - /** - * Creates a new {@link Builder} instance to build a new - * {@link StreamingTransformator}. - * - * @return a new {@link Builder} instance to build a new - * {@link StreamingTransformator}. - */ - public static Builder builder() { - return new Builder(); - } - - /** - * The Builder of the {@link StreamingTransformator} class. It can be extended - to allow more complex transformations.
- * - * @author Michael Röder (michael.roeder@uni-paderborn.de) - * - */ - public static class Builder { - - protected String outputFileName = null; - protected File outputDirectory = null; - protected Compression compression = Compression.NONE; - protected Lang outputFormat = Lang.NTRIPLES; - - public StreamingTransformator build() throws IOException { - if (outputFormat == null) { - LOGGER.error("The given output format is null."); - throw new IllegalArgumentException("The given output format is null."); - } - - File outputFile = createOutputFile(); - - // Create parent directoy if it doesn't exist - if (outputDirectory != null && !outputDirectory.exists() && !outputDirectory.mkdirs()) { - String msg = "Couldn't create the non-existing parent directory " + outputDirectory.toString() - + " for the output file. "; - LOGGER.error(msg); - throw new IOException(msg); - } - - OutputStream fout = null; - try { - fout = createOutputStream(outputFile); - return new StreamingTransformator(createRDFStream(fout), fout, outputFile); - } catch (Throwable e) { - // If the stream is open, try to close it - IOUtils.closeQuietly(fout); - throw e; - } - } - - protected File createOutputFile() { - // Create File object for the output file - StringBuilder outputFileBuilder = new StringBuilder(); - // Add parent path if it exists - if (outputDirectory != null) { - outputFileBuilder.append(outputDirectory.getAbsolutePath()); - outputFileBuilder.append(File.separator); - } - // Add file name (or choose a random name) - outputFileBuilder.append( - (outputFileName != null) ? outputFileName : Integer.toString(Math.abs((new Random()).nextInt()))); - // Add file extension of the output language - if (outputFormat.getFileExtensions().size() > 0) { - outputFileBuilder.append('.'); - outputFileBuilder.append(outputFormat.getFileExtensions().get(0)); - } - // Add file extension if we use compression - switch (compression) { - case BZIP2: { - outputFileBuilder.append(".bz2"); - break; - } - case GZIP: { - outputFileBuilder.append(".gz"); - break; - } - case NONE: // falls through - default: - break; - } - return new File(outputFileBuilder.toString()); - } - - protected OutputStream createOutputStream(File outputFile) throws IOException { - OutputStream fout = null; - try { - fout = new BufferedOutputStream(new FileOutputStream(outputFile)); - // Add compression if needed - switch (compression) { - case BZIP2: { - fout = new BZip2CompressorOutputStream(fout); - break; - } - case GZIP: { - fout = new GzipCompressorOutputStream(fout); - break; - } - case NONE: // falls through - default: - break; - } - - return fout; - } catch (Throwable e) { - // If the stream is open, try to close it - IOUtils.closeQuietly(fout); - throw e; - } - } - - protected StreamRDF createRDFStream(OutputStream fout) { - // Create RDF stream - StreamRDF outStream = StreamRDFWriter.getWriterStream(fout, outputFormat); - outStream.start(); - return outStream; - } - - /** - * @return the outputFormat - */ - public Lang getOutputFormat() { - return outputFormat; - } - - /** - * @param outputFormat the outputFormat to set - */ - public Builder setOutputFormat(Lang outputFormat) { - this.outputFormat = outputFormat; - return this; - } - - /** - * @return the compression - */ - public Compression getCompression() { - return compression; - } - - /** - * @param compression the compression to set - */ - public Builder setCompression(Compression compression) { - this.compression = compression; - return this; - } - - /** - * @return the outputFileName - */ - 
public String getOutputFileName() { - return outputFileName; - } - - /** - * @param outputFileName the outputFileName to set - */ - public Builder setOutputFileName(String outputFileName) { - this.outputFileName = outputFileName; - return this; - } - - /** - * @return the outputDirectory - */ - public File getOutputDirectory() { - return outputDirectory; - } - - /** - * @param outputDirectory the outputDirectory to set - */ - public Builder setOutputDirectory(File outputDirectory) { - this.outputDirectory = outputDirectory; - return this; - } - } - } diff --git a/src/main/java/org/dice_research/enexa/transform/Transformator.java b/src/main/java/org/dice_research/enexa/transform/Transformator.java new file mode 100644 index 0000000..751b2f2 --- /dev/null +++ b/src/main/java/org/dice_research/enexa/transform/Transformator.java @@ -0,0 +1,84 @@ +package org.dice_research.enexa.transform; + +import java.io.File; +import java.io.IOException; + +import javax.annotation.CheckForNull; + +public interface Transformator extends AutoCloseable { + + /** + * Add the given files to the target file. If the array contains directories, + * all files and sub directories are added. Note that all files must be + * RDF files. + * + *
<p>
+ * Note: it is suggested to use {@link #addFile2Stream(File, String)} with an + * explicit content type for each file. + *
</p>
+ * + * @param files an array of RDF files that should be added to the output file + * @throws IOException if an IO error occurs either during reading the given + * files or writing the read data to the output file. + */ + default void addFiles2Stream(File[] files) throws IOException { + for (File file : files) { + addFile2Stream(file); + } + } + + /** + * Add the given file to the target file. If the given file is a directory, all + * files and sub directories are added. Note that all files must be RDF + * files. + * + *
<p>
+ * Note: it is suggested to use {@link #addFile2Stream(File, String)} with an + * explicit content type for each file. + *
</p>
+ * + * @param file an RDF file or a directory with RDF files that should be added to + * the output file + * @throws IOException if an IO error occurs either during reading the given + * file(s) or writing the read data to the output file. + */ + default void addFile2Stream(File file) throws IOException { + if (file.isDirectory()) { + addFiles2Stream(file.listFiles()); + } else { + addFile2Stream(file, null); + } + } + + /** + * Add the given file to the target file. The given content type is used to + * guide the parsing of the file. If the given file is a directory, all files + * and sub directories are added. Note that all files must be RDF files. + * + * @param file an RDF file or a directory with RDF files that should be + * added to the output file + * @param contentType the content type String expressing the RDF serialization + * of the given file. The String is ignored if it is + * {@code null}, which means that the identification of the + * serialization is solely based on the file name. + * @throws IOException if an IO error occurs either during reading the given + * file(s) or writing the read data to the output file. + */ + void addFile2Stream(File file, @CheckForNull String contentType) throws IOException; + + /** + * @return the outputFile + */ + File getOutputFile(); + + /** + * Creates a new {@link TransformatorBuilder} instance to build a new + * {@link Transformator}. + * + * @return a new {@link TransformatorBuilder} instance to build a new + * {@link Transformator}. + */ + public static TransformatorBuilder builder() { + return new TransformatorBuilder(); + } +} diff --git a/src/main/java/org/dice_research/enexa/transform/TransformatorBuilder.java b/src/main/java/org/dice_research/enexa/transform/TransformatorBuilder.java new file mode 100644 index 0000000..525ebf9 --- /dev/null +++ b/src/main/java/org/dice_research/enexa/transform/TransformatorBuilder.java @@ -0,0 +1,216 @@ +package org.dice_research.enexa.transform; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; +import java.util.Random; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.io.IOUtils; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.system.StreamRDF; +import org.apache.jena.riot.system.StreamRDFWriter; +import org.dice_research.enexa.vocab.IANAMediaType; +import org.semanticweb.owlapi.model.OWLDocumentFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TransformatorBuilder { + + private static final Logger LOGGER = LoggerFactory.getLogger(TransformatorBuilder.class); + + protected String outputFileName = null; + protected File outputDirectory = null; + protected Compression compression = Compression.NONE; + protected String outputFormatIri = null; + protected Lang outputFormatJena = null; + protected OWLDocumentFormat outputFormatOwl = null; + + public Transformator build() throws IOException { + if (outputFormatIri == null) { + LOGGER.error("The given output format is null."); + throw new IllegalArgumentException("The given output format is null."); + } + + // Try to understand the output format + outputFormatJena = IANAMediaType.iri2Lang(outputFormatIri); + if (outputFormatJena == null) { + outputFormatOwl = RDF2OntologyTransformator + 
.getFormatForMediaType(IANAMediaType.iri2ContentType(outputFormatIri)); + } + if (outputFormatJena == null && outputFormatOwl == null) { + String msg = "The given output format \"" + outputFormatIri + "\" is unknown."; + LOGGER.error(msg); + throw new IllegalArgumentException(msg); + } + + File outputFile = createOutputFile(); + + // Create parent directory if it doesn't exist + if (outputDirectory != null && !outputDirectory.exists() && !outputDirectory.mkdirs()) { + String msg = "Couldn't create the non-existing parent directory " + outputDirectory.toString() + + " for the output file. "; + LOGGER.error(msg); + throw new IOException(msg); + } + + OutputStream fout = null; + try { + fout = createOutputStream(outputFile); + if (outputFormatJena != null) { + return new StreamingTransformator(createRDFStream(fout), fout, outputFile); + } else { + return RDF2OntologyTransformator.create(outputFormatOwl, outputFile, fout); + } + } catch (Throwable e) { + // If the stream is open, try to close it + IOUtils.closeQuietly(fout); + throw new IOException("Error while building transformator.", e); + } + } + + protected File createOutputFile() { + // Create File object for the output file + StringBuilder outputFileBuilder = new StringBuilder(); + // Add parent path if it exists + if (outputDirectory != null) { + outputFileBuilder.append(outputDirectory.getAbsolutePath()); + outputFileBuilder.append(File.separator); + } + // Add file name (or choose a random name) + outputFileBuilder.append( + (outputFileName != null) ? outputFileName : Integer.toString(Math.abs((new Random()).nextInt()))); + // Add file extension of the output language + String fileExtension = getFileExtension(); + if (fileExtension != null && !fileExtension.isEmpty()) { + outputFileBuilder.append('.'); + outputFileBuilder.append(fileExtension); + } + // Add file extension if we use compression + switch (compression) { + case BZIP2: { + outputFileBuilder.append(".bz2"); + break; + } + case GZIP: { + outputFileBuilder.append(".gz"); + break; + } + case NONE: // falls through + default: + break; + } + return new File(outputFileBuilder.toString()); + } + + protected String getFileExtension() { + if (outputFormatJena != null) { + List<String> extensions = outputFormatJena.getFileExtensions(); + return extensions.size() > 0 ?
extensions.get(0) : null; + } else if (outputFormatIri.endsWith("application/owl+xml")) { + return "owl"; + } else if (outputFormatIri.endsWith("text/owl-manchester")) { + return "omn"; + } else { + throw new IllegalStateException("There is no file extension defined for the format " + outputFormatIri); + } + } + + protected OutputStream createOutputStream(File outputFile) throws IOException { + OutputStream fout = null; + try { + fout = new BufferedOutputStream(new FileOutputStream(outputFile)); + // Add compression if needed + switch (compression) { + case BZIP2: { + fout = new BZip2CompressorOutputStream(fout); + break; + } + case GZIP: { + fout = new GzipCompressorOutputStream(fout); + break; + } + case NONE: // falls through + default: + break; + } + + return fout; + } catch (Throwable e) { + // If the stream is open, try to close it + IOUtils.closeQuietly(fout); + throw e; + } + } + + protected StreamRDF createRDFStream(OutputStream fout) { + // Create RDF stream + StreamRDF outStream = StreamRDFWriter.getWriterStream(fout, outputFormatJena); + outStream.start(); + return outStream; + } + + /** + * @return the outputFormat + */ + public String getOutputFormat() { + return outputFormatIri; + } + + /** + * @param outputFormat the outputFormat to set + */ + public TransformatorBuilder setOutputFormat(String outputFormatIri) { + this.outputFormatIri = outputFormatIri; + return this; + } + + /** + * @return the compression + */ + public Compression getCompression() { + return compression; + } + + /** + * @param compression the compression to set + */ + public TransformatorBuilder setCompression(Compression compression) { + this.compression = compression; + return this; + } + + /** + * @return the outputFileName + */ + public String getOutputFileName() { + return outputFileName; + } + + /** + * @param outputFileName the outputFileName to set + */ + public TransformatorBuilder setOutputFileName(String outputFileName) { + this.outputFileName = outputFileName; + return this; + } + + /** + * @return the outputDirectory + */ + public File getOutputDirectory() { + return outputDirectory; + } + + /** + * @param outputDirectory the outputDirectory to set + */ + public TransformatorBuilder setOutputDirectory(File outputDirectory) { + this.outputDirectory = outputDirectory; + return this; + } +} diff --git a/src/test/java/org/dice_research/enexa/transform/AbstractTransformatorTest.java b/src/test/java/org/dice_research/enexa/transform/AbstractTransformatorTest.java new file mode 100644 index 0000000..f0f08e1 --- /dev/null +++ b/src/test/java/org/dice_research/enexa/transform/AbstractTransformatorTest.java @@ -0,0 +1,196 @@ +package org.dice_research.enexa.transform; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Random; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.io.IOUtils; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFDataMgr; +import org.dice_research.enexa.vocab.IANAMediaType; +import org.junit.Assert; +import org.junit.Test; + +/** + * 
An abstract test for {@link Transformator} classes that simply generates + * several files with varying serializations and concatenates them in different + * setups (with/without input compression, output compression and explicit file + * extensions). + * + * @author Michael Röder (michael.roeder@uni-paderborn.de) + * + */ +public abstract class AbstractTransformatorTest { + + protected Model expectedModel; + protected String outputFormatIri; + protected Model[] inputModels; + protected String[] inputFormatIris; + protected Lang[] inputLangs; + + public AbstractTransformatorTest(Model expectedModel, String outputFormatIri, Model[] inputModels, + String[] inputFormatIris) { + super(); + this.expectedModel = expectedModel; + this.outputFormatIri = outputFormatIri; + this.inputModels = inputModels; + this.inputFormatIris = inputFormatIris; + this.inputLangs = new Lang[inputFormatIris.length]; + // Print information about this test + System.out.println("Test output format: " + IANAMediaType.iri2ContentType(outputFormatIri.toString())); + System.out.print(" input formats: "); + for (int i = 0; i < inputFormatIris.length; ++i) { + inputLangs[i] = IANAMediaType.iri2Lang(inputFormatIris[i]); + Assert.assertNotNull("Input format cannot be represented as Lang instance.", inputLangs[i]); + if (i > 0) { + System.out.print(" "); + } + System.out.println(IANAMediaType.iri2ContentType(inputFormatIris[i])); + } + } + +// @Test +// public void testCompInFileExt() { +// try { +// createFilesAndTest(true, true); +// } catch (Exception e) { +// e.printStackTrace(); +// Assert.fail(); +// } +// } +// +// @Test +// public void testFileExt() { +// try { +// createFilesAndTest(true, false); +// } catch (Exception e) { +// e.printStackTrace(); +// Assert.fail(); +// } +// } +// +// @Test +// public void testCompIn() { +// try { +// createFilesAndTest(false, true); +// } catch (Exception e) { +// e.printStackTrace(); +// Assert.fail(); +// } +// } + + @Test + public void test() { + try { + createFilesAndTest(false, false); + } catch (Exception e) { + e.printStackTrace(); + Assert.fail(); + } + } + + public void createFilesAndTest(boolean fileExtensions, boolean compressedInput) throws Exception { + // Create temporary file with given content + Random random = new Random(); + File inputFiles[] = new File[inputModels.length]; + Compression[] compressions = new Compression[inputModels.length]; + StringBuilder endingBuilder = new StringBuilder(); + for (int i = 0; i < inputModels.length; ++i) { + endingBuilder.delete(0, endingBuilder.length()); + if (fileExtensions) { + endingBuilder.append('.'); + endingBuilder.append(inputLangs[i].getFileExtensions().get(0)); + } + if (compressedInput) { + if (random.nextBoolean()) { + endingBuilder.append(".bz2"); + compressions[i] = Compression.BZIP2; + } else { + endingBuilder.append(".gz"); + compressions[i] = Compression.GZIP; + } + } else { + compressions[i] = Compression.NONE; + } + inputFiles[i] = File.createTempFile("test-dataset-", endingBuilder.toString()); + } + // Write Input files + OutputStream out = null; + for (int i = 0; i < inputModels.length; ++i) { + try { + out = new BufferedOutputStream(new FileOutputStream(inputFiles[i])); + switch (compressions[i]) { + case BZIP2: { + out = new BZip2CompressorOutputStream(out); + break; + } + case GZIP: { + out = new GzipCompressorOutputStream(out); + break; + } + case NONE: // falls through + default: + break; + } + RDFDataMgr.write(out, inputModels[i], inputLangs[i]); + } finally { + IOUtils.closeQuietly(out); + } + } + + 
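+        // Check the concatenated output with each of the three output compression settings: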
testTransformator(inputFiles, fileExtensions, Compression.NONE); + testTransformator(inputFiles, fileExtensions, Compression.BZIP2); + testTransformator(inputFiles, fileExtensions, Compression.GZIP); + } + + public void testTransformator(File[] inputFiles, boolean fileExtensions, Compression compression) throws Exception { + File outputFile = File.createTempFile("test-output-", ""); + try (Transformator transformator = new TransformatorBuilder().setOutputFormat(outputFormatIri) + .setCompression(compression).setOutputFileName(outputFile.getName()) + .setOutputDirectory(outputFile.getParentFile()).build();) { + if (fileExtensions) { + transformator.addFiles2Stream(inputFiles); + } else { + for (int i = 0; i < inputLangs.length; ++i) { + transformator.addFile2Stream(inputFiles[i], inputLangs[i].getContentType().getContentTypeStr()); + } + } + // update output file after writing + outputFile = transformator.getOutputFile(); + } + + InputStream is = null; + try { + is = new BufferedInputStream(new FileInputStream(outputFile)); + switch (compression) { + case BZIP2: { + is = new BZip2CompressorInputStream(is); + break; + } + case GZIP: { + is = new GzipCompressorInputStream(is); + break; + } + case NONE: // falls through + default: + break; + } + compareModels(is, outputFormatIri, expectedModel); + } finally { + IOUtils.closeQuietly(is); + } + } + + protected abstract void compareModels(InputStream is, String outputFormatIri, Model expectedModel); + +} diff --git a/src/test/java/org/dice_research/enexa/transform/RDF2OntologyTransformatorTest.java b/src/test/java/org/dice_research/enexa/transform/RDF2OntologyTransformatorTest.java new file mode 100644 index 0000000..67c1ce9 --- /dev/null +++ b/src/test/java/org/dice_research/enexa/transform/RDF2OntologyTransformatorTest.java @@ -0,0 +1,133 @@ +package org.dice_research.enexa.transform; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.riot.Lang; +import org.apache.jena.vocabulary.RDF; +import org.dice_research.enexa.vocab.IANAMediaType; +import org.junit.Assert; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAxiom; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.model.OWLOntologyStorageException; + +/** + * A test for the {@link RDF2OntologyTransformator} class that simply generates + * 3 files with varying serializations and concatenates them in different setups + * (with/without input compression, output compression and explicit file + * extensions). 
+ * + * @author Michael Röder (michael.roeder@uni-paderborn.de) + * + */ +@RunWith(Parameterized.class) +public class RDF2OntologyTransformatorTest extends AbstractTransformatorTest { + + public RDF2OntologyTransformatorTest(Model expectedModel, String outputFormatIri, Model[] inputModels, + String[] inputFormatIris) { + super(expectedModel, outputFormatIri, inputModels, inputFormatIris); + } + + @Override + protected void compareModels(InputStream is, String outputFormatIri, Model expectedModel) { +// String contentType = IANAMediaType.iri2ContentType(outputFormatIri); + try { + File expectedFile = File.createTempFile("test-expected-result-", ".nt"); + try (Writer writer = new FileWriter(expectedFile, StandardCharsets.UTF_8)) { + expectedModel.write(writer, "Turtle"); + } + + OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); + OWLOntology expectedOntology = manager.loadOntologyFromOntologyDocument(expectedFile); + File expectedFile2 = File.createTempFile("test-expected-result-", ".nt"); + try (OutputStream out = new BufferedOutputStream(new FileOutputStream(expectedFile2))) { + manager.saveOntology(expectedOntology, out); + } + try (InputStream in = new BufferedInputStream(new FileInputStream(expectedFile2))) { + expectedOntology = manager.loadOntologyFromOntologyDocument(in); + } + + OWLOntology readOnt = manager.loadOntologyFromOntologyDocument(is); + + for (OWLAxiom a : expectedOntology.getAxioms()) { + System.out.println(a); + Assert.assertTrue("Read ontology does not contain the expected axiom " + a.toString(), + readOnt.containsAxiom(a)); + } + for (OWLAxiom a : readOnt.getAxioms()) { + System.out.println(a); + Assert.assertTrue("Read ontology contains the additional axiom " + a.toString(), + expectedOntology.containsAxiom(a)); + } + } catch (IOException | OWLOntologyStorageException | OWLOntologyCreationException e) { + e.printStackTrace(); + Assert.fail(e.getMessage()); + } + } + + @Parameters + public static Collection<Object[]> data() { + Model modelA = ModelFactory.createDefaultModel(); + modelA.add(modelA.createResource("http://example.org/mA/e1"), RDF.type, + modelA.createResource("http://example.org/mA/c1")); + modelA.add(modelA.createResource("http://example.org/mA/e2"), RDF.type, + modelA.createResource("http://example.org/mA/c1")); + modelA.add(modelA.createResource("http://example.org/mA/e3"), RDF.type, + modelA.createResource("http://example.org/mA/c2")); + + Model modelB = ModelFactory.createDefaultModel(); + modelB.add(modelB.createResource("http://example.org/mA/e1"), modelB.createProperty("http://example.org/mB/p1"), + modelB.createResource("http://example.org/mA/e2")); + modelB.add(modelB.createResource("http://example.org/mA/e2"), modelB.createProperty("http://example.org/mB/p1"), + modelB.createResource("http://example.org/mA/e3")); + modelB.add(modelB.createResource("http://example.org/mA/e3"), modelB.createProperty("http://example.org/mB/p1"), + modelB.createResource("http://example.org/mA/e4")); + + Model modelC = ModelFactory.createDefaultModel(); + modelC.add(modelC.createResource("http://example.org/mC/e1"), modelC.createProperty("http://example.org/mC/p1"), + modelC.createResource("http://example.org/mC/e2")); + modelC.add(modelC.createResource("http://example.org/mC/e2"), modelC.createProperty("http://example.org/mC/p1"), + modelC.createResource("http://example.org/mC/e3")); + modelC.add(modelC.createResource("http://example.org/mC/e3"), modelC.createProperty("http://example.org/mC/p1"), + modelC.createResource("http://example.org/mC/e4")); + +
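+        // The expected result of the transformation is the union of the three input models built above: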
Model[] inputModels = new Model[] { modelA, modelB, modelC }; + + Model expectedModel = ModelFactory.createDefaultModel(); + for (int i = 0; i < inputModels.length; ++i) { + expectedModel.add(inputModels[i]); + } + + String[] outputLanguages = new String[] { IANAMediaType.contentType2Iri("application/owl+xml"), + IANAMediaType.contentType2Iri("text/owl-manchester") }; + String[] inputSet1 = new String[] { IANAMediaType.lang2Iri(Lang.NTRIPLES), IANAMediaType.lang2Iri(Lang.TTL), + IANAMediaType.lang2Iri(Lang.RDFXML) }; + + List<Object[]> testConfigs = new ArrayList<Object[]>(); + for (int i = 0; i < outputLanguages.length; ++i) { + testConfigs.add(new Object[] { expectedModel, outputLanguages[i], inputModels, inputSet1 }); + } + return testConfigs; + } +} diff --git a/src/test/java/org/dice_research/enexa/transform/StreamingTransformatorTest.java b/src/test/java/org/dice_research/enexa/transform/StreamingTransformatorTest.java index 17d2c3b..fa68b02 100644 --- a/src/test/java/org/dice_research/enexa/transform/StreamingTransformatorTest.java +++ b/src/test/java/org/dice_research/enexa/transform/StreamingTransformatorTest.java @@ -1,31 +1,17 @@ package org.dice_research.enexa.transform; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.InputStream; -import java.io.OutputStream; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.Random; -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; -import org.apache.commons.io.IOUtils; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFDataMgr; import org.apache.jena.vocabulary.RDF; +import org.dice_research.enexa.vocab.IANAMediaType; import org.dice_research.rdf.test.ModelComparisonHelper; -import org.junit.Assert; -import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; @@ -40,154 +26,17 @@ * */ @RunWith(Parameterized.class) -public class StreamingTransformatorTest { +public class StreamingTransformatorTest extends AbstractTransformatorTest { - private Model expectedModel; - private Lang outputLang; - private Model[] inputModels; - private Lang[] inputLangs; - - public StreamingTransformatorTest(Model expectedModel, Lang outputLang, Model[] inputModels, Lang[] inputLangs) { - super(); - this.expectedModel = expectedModel; - this.outputLang = outputLang; - this.inputModels = inputModels; - this.inputLangs = inputLangs; - System.out.println("Testing with " + outputLang.toString() - + " as output language and the following input languages: " + Arrays.toString(inputLangs)); - } - - @Test - public void testCompInFileExt() { - try { - createFilesAndTest(true, true); - } catch (Exception e) { - e.printStackTrace(); - Assert.fail(); - } - } - - @Test - public void testFileExt() { - try { - createFilesAndTest(true, false); - } catch (Exception e) { - e.printStackTrace(); - Assert.fail(); - } - } - - @Test - public void testCompIn() { - try { - createFilesAndTest(false, true); - } catch (Exception e) { - e.printStackTrace(); -
Assert.fail(); - } + public StreamingTransformatorTest(Model expectedModel, String outputFormatIri, Model[] inputModels, + String[] inputFormatIris) { + super(expectedModel, outputFormatIri, inputModels, inputFormatIris); } - @Test - public void test() { - try { - createFilesAndTest(false, false); - } catch (Exception e) { - e.printStackTrace(); - Assert.fail(); - } - } - - public void createFilesAndTest(boolean fileExtensions, boolean compressedInput) throws Exception { - // Create temporary file with given content - Random random = new Random(); - File inputFiles[] = new File[inputModels.length]; - Compression[] compressions = new Compression[inputModels.length]; - StringBuilder endingBuilder = new StringBuilder(); - for (int i = 0; i < inputModels.length; ++i) { - endingBuilder.delete(0, endingBuilder.length()); - if (fileExtensions) { - endingBuilder.append('.'); - endingBuilder.append(inputLangs[i].getFileExtensions().get(0)); - } - if (compressedInput) { - if (random.nextBoolean()) { - endingBuilder.append(".bz2"); - compressions[i] = Compression.BZIP2; - } else { - endingBuilder.append(".gz"); - compressions[i] = Compression.GZIP; - } - } else { - compressions[i] = Compression.NONE; - } - inputFiles[i] = File.createTempFile("test-dataset-", endingBuilder.toString()); - } - // Write Input files - OutputStream out = null; - for (int i = 0; i < inputModels.length; ++i) { - try { - out = new BufferedOutputStream(new FileOutputStream(inputFiles[i])); - switch (compressions[i]) { - case BZIP2: { - out = new BZip2CompressorOutputStream(out); - break; - } - case GZIP: { - out = new GzipCompressorOutputStream(out); - break; - } - case NONE: // falls through - default: - break; - } - RDFDataMgr.write(out, inputModels[i], inputLangs[i]); - } finally { - IOUtils.closeQuietly(out); - } - } - - testTransformator(inputFiles, fileExtensions, Compression.NONE); - testTransformator(inputFiles, fileExtensions, Compression.BZIP2); - testTransformator(inputFiles, fileExtensions, Compression.GZIP); - } - - public void testTransformator(File[] inputFiles, boolean fileExtensions, Compression compression) throws Exception { - File outputFile = File.createTempFile("test-output-", ""); - try (StreamingTransformator transformator = StreamingTransformator.builder().setOutputFormat(outputLang) - .setCompression(compression).setOutputFileName(outputFile.getName()) - .setOutputDirectory(outputFile.getParentFile()).build();) { - if (fileExtensions) { - transformator.addFiles2Stream(inputFiles); - } else { - for (int i = 0; i < inputLangs.length; ++i) { - transformator.addFile2Stream(inputFiles[i], inputLangs[i].getContentType().getContentTypeStr()); - } - } - // update output file after writing - outputFile = transformator.getOutputFile(); - } - + @Override + protected void compareModels(InputStream is, String outputFormatIri, Model expectedModel) { Model readModel = ModelFactory.createDefaultModel(); - InputStream is = null; - try { - is = new BufferedInputStream(new FileInputStream(outputFile)); - switch (compression) { - case BZIP2: { - is = new BZip2CompressorInputStream(is); - break; - } - case GZIP: { - is = new GzipCompressorInputStream(is); - break; - } - case NONE: // falls through - default: - break; - } - RDFDataMgr.read(readModel, is, outputLang); - } finally { - IOUtils.closeQuietly(is); - } + RDFDataMgr.read(readModel, is, IANAMediaType.iri2Lang(outputFormatIri)); ModelComparisonHelper.assertModelsEqual(expectedModel, readModel); } @@ -224,10 +73,14 @@ public static Collection data() { 
expectedModel.add(inputModels[i]); } - Lang[] outputLanguages = new Lang[] { Lang.NTRIPLES, Lang.TTL, Lang.TRIG, Lang.NQUADS, Lang.RDFPROTO, - Lang.RDFTHRIFT, Lang.TRIX }; - Lang[] inputSet1 = new Lang[] { Lang.NTRIPLES, Lang.TTL, Lang.JSONLD }; - Lang[] inputSet2 = new Lang[] { Lang.RDFXML, Lang.RDFJSON, Lang.N3 }; + String[] outputLanguages = new String[] { IANAMediaType.lang2Iri(Lang.NTRIPLES), + IANAMediaType.lang2Iri(Lang.TTL), IANAMediaType.lang2Iri(Lang.TRIG), + IANAMediaType.lang2Iri(Lang.NQUADS), IANAMediaType.lang2Iri(Lang.RDFPROTO), + IANAMediaType.lang2Iri(Lang.RDFTHRIFT), IANAMediaType.lang2Iri(Lang.TRIX) }; + String[] inputSet1 = new String[] { IANAMediaType.lang2Iri(Lang.NTRIPLES), IANAMediaType.lang2Iri(Lang.TTL), + IANAMediaType.lang2Iri(Lang.JSONLD) }; + String[] inputSet2 = new String[] { IANAMediaType.lang2Iri(Lang.RDFXML), IANAMediaType.lang2Iri(Lang.RDFJSON), + IANAMediaType.lang2Iri(Lang.N3) }; List<Object[]> testConfigs = new ArrayList<Object[]>(); for (int i = 0; i < outputLanguages.length; ++i) {
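Taken together, the new TransformatorBuilder picks between the streaming Jena path and the OWL API path based on the output media type IRI. A minimal usage sketch of the API added in this patch (directory and file names are hypothetical):

```java
import java.io.File;

import org.dice_research.enexa.transform.Transformator;
import org.dice_research.enexa.transform.TransformatorBuilder;
import org.dice_research.enexa.vocab.IANAMediaType;

public class TransformExample {
    public static void main(String[] args) throws Exception {
        // Resolve the IANA media type IRI for Manchester syntax
        String outputFormatIri = IANAMediaType.contentType2Iri("text/owl-manchester");
        try (Transformator transformator = new TransformatorBuilder()
                .setOutputFormat(outputFormatIri)
                .setOutputFileName("merged")
                .setOutputDirectory(new File("/tmp/enexa-out"))
                .build()) {
            // Input files (hypothetical); the content type argument is optional
            transformator.addFile2Stream(new File("a.nt"), "application/n-triples");
            transformator.addFile2Stream(new File("b.ttl"));
        }
        // The merged ontology should end up as /tmp/enexa-out/merged.omn
    }
}
```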