Skip to content

Commit

Permalink
Merge branch 'development'
Browse files Browse the repository at this point in the history
  • Loading branch information
bjdmeest committed Nov 14, 2018
2 parents 5fcf3f4 + 8fbc27d commit 62e56a2
Show file tree
Hide file tree
Showing 16 changed files with 352 additions and 101 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## Unreleased

## [4.2.0] - 2018-11-14

### Added

- output format: hdt

### Fixed

- local build on Windows 7 works
- object with template with an array as input now returns multiple objects

## [4.1.0] - 2018-10-15

### Added
Expand Down Expand Up @@ -97,6 +108,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- support for accessing remote files (via HTTP GET)
- basic support for functions

[4.2.0]: https://github.com/RMLio/rmlmapper-java/compare/v4.1.0...v4.2.0
[4.1.0]: https://github.com/RMLio/rmlmapper-java/compare/v4.0.0...v4.1.0
[4.0.0]: https://github.com/RMLio/rmlmapper-java/compare/v0.2.1...v4.0.0
[0.2.1]: https://github.com/RMLio/rmlmapper-java/compare/v0.2.0...v0.2.1
Expand Down
106 changes: 77 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,30 @@ A standalone jar can be found in `/target`.
## Usage

### CLI
The following options are available.
The following options are the most common.

- `-m, --mapping <arg>`: path to mapping document
- `-o, --output <arg>`: path to output file
- `-t, --triplesmaps <arg>`: triplesmaps to be executed in order, split by `,` (default: all)
- `-c, --configfile <arg>`: path to config file
- `-d, --duplicates`: remove duplicates in the output
- `-f, --functionfile <arg>`: path to functions.ttl file (dynamic functions are found relative to functions.ttl)
- `-v, --verbose`: show more details
- `-h, --help`: show help
- `-s, --serialization <arg>`: serialization format (nquads (default), trig, trix, jsonld, hdt)

All options can be listed by executing `java -jar rmlmapper.jar --help`;
the output of that command is shown below.

```
usage: java -jar mapper.jar <options>
options:
-c,--configfile <arg> path to configuration file
-d,--duplicates remove duplicates in the output
-e,--metadatafile <arg> path to metadata-test-cases file
-f,--functionfile <arg> path to functions.ttl file (dynamic functions are found relative to functions.ttl)
-h,--help show help info
-l,--metadataDetailLevel <arg> generate metadata-test-cases on given detail level (dataset - triple - term)
-m,--mappingfile <arg> path to mapping document
-o,--outputfile <arg> path to output file (default: stdout)
-s,--serialization <arg> serialization format (nquads (default), trig, trix, jsonld, hdt)
-t,--triplesmaps <arg> IRIs of the triplesmaps that should be executed in order, split by ',' (default is all triplesmaps)
-v,--verbose show more details in debugging output
```

### Library

Expand All @@ -65,21 +79,27 @@ import be.ugent.rml.records.RecordsFactory;
import be.ugent.rml.store.RDF4JStore;
import be.ugent.rml.store.QuadStore;

public class Main {
import java.io.FileInputStream;
import java.io.InputStream;


class Main {

public static void main(String[] args) {

boolean removeDuplicates = false; //set to true if you want to remove duplicate triples/quads from the output
String cwd = "/home/rml"; //path to default directory for local files
String mappingFile = "/home/rml/mapping.rml.ttl"; //path to the mapping file that needs to be executed
List<String> triplesMaps = new ArrayList<>(); //list of triplesmaps to execute. When this list is empty all triplesmaps in the mapping file are executed

InputStream mappingStream = new FileInputStream(mappingFile);
Model model = Rio.parse(mappingStream, "", RDFFormat.TURTLE);
RDF4JStore rmlStore = new RDF4JStore(model);

Executor executor = new Executor(rmlStore, new RecordsFactory(new DataFetcher(cwd, rmlStore)));
QuadStore result = executor.execute(triplesMaps, removeDuplicates);
try {
InputStream mappingStream = new FileInputStream(mappingFile);
Model model = Rio.parse(mappingStream, "", RDFFormat.TURTLE);
RDF4JStore rmlStore = new RDF4JStore(model);

Executor executor = new Executor(rmlStore, new RecordsFactory(new DataFetcher(cwd, rmlStore)));
QuadStore result = executor.execute(null);
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
}
```
Expand Down Expand Up @@ -123,24 +143,51 @@ You can change the functions.ttl path using a commandline-option (`-f`).
This overrides the dynamic loading.
See the snippet below for an example of how to do it.

```
```java
package be.ugent.rml;

import be.ugent.rml.functions.FunctionLoader;
import be.ugent.rml.functions.lib.GrelProcessor;
import be.ugent.rml.records.RecordsFactory;
import be.ugent.rml.store.QuadStore;
import com.google.common.io.Resources;

import java.io.File;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;


class Main {

String mapPath = "path/to/mapping/file";
String outPath = "path/to/where/the/output/triples/should/be/written";
Map<String, Class> libraryMap = new HashMap<>();
libraryMap.put("GrelFunctions.jar", GrelProcessor.class);
FunctionLoader functionLoader = new FunctionLoader(libraryMap);
try {
Executor executor = this.createExecutor(mapPath, functionLoader);
doMapping(executor, outPath);
} catch (IOException e) {
logger.error(e.getMessage(), e);
public static void main(String[] args) {
String mapPath = "path/to/mapping/file";
String functionPath = "path/to/functions.ttl/file";

URL url = Resources.getResource(functionPath);

Map<String, Class> libraryMap = new HashMap<>();
libraryMap.put("GrelFunctions.jar", GrelProcessor.class);
try {
File functionsFile = new File(url.toURI());
FunctionLoader functionLoader = new FunctionLoader(functionsFile, null, libraryMap);
ClassLoader classLoader = Main.class.getClassLoader();
// execute mapping file
File mappingFile = new File(classLoader.getResource(mapPath).getFile());
QuadStore rmlStore = Utils.readTurtle(mappingFile);

Executor executor = new Executor(rmlStore, new RecordsFactory(new DataFetcher(mappingFile.getParent(), rmlStore)),
functionLoader);
QuadStore result = executor.execute(null);
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
}
```

### Testing

#### RDBs
Make sure you have [Docker](https://www.docker.com) running.

Expand All @@ -167,6 +214,7 @@ Make sure you have [Docker](https://www.docker.com) running.
| com.opencsv opencsv | Apache License 2.0 |
| commons-lang | Apache License 2.0 |
| ch.qos.logback | Eclipse Public License 1.0 & GNU Lesser General Public License 2.1 |
| org.rdfhdt.hdt-jena | GNU Lesser General Public License v3.0 |

# UML Diagrams
## How to generate with IntelliJ IDEA
Expand All @@ -175,4 +223,4 @@ Make sure you have [Docker](https://www.docker.com) running.
* Right click on package: "be.ugent.rml"
* Diagrams > Show Diagram > Java Class Diagrams
* Choose what properties of the classes you want to show in the upper left corner
* Export to file > .png | Save diagram > .uml
* Export to file > .png | Save diagram > .uml
4 changes: 2 additions & 2 deletions buildNumber.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#maven.buildNumber.plugin properties file
#Mon Oct 08 16:59:34 CEST 2018
buildNumber0=53
#Mon Nov 12 15:14:59 CET 2018
buildNumber0=60
72 changes: 58 additions & 14 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>be.ugent.rml</groupId>
<artifactId>rmlmapper</artifactId>
<version>4.1.0</version>
<version>4.2.0</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit.version>4.12</junit.version>
Expand All @@ -13,6 +13,13 @@
<url>https://github.com/RMLio/rmlmapper-java</url>
</scm>

<repositories>
<repository>
<id>jitpack.io</id>
<url>https://jitpack.io</url>
</repository>
</repositories>

<dependencies>
<dependency>
<groupId>ch.qos.logback</groupId>
Expand Down Expand Up @@ -104,6 +111,21 @@
<artifactId>jackson-core</artifactId>
<version>2.9.6</version>
</dependency>
<dependency>
<groupId>com.github.rdfhdt</groupId>
<artifactId>hdt-java</artifactId>
<version>hdt-2.1-SNAPSHOT</version>
</dependency>
<!--<dependency>-->
<!--<groupId>org.rdfhdt</groupId>-->
<!--<artifactId>hdt-java-core</artifactId>-->
<!--<version>2.1-SNAPSHOT</version>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>org.rdfhdt</groupId>-->
<!--<artifactId>hdt-api</artifactId>-->
<!--<version>2.1-SNAPSHOT</version>-->
<!--</dependency>-->
</dependencies>

<build>
Expand All @@ -121,23 +143,44 @@
<version>3.7.0</version>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<archive>
<manifest>
<shadedArtifactAttached>false</shadedArtifactAttached>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>be.ugent.rml.cli.Main</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
<addHeader>false</addHeader>
</transformer>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<!-- Some jars are signed but shading breaks that.
Don't include signing files.
-->
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind to the packaging phase -->
<goals>
<goal>single</goal>
<goal>shade</goal>
</goals>
</execution>
</executions>
Expand All @@ -146,10 +189,11 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.17</version>
<!-- <configuration>
<parallel>methods</parallel>
<threadCount>10</threadCount>
</configuration>-->
<configuration>
<useSystemClassLoader>false</useSystemClassLoader>
<!--<parallel>methods</parallel>-->
<!--<threadCount>10</threadCount>-->
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/be/ugent/rml/Executor.java
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ private ProvenancedTerm getSubject(Term triplesMap, Mapping mapping, Record reco
List<Term> nodes = mapping.getSubjectMappingInfo().getTermGenerator().generate(record);

if (!nodes.isEmpty()) {
//todo: only create metadata-test-cases when it's required
//todo: only create metadata when it's required
this.subjectCache.get(triplesMap).put(i, new ProvenancedTerm(nodes.get(0), new Metadata(triplesMap, mapping.getSubjectMappingInfo().getTerm())));
}
}
Expand Down
22 changes: 22 additions & 0 deletions src/main/java/be/ugent/rml/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
import com.google.common.escape.Escaper;
import com.google.common.net.UrlEscapers;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.rdfhdt.hdt.enums.RDFNotation;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.hdt.HDT;
import org.rdfhdt.hdt.hdt.HDTManager;
import org.rdfhdt.hdt.options.HDTSpecification;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.eclipse.rdf4j.model.Model;
Expand Down Expand Up @@ -400,4 +405,21 @@ public static String hashCode(String s) {
}
return Integer.toString(Math.abs(hash));
}

/**
 * Converts an N-Triples file into an HDT file.
 *
 * <p>The RDF file at {@code rdfInputPath} is parsed as N-Triples with the fixed base URI
 * {@code http://example.com/mydataset}, and the resulting HDT is written to
 * {@code hdtOutputPath}. Any failure is reported by printing the stack trace;
 * no exception is propagated to the caller.
 *
 * @param rdfInputPath  path of the N-Triples file to read
 * @param hdtOutputPath path of the HDT file to write
 */
public static void ntriples2hdt(String rdfInputPath, String hdtOutputPath) {
    // Configuration variables
    String baseURI = "http://example.com/mydataset";
    String inputType = "ntriples";

    // try-with-resources guarantees the HDT handle is released even when saving fails;
    // previously close() was skipped whenever saveToHDT threw.
    try (HDT hdt = HDTManager.generateHDT(rdfInputPath, baseURI, RDFNotation.parse(inputType), new HDTSpecification(), null)) {
        // Save generated HDT to a file
        hdt.saveToHDT(hdtOutputPath, null);
    } catch (Exception e) {
        // Best-effort conversion: report the problem but do not fail the caller.
        e.printStackTrace();
    }
}
}
Loading

0 comments on commit 62e56a2

Please sign in to comment.