Skip to content
This repository has been archived by the owner on Feb 22, 2024. It is now read-only.

Commit

Permalink
Adds the FP-329 - Thesaurus for Social Sciences; FP-330 - IPTC and FP…
Browse files Browse the repository at this point in the history
…-331 - STW to the Fusepool Stanbol launchers. In addition to the indexed data this also provides configurations for the FST linking engines as well as std. linking chains for those thesauri. Those chains can be directly used with the LiteralExtractionTransformer.
  • Loading branch information
westei committed Jul 1, 2015
1 parent 89dbc67 commit 0f714ee
Show file tree
Hide file tree
Showing 35 changed files with 492 additions and 4 deletions.
67 changes: 67 additions & 0 deletions bundlelist/ld-sites/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (C) 2014 Bern University of Applied Sciences..
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <!--
    Bundle list for the linked-data sites. This module declares the three
    thesaurus site modules (iptc, stw, thesoz) as dependencies and is
    packaged as a "feature".
  -->
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>eu.fusepool.p3.stanbol-launcher</groupId>
    <artifactId>stanbol-launcher-reactor</artifactId>
    <version>1.0.0-SNAPSHOT</version>
    <relativePath>../..</relativePath>
  </parent>

  <artifactId>stanbol-launcher-ld-sites-bundlelist</artifactId>
  <packaging>feature</packaging>

  <name>Fusepool Linked Data Site Bundlelist</name>
  <description>
    Provides Indexes for well known Vocabularies for EntityLinking. Each Site
    already comes with a comprehensive default configuration for Entity Linking
    that can be used with the Fusepool Literal Extraction Transformer.
  </description>

  <!-- Site data modules of this reactor; all share this module's version. -->
  <dependencies>
    <dependency>
      <groupId>eu.fusepool.p3.stanbol-launcher</groupId>
      <artifactId>stanbol-data-site-iptc</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>eu.fusepool.p3.stanbol-launcher</groupId>
      <artifactId>stanbol-data-site-stw</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>eu.fusepool.p3.stanbol-launcher</groupId>
      <artifactId>stanbol-data-site-thesoz</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!--
        NOTE(review): no version, configuration or <extensions> is given here.
        Presumably these are supplied by the parent POM (pluginManagement) and
        this plugin provides the "feature" packaging; confirm against the
        reactor POM.
      -->
      <plugin>
        <groupId>org.wymiwyg.karaf.tooling</groupId>
        <artifactId>karaf-sling-maven-plugin</artifactId>
      </plugin>
    </plugins>
  </build>

</project>
63 changes: 63 additions & 0 deletions data/site-iptc/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <!-- OSGi bundle module that packages the IPTC site for entity linking. -->
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>eu.fusepool.p3.stanbol-launcher</groupId>
    <artifactId>stanbol-launcher-reactor</artifactId>
    <version>1.0.0-SNAPSHOT</version>
    <relativePath>../..</relativePath>
  </parent>

  <artifactId>stanbol-data-site-iptc</artifactId>
  <packaging>bundle</packaging>

  <name>IPTC Media Topics</name>
  <description>
    Provides the IPTC media topics for entity linking
  </description>

  <inceptionYear>2015</inceptionYear>

  <!-- Paths referenced by the bundle instructions below. -->
  <properties>
    <data.path>site</data.path>
    <config.path>config</config.path>
  </properties>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.felix</groupId>
        <artifactId>maven-bundle-plugin</artifactId>
        <extensions>true</extensions>
        <configuration>
          <!--
            Data-Files and Install-Path take their values from the properties
            above. NOTE(review): presumably these headers are read by the
            Stanbol data file provider and the bundle installer, and the
            negative priority lets other providers take precedence; confirm.
          -->
          <instructions>
            <Data-Files>${data.path}</Data-Files>
            <Data-Files-Priority>
              -100
            </Data-Files-Priority>
            <Install-Path>${config.path}</Install-Path>
          </instructions>
        </configuration>
      </plugin>
    </plugins>
  </build>

</project>
5 changes: 5 additions & 0 deletions data/site-iptc/src/main/resources/config/iptc.solrindex.ref
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#Mon Jun 29 12:21:02 CEST 2015
Name=SolrIndex for iptc
Synchronized=true
Description=IPTC Media Topics
Index-Archive=iptc.solrindex.zip
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","opennlp-pos","iptc-linking","text-annotation-new-model","fise2fam"]
stanbol.enhancer.chain.chainproperties=[""]
stanbol.enhancer.chain.name="iptc-linking"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","iptc-plain-linking","text-annotation-new-model","fise2fam"]
stanbol.enhancer.chain.chainproperties=[""]
stanbol.enhancer.chain.name="iptc-plain-linking"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
stanbol.enhancer.engine.name="iptc-linking"
enhancer.engines.linking.includeSimilarScore=B"true"
enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1"
enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank"
enhancer.engines.linking.caseSensitive=B"false"
enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard"
enhancer.engines.linking.entityTypes=[""]
enhancer.engines.linking.suggestions=I"3"
enhancer.engines.linking.defaultMatchingLanguage=""
enhancer.engines.linking.lucenefst.entityCacheSize=I"65536"
enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst"
enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"]
enhancer.engines.linking.lucenefst.solrcore="iptc"
enhancer.engines.linking.lucenefst.typeField="rdf:type"
enhancer.engines.linking.typeMappings=["skos:Concept"]
enhancer.engines.linking.processedLanguages=["*;lmmtip;uc\=LINK;prob\=0.75;pprob\=0.75","de;uc\=MATCH","es;lc\=Noun","nl;lc\=Noun"]
enhancer.engines.linking.properNounsState=B"false"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
enhancer.engines.linking.includeSimilarScore=B"true"
enhancer.engines.linking.lucenefst.fstThreadPoolSize=I"1"
enhancer.engines.linking.lucenefst.rankingField="entityhub:entityRank"
enhancer.engines.linking.caseSensitive=B"false"
enhancer.engines.linking.lucenefst.fieldEncoding="SolrYard"
enhancer.engines.linking.entityTypes=[""]
enhancer.engines.linking.suggestions=I"3"
enhancer.engines.linking.defaultMatchingLanguage=""
stanbol.enhancer.engine.name="iptc-plain-linking"
enhancer.engines.linking.lucenefst.entityCacheSize=I"65536"
enhancer.engines.linking.lucenefst.fstfolder="${solr-data-dir}/fst"
enhancer.engines.linking.lucenefst.fstconfig=["*;field\=rdfs:label;generate\=false"]
enhancer.engines.linking.lucenefst.solrcore="iptc"
enhancer.engines.linking.lucenefst.typeField="rdf:type"
enhancer.engines.linking.typeMappings=["skos:Concept"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
org.apache.stanbol.entityhub.yard.name="iptc\ Cache"
org.apache.stanbol.entityhub.yard.cacheYardId="iptcIndex"
org.apache.stanbol.entityhub.yard.id="iptcIndex"
org.apache.stanbol.entityhub.yard.description="Cache\ for\ the\ iptc\ Referenced\ Site\ using\ the\ iptcIndex."
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
org.apache.stanbol.entityhub.site.licenseName=["Creative\ Commons\ Attribution\ (CC\ BY)\ 4.0\ license"]
org.apache.stanbol.entityhub.site.defaultExpireDuration=I"0"
org.apache.stanbol.entityhub.site.licenseUrl=["http://creativecommons.org/licenses/by/4.0/"]
org.apache.stanbol.entityhub.site.attributionUrl="https://iptc.org/about-iptc/"
org.apache.stanbol.entityhub.site.cacheId="iptcIndex"
org.apache.stanbol.entityhub.site.defaultSymbolState="proposed"
org.apache.stanbol.entityhub.site.name="iptc"
org.apache.stanbol.entityhub.site.entityPrefix=["http://cv.iptc.org/newscodes/"]
org.apache.stanbol.entityhub.site.id="iptc"
org.apache.stanbol.entityhub.site.description="IPTC\ Media\ Topics"
org.apache.stanbol.entityhub.site.attribution="IPTC\ International\ Press\ Telecommunications\ Council"
org.apache.stanbol.entityhub.site.defaultMappedEntityState="proposed"
org.apache.stanbol.entityhub.site.fieldMappings=("#\ Licensed\ to\ the\ Apache\ Software\ Foundation\ (ASF)\ under\ one\ or\ more","#\ contributor\ license\ agreements.\ \ See\ the\ NOTICE\ file\ distributed\ with","#\ this\ work\ for\ additional\ information\ regarding\ copyright\ ownership.","#\ The\ ASF\ licenses\ this\ file\ to\ You\ under\ the\ Apache\ License,\ Version\ 2.0","#\ (the\ \"License\");\ you\ may\ not\ use\ this\ file\ except\ in\ compliance\ with","#\ the\ License.\ \ You\ may\ obtain\ a\ copy\ of\ the\ License\ at","#","#\ \ \ \ \ http://www.apache.org/licenses/LICENSE-2.0","#","#\ Unless\ required\ by\ applicable\ law\ or\ agreed\ to\ in\ writing,\ software","#\ distributed\ under\ the\ License\ is\ distributed\ on\ an\ \"AS\ IS\"\ BASIS,","#\ WITHOUT\ WARRANTIES\ OR\ CONDITIONS\ OF\ ANY\ KIND,\ either\ express\ or\ implied.","#\ See\ the\ License\ for\ the\ specific\ language\ governing\ permissions\ and","#\ limitations\ under\ the\ License.","#","#NOTE:\ THIS\ IS\ A\ DEFAULT\ MAPPING\ SPECIFICATION\ THAT\ INCLUDES\ MAPPINGS\ FOR","#\ \ \ \ \ \ COMMON\ ONTOLOGIES.\ USERS\ MIGHT\ WANT\ TO\ ADAPT\ THIS\ CONFIGURATION\ BY","#\ \ \ \ \ \ COMMENTING/UNCOMMENTING\ AND/OR\ ADDING\ NEW\ MAPPINGS","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ to\ restrict\ languages\ to\ be\ imported\ (for\ all\ fields)","#|\ @\=null;en;de;fr;it","","#NOTE:\ null\ is\ used\ to\ import\ labels\ with\ no\ specified\ language","","#\ ---\ Define\ the\ Languages\ for\ all\ fields\ ---","#\ Uncomment\ to\ restrict\ indexing\ to\ a\ specific\ list\ of\ languages,\ otherwise\ all","#\ languages\ are\ indexed","#|\ @\=null;en;de;fr;it","","#\ ---\ RDF\ RDFS\ and\ OWL\ Mappings\ ---","#\ This\ configuration\ only\ index\ properties\ that\ are\ typically\ used\ to\ store","#\ instance\ data\ defined\ by\ such\ namespaces.\ This\ excludes\ ontology\ definitions","","#\ NOTE\ that\ nearly\ all\ other\ ontologies\ are\ are\ using\ properties\ of\ these\ three","#\ 
\ \ \ \ \ schemas,\ therefore\ it\ is\ strongly\ recommended\ to\ include\ such\ information!","","rdf:type\ |\ d\=entityhub:ref","","rdfs:label\ ","rdfs:comment","rdfs:seeAlso\ |\ d\=entityhub:ref","","","owl:sameAs\ |\ d\=entityhub:ref","","#If\ one\ likes\ to\ also\ index\ ontologies\ one\ should\ add\ the\ following\ statements","#owl:*","#rdfs:*","","#\ ---\ Dublin\ Core\ (DC)\ ---","#\ The\ default\ configuration\ imports\ all\ dc-terms\ data\ and\ copies\ values\ for\ the","#\ old\ dc-elements\ standard\ over\ to\ the\ according\ properties\ of\ the\ dc-terms","#\ standard.","","#\ NOTE\ that\ a\ lot\ of\ other\ ontologies\ are\ also\ using\ DC\ for\ some\ of\ there\ data","#\ \ \ \ \ \ therefore\ it\ is\ strongly\ recommended\ to\ include\ such\ information!","","#mapping\ for\ all\ dc-terms\ properties","dc:*","","#\ copy\ dc:title\ to\ rdfs:label","dc:title\ >\ rdfs:label","","#\ deactivated\ by\ default,\ because\ such\ mappings\ are\ mapped\ to\ dc-terms","#dc-elements:*","","#\ mappings\ for\ the\ dc-elements\ properties\ to\ the\ dc-terms","dc-elements:contributor\ >\ dc:contributor","dc-elements:coverage\ >\ dc:coverage","dc-elements:creator\ >\ dc:creator","dc-elements:date\ >\ dc:date","dc-elements:description\ >\ dc:description","dc-elements:format\ >\ dc:format","dc-elements:identifier\ >\ dc:identifier","dc-elements:language\ >\ dc:language","dc-elements:publisher\ >\ dc:publisher","dc-elements:relation\ >\ dc:relation","dc-elements:rights\ >\ dc:rights","dc-elements:source\ >\ dc:source","dc-elements:subject\ >\ dc:subject","dc-elements:title\ >\ dc:title","dc-elements:type\ >\ dc:type","#also\ use\ dc-elements:title\ as\ label","dc-elements:title\ >\ rdfs:label","","#\ ---\ Social\ Networks\ (via\ foaf)\ ---","#The\ Friend\ of\ a\ Friend\ schema\ is\ often\ used\ to\ describe\ social\ relations\ between\ people","foaf:*","","#\ copy\ the\ name\ of\ a\ person\ over\ to\ rdfs:label","foaf:name\ >\ rdfs:label","","#\ additional\ data\ types\ 
checks","foaf:knows\ |\ d\=entityhub:ref","foaf:made\ |\ d\=entityhub:ref","foaf:maker\ |\ d\=entityhub:ref","foaf:member\ |\ d\=entityhub:ref","foaf:homepage\ |\ d\=xsd:anyURI","foaf:depiction\ |\ d\=xsd:anyURI","foaf:img\ |\ d\=xsd:anyURI","foaf:logo\ |\ d\=xsd:anyURI","#page\ about\ the\ entity","foaf:page\ |\ d\=xsd:anyURI","","","#\ ---\ Schema.org\ --","","#\ Defines\ an\ Ontology\ used\ by\ search\ engines\ (Google,\ Yahoo\ and\ Bing)\ for\ ","#\ indexing\ websites.","","schema:*","#\ Copy\ all\ names\ of\ schema\ instances\ over\ to\ rdfs:label","schema:name\ >\ rdfs:label","","#\ ---\ Simple\ Knowledge\ Organization\ System\ (SKOS)\ ---","","#\ A\ common\ data\ model\ for\ sharing\ and\ linking\ knowledge\ organization\ systems\ ","#\ via\ the\ Semantic\ Web.\ Typically\ used\ to\ encode\ controlled\ vocabularies\ as","#\ a\ thesaurus\ \ ","skos:*","","#\ copy\ all\ SKOS\ labels\ (preferred,\ alternative\ and\ hidden)\ over\ to\ rdfs:label","skos:prefLabel\ >\ rdfs:label","skos:altLabel\ >\ rdfs:label","skos:hiddenLabel\ >\ rdfs:label","","#\ copy\ values\ of\ **Match\ relations\ to\ the\ according\ related,\ broader\ and\ narrower","skos:relatedMatch\ >\ skos:related","skos:broadMatch\ >\ skos:broader","skos:narrowMatch\ >\ skos:narrower","","#similar\ mappings\ for\ transitive\ variants\ are\ not\ contained,\ because\ transitive","#reasoning\ is\ not\ directly\ supported\ by\ the\ Entityhub.","","#\ Some\ SKOS\ thesaurus\ do\ use\ \"skos:transitiveBroader\"\ and\ \"skos:transitiveNarrower\"","#\ however\ such\ properties\ are\ only\ intended\ to\ be\ used\ by\ reasoners\ to","#\ calculate\ transitive\ closures\ over\ broader/narrower\ hierarchies.","#\ see\ http://www.w3.org/TR/skos-reference/#L2413\ for\ details","#\ to\ correct\ such\ cases\ we\ will\ copy\ transitive\ relations\ to\ their\ counterpart","skos:narrowerTransitive\ >\ skos:narrower","skos:broaderTransitive\ >\ skos:broader","","","#\ ---\ Semantically-Interlinked\ Online\ 
Communities\ (SIOC)\ ---","","#\ An\ ontology\ for\ describing\ the\ information\ in\ online\ communities.\ ","#\ This\ information\ can\ be\ used\ to\ export\ information\ from\ online\ communities\ ","#\ and\ to\ link\ them\ together.\ The\ scope\ of\ the\ application\ areas\ that\ SIOC\ can\ ","#\ be\ used\ for\ includes\ (and\ is\ not\ limited\ to)\ weblogs,\ message\ boards,\ ","#\ mailing\ lists\ and\ chat\ channels.","sioc:*","","#\ ---\ biographical\ information\ (bio)","#\ A\ vocabulary\ for\ describing\ biographical\ information\ about\ people,\ both\ living","#\ and\ dead.\ (see\ http://vocab.org/bio/0.1/)","bio:*","","#\ ---\ Rich\ Site\ Summary\ (rss)\ ---","rss:*","","#\ ---\ GoodRelations\ (gr)\ ---","#\ GoodRelations\ is\ a\ standardised\ vocabulary\ for\ product,\ price,\ and\ company\ data","gr:*","","#\ ---\ Creative\ Commons\ Rights\ Expression\ Language\ (cc)","#\ The\ Creative\ Commons\ Rights\ Expression\ Language\ (CC\ REL)\ lets\ you\ describe\ ","#\ copyright\ licenses\ in\ RDF.","cc:*","","","","","","")
org.apache.stanbol.entityhub.site.cacheStrategy="all"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
org.apache.stanbol.entityhub.yard.solr.solrUri="iptc"
org.apache.stanbol.entityhub.yard.name="iptc\ Index"
org.apache.stanbol.entityhub.yard.solr.multiYardIndexLayout=B"false"
org.apache.stanbol.entityhub.yard.solr.useDefaultConfig=B"false"
org.apache.stanbol.entityhub.yard.id="iptcIndex"
http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank="http://stanbol.apache.org/ontology/entityhub/entityhub#entityRank"
org.apache.stanbol.entityhub.yard.description="Full\ local\ index\ for\ the\ Referenced\ Site\ \"iptc\"."
Binary file not shown.
63 changes: 63 additions & 0 deletions data/site-stw/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

    <!-- OSGi bundle module that packages the STW site for entity linking. -->
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>eu.fusepool.p3.stanbol-launcher</groupId>
        <artifactId>stanbol-launcher-reactor</artifactId>
        <version>1.0.0-SNAPSHOT</version>
        <relativePath>../..</relativePath>
    </parent>

    <artifactId>stanbol-data-site-stw</artifactId>
    <packaging>bundle</packaging>

    <name>STW - Standard-Thesaurus Wirtschaft</name>
    <description>
        Provides the STW for entity linking
    </description>

    <inceptionYear>2015</inceptionYear>

    <!-- Directory names substituted into the bundle instructions below. -->
    <properties>
        <data.path>site</data.path>
        <config.path>config</config.path>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.felix</groupId>
                <artifactId>maven-bundle-plugin</artifactId>
                <extensions>true</extensions>
                <configuration>
                    <instructions>
                        <!-- Values come from the data.path / config.path properties. -->
                        <Data-Files>${data.path}</Data-Files>
                        <Data-Files-Priority>-100</Data-Files-Priority>
                        <Install-Path>${config.path}</Install-Path>
                    </instructions>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","opennlp-pos","stw-linking","text-annotation-new-model","fise2fam"]
stanbol.enhancer.chain.chainproperties=[""]
stanbol.enhancer.chain.name="stw-linking"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
stanbol.enhancer.chain.weighted.chain=["langdetect","opennlp-sentence","opennlp-token","stw-plain-linking","text-annotation-new-model","fise2fam"]
stanbol.enhancer.chain.chainproperties=[""]
stanbol.enhancer.chain.name="stw-plain-linking"
Loading

0 comments on commit 0f714ee

Please sign in to comment.