diff --git a/Dockerfile b/Dockerfile
index fdcf6f93..33047e77 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,20 @@
-# This docker file is used for populating and pushing the container for the official HRA KG with all data within
-FROM ghcr.io/hubmapconsortium/hra-do-server:main
-COPY dist /dist
+# NOTE(review): pin this base image to a specific tag or digest; ":latest" makes
+# builds non-reproducible.
+FROM phenoscape/blazegraph:latest
+
+# The URL of the blazegraph journal to use
+ENV DB_URL=http://cdn-humanatlas-io.s3-website.us-east-2.amazonaws.com/digital-objects/blazegraph.jnl
+
+# Install cron without recommended extras and drop the apt lists in the same
+# layer so they do not bloat the image.
+RUN apt-get update \
+    && apt-get -y install --no-install-recommends cron \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY ./context/blazegraph.properties .
+COPY ./context/sync.cron /etc/cron.d/sync.cron
+RUN crontab /etc/cron.d/sync.cron
+
+COPY ./context/startup.sh ./context/sync.sh /
+
+ENTRYPOINT [ "/startup.sh" ]
diff --git a/context/blazegraph.properties b/context/blazegraph.properties
new file mode 100644
index 00000000..a45b33d9
--- /dev/null
+++ b/context/blazegraph.properties
@@ -0,0 +1,50 @@
+#
+# Note: These options are applied when the journal and the triple store are
+# first created.
+
+##
+## Journal options.
+##
+
+# The backing file. This contains all your data. You want to put this someplace
+# safe. The default locator will wind up in the directory from which you start
+# your servlet container.
+com.bigdata.journal.AbstractJournal.file=blazegraph.jnl
+
+# The persistence engine. Use 'Disk' for the WORM or 'DiskRW' for the RWStore.
+com.bigdata.journal.AbstractJournal.bufferMode=DiskRW
+
+# Setup for the RWStore recycler rather than session protection.
+com.bigdata.service.AbstractTransactionService.minReleaseAge=1
+
+# Enable group commit. See http://wiki.blazegraph.com/wiki/index.php/GroupCommit
+# Note: Group commit is a beta feature in BlazeGraph release 1.5.1.
+#com.bigdata.journal.Journal.groupCommit=true
+
+com.bigdata.btree.writeRetentionQueue.capacity=4000
+com.bigdata.btree.BTree.branchingFactor=128
+
+# 200M initial extent.
+com.bigdata.journal.AbstractJournal.initialExtent=209715200
+com.bigdata.journal.AbstractJournal.maximumExtent=209715200
+
+##
+## Setup for QUADS mode without the full text index.
+##
+com.bigdata.rdf.sail.truthMaintenance=false
+com.bigdata.rdf.store.AbstractTripleStore.quads=true
+com.bigdata.rdf.store.AbstractTripleStore.statementIdentifiers=false
+com.bigdata.rdf.store.AbstractTripleStore.textIndex=false
+com.bigdata.rdf.store.AbstractTripleStore.axiomsClass=com.bigdata.rdf.axioms.NoAxioms
+
+# Bump up the branching factor for the lexicon indices on the default kb.
+com.bigdata.namespace.kb.lex.com.bigdata.btree.BTree.branchingFactor=400
+
+# Bump up the branching factor for the statement indices on the default kb.
+com.bigdata.namespace.kb.spo.com.bigdata.btree.BTree.branchingFactor=1024
+
+# Uncomment to enable collection of OS level performance counters. When
+# collected they will be self-reported through the /counters servlet and
+# the workbench "Performance" tab.
+#
+# com.bigdata.journal.Journal.collectPlatformStatistics=true
diff --git a/context/startup.sh b/context/startup.sh
new file mode 100755
index 00000000..65e3799d
--- /dev/null
+++ b/context/startup.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Container entrypoint: saves the runtime settings to /sync.env, runs the first
+# sync, then keeps cron in the foreground.
+
+# /sync.sh (also launched from cron) sources /sync.env to get these settings.
+# %q shell-quotes each value so that URLs containing characters such as '&' or
+# spaces survive being sourced.
+{
+  printf 'export DB_URL=%q\n' "${DB_URL}"
+  printf 'export BLAZEGRAPH_MEMORY=%q\n' "${BLAZEGRAPH_MEMORY}"
+  printf 'export BLAZEGRAPH_TIMEOUT=%q\n' "${BLAZEGRAPH_TIMEOUT}"
+  printf 'export BLAZEGRAPH_READONLY=%q\n' "${BLAZEGRAPH_READONLY}"
+} > /sync.env
+
+/sync.sh && cron -f
diff --git a/context/sync.cron b/context/sync.cron
new file mode 100644
index 00000000..9847ddad
--- /dev/null
+++ b/context/sync.cron
@@ -0,0 +1 @@
+*/30 * * * * /sync.sh > /proc/1/fd/1 2>/proc/1/fd/2
diff --git a/context/sync.sh b/context/sync.sh
new file mode 100755
index 00000000..b0ca4987
--- /dev/null
+++ b/context/sync.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Syncs the local Blazegraph journal with the remote copy at $DB_URL and
+# relaunches /blazegraph/entrypoint.sh whenever a new journal was downloaded.
+source /sync.env
+PATH=$PATH:/usr/local/openjdk-8/bin
+
+LOCAL_DB=/data/blazegraph.jnl
+ETAG_FILE=${LOCAL_DB}.etag
+# Download next to the journal so the final mv is a same-filesystem rename.
+TMP_DB=${LOCAL_DB}.download
+
+REMOTE_ETAG=$(curl -sIX GET "$DB_URL" | grep -i '^etag:' | cut -d\" -f 2)
+LOCAL_ETAG=$(touch "$ETAG_FILE" && cat "$ETAG_FILE")
+
+if [ -z "$REMOTE_ETAG" ]; then
+  # No ETag means the remote could not be reached or did not send one; leave the
+  # running database alone and let the next run retry.
+  echo "Could not read the ETag of $DB_URL; skipping sync" >&2
+elif [ "$REMOTE_ETAG" != "$LOCAL_ETAG" ]; then
+  echo "Syncing DB with" "$REMOTE_ETAG"
+
+  # Fetch into a temp file first: the live journal must not be overwritten while
+  # java still has it open, and a failed download must not be recorded as synced.
+  if curl -fsS --compressed -H 'Accept-encoding: gzip' -o "$TMP_DB" "$DB_URL"; then
+    killall -q java
+    mv -f "$TMP_DB" "$LOCAL_DB"
+    echo "$REMOTE_ETAG" > "$ETAG_FILE"
+
+    cd /data || exit 1
+    /blazegraph/entrypoint.sh &
+  else
+    echo "Download of $DB_URL failed; keeping the current journal" >&2
+    rm -f "$TMP_DB"
+  fi
+fi