Updated docker container to only be used for the sparql server and updates every 30 minutes
bherr2 committed Jun 2, 2023
1 parent 5f782be commit 245d436
Showing 5 changed files with 93 additions and 3 deletions.
17 changes: 14 additions & 3 deletions Dockerfile
@@ -1,3 +1,14 @@
-# This docker file is used for populating and pushing the container for the official HRA KG with all data within
-FROM ghcr.io/hubmapconsortium/hra-do-server:main
-COPY dist /dist
+FROM phenoscape/blazegraph:latest
+
+# The URL of the blazegraph journal to use
+ENV DB_URL=http://cdn-humanatlas-io.s3-website.us-east-2.amazonaws.com/digital-objects/blazegraph.jnl
+
+RUN apt-get update && apt-get -y install cron
+
+COPY ./context/blazegraph.properties .
+COPY ./context/sync.cron /etc/cron.d/sync.cron
+RUN crontab /etc/cron.d/sync.cron
+
+COPY ./context/startup.sh ./context/sync.sh /
+
+ENTRYPOINT [ "/startup.sh" ]
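
For context, a minimal sketch of how this image could be built and run locally. The image tag and published port are assumptions (Blazegraph's NanoSparqlServer listens on 9999 by default), not part of this commit:

# hra-blazegraph is a hypothetical tag; adjust the port mapping to whatever the base image exposes
docker build -t hra-blazegraph .
docker run -d -p 9999:9999 \
  -e DB_URL=http://cdn-humanatlas-io.s3-website.us-east-2.amazonaws.com/digital-objects/blazegraph.jnl \
  hra-blazegraph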
50 changes: 50 additions & 0 deletions context/blazegraph.properties
@@ -0,0 +1,50 @@
#
# Note: These options are applied when the journal and the triple store are
# first created.

##
## Journal options.
##

# The backing file. This contains all your data. You want to put this someplace
# safe. The default locator will wind up in the directory from which you start
# your servlet container.
com.bigdata.journal.AbstractJournal.file=blazegraph.jnl

# The persistence engine. Use 'Disk' for the WORM or 'DiskRW' for the RWStore.
com.bigdata.journal.AbstractJournal.bufferMode=DiskRW

# Setup for the RWStore recycler rather than session protection.
com.bigdata.service.AbstractTransactionService.minReleaseAge=1

# Enable group commit. See http://wiki.blazegraph.com/wiki/index.php/GroupCommit
# Note: Group commit is a beta feature in BlazeGraph release 1.5.1.
#com.bigdata.journal.Journal.groupCommit=true

com.bigdata.btree.writeRetentionQueue.capacity=4000
com.bigdata.btree.BTree.branchingFactor=128

# 200M initial extent.
com.bigdata.journal.AbstractJournal.initialExtent=209715200
com.bigdata.journal.AbstractJournal.maximumExtent=209715200

##
## Setup for QUADS mode without the full text index.
##
com.bigdata.rdf.sail.truthMaintenance=false
com.bigdata.rdf.store.AbstractTripleStore.quads=true
com.bigdata.rdf.store.AbstractTripleStore.statementIdentifiers=false
com.bigdata.rdf.store.AbstractTripleStore.textIndex=false
com.bigdata.rdf.store.AbstractTripleStore.axiomsClass=com.bigdata.rdf.axioms.NoAxioms

# Bump up the branching factor for the lexicon indices on the default kb.
com.bigdata.namespace.kb.lex.com.bigdata.btree.BTree.branchingFactor=400

# Bump up the branching factor for the statement indices on the default kb.
com.bigdata.namespace.kb.spo.com.bigdata.btree.BTree.branchingFactor=1024

# Uncomment to enable collection of OS level performance counters. When
# collected they will be self-reported through the /counters servlet and
# the workbench "Performance" tab.
#
# com.bigdata.journal.Journal.collectPlatformStatistics=true
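
These are standard Blazegraph quad-store settings (RWStore journal, quads mode, no inference or text index). As a sketch of how such a file is typically consumed outside this container, a standalone Blazegraph jar is usually started with the bigdata.propertyFile system property pointing at it; inside this image the phenoscape/blazegraph entrypoint handles that wiring:

# Illustrative only; not how this container starts Blazegraph
java -server -Xmx4g -Dbigdata.propertyFile=blazegraph.properties -jar blazegraph.jar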
8 changes: 8 additions & 0 deletions context/startup.sh
@@ -0,0 +1,8 @@
#!/bin/bash

echo "export DB_URL=${DB_URL}" > /sync.env
echo "export BLAZEGRAPH_MEMORY=${BLAZEGRAPH_MEMORY}" >> /sync.env
echo "export BLAZEGRAPH_TIMEOUT=${BLAZEGRAPH_TIMEOUT}" >> /sync.env
echo "export BLAZEGRAPH_READONLY=${BLAZEGRAPH_READONLY}" >> /sync.env

/sync.sh && cron -f
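
startup.sh persists the relevant environment variables to /sync.env because cron runs its jobs with a minimal environment that does not include variables passed to the container; sync.sh re-sources them from that file. The script then performs one initial sync and keeps cron -f in the foreground as the container's main process.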
1 change: 1 addition & 0 deletions context/sync.cron
@@ -0,0 +1 @@
*/30 * * * * /sync.sh > /proc/1/fd/1 2>/proc/1/fd/2
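
The */30 schedule re-runs /sync.sh every 30 minutes, and redirecting stdout/stderr to /proc/1/fd/1 and /proc/1/fd/2 forwards the cron job's output to the container's PID 1 so it shows up in docker logs.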
20 changes: 20 additions & 0 deletions context/sync.sh
@@ -0,0 +1,20 @@
#!/bin/bash
source /sync.env
PATH=$PATH:/usr/local/openjdk-8/bin

LOCAL_DB=/data/blazegraph.jnl
ETAG_FILE=${LOCAL_DB}.etag

REMOTE_ETAG=$(curl -sIX GET $DB_URL | grep -i etag | cut -d\" -f 2)
LOCAL_ETAG=$(touch $ETAG_FILE && cat $ETAG_FILE)

if [ "$REMOTE_ETAG" != "$LOCAL_ETAG" ]; then
echo "Syncing DB with" $REMOTE_ETAG

curl --compressed -H 'Accept-encoding: gzip' -so $LOCAL_DB $DB_URL
echo $REMOTE_ETAG > $ETAG_FILE

killall -q java
cd /data
/blazegraph/entrypoint.sh &
fi
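
sync.sh compares the remote ETag of the published journal against the one recorded locally and only re-downloads the journal and restarts Blazegraph when they differ. To inspect the remote ETag by hand, the same curl/grep/cut pipeline the script uses can be run directly:

# Prints the current ETag of the published blazegraph.jnl
curl -sIX GET http://cdn-humanatlas-io.s3-website.us-east-2.amazonaws.com/digital-objects/blazegraph.jnl | grep -i etag | cut -d'"' -f 2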
