Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
alerman committed Dec 11, 2023
0 parents commit d686543
Show file tree
Hide file tree
Showing 16 changed files with 573 additions and 0 deletions.
5 changes: 5 additions & 0 deletions datawave-accumulo/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Versions of the Accumulo, Hadoop and ZooKeeper distributions.
ACCUMULO_VERSION=2.1.2
HADOOP_VERSION=3.3.6
# NOTE(review): the Dockerfile's own default for ZOOKEEPER_VERSION is 3.7.2 — confirm which version is intended.
ZOOKEEPER_VERSION=3.7.1
# Configuration directory locations.
ACCUMULO_CONF_DIR=/opt/accumulo/conf
# NOTE(review): the Dockerfile sets HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop in the image environment — confirm which path is intended.
HADOOP_CONF_DIR=/opt/hadoop/conf
115 changes: 115 additions & 0 deletions datawave-accumulo/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@


# Global build arguments. An ARG declared before the first FROM is only visible
# in FROM lines; a stage that needs the value must re-declare it with a bare
# `ARG NAME`.

# Image used by the build stage.
ARG BUILDER_IMAGE_NAME=ghcr.io/nationalsecurityagency/datawave-hadoop
ARG BUILDER_IMAGE_TAG=docker-images

# Image the final stage is built on.
ARG BASE_IMAGE_NAME=ghcr.io/nationalsecurityagency/datawave-hadoop
ARG BASE_IMAGE_TAG=docker-images

# Default versions of the distributions to package; override with --build-arg.
ARG ACCUMULO_VERSION=2.1.2
ARG ZOOKEEPER_VERSION=3.7.2

# ---------------------------------------------------------------------------
# Build stage: obtain the Accumulo and ZooKeeper distributions, unpack only the
# parts that are needed, and build Accumulo's native libraries. The final stage
# copies /accumulo-${ACCUMULO_VERSION} and
# /apache-zookeeper-${ZOOKEEPER_VERSION}-bin out of this stage.
# ---------------------------------------------------------------------------
FROM ${BUILDER_IMAGE_NAME}:${BUILDER_IMAGE_TAG} AS builder

# Re-declare the global ARGs so their values are visible inside this stage.
ARG ACCUMULO_VERSION
ARG ZOOKEEPER_VERSION

# Each distribution has a primary download URL and a backup URL that is tried
# only when the primary download fails.
#ARG ACCUMULO_DOWNLOAD_URL="https://www.apache.org/dyn/closer.cgi?action=download&filename=accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz"
ARG ACCUMULO_DOWNLOAD_URL="https://dlcdn.apache.org/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz"
ARG ACCUMULO_BACKUP_DOWNLOAD_URL="https://archive.apache.org/dist/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz"

#ARG ZOOKEEPER_DOWNLOAD_URL="https://www.apache.org/dyn/closer.cgi?action=download&filename=zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
ARG ZOOKEEPER_DOWNLOAD_URL="https://dlcdn.apache.org/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
ARG ZOOKEEPER_BACKUP_DOWNLOAD_URL="https://archive.apache.org/dist/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"

USER root

# Toolchain for the native build (gcc-c++, make) plus wget for the downloads.
RUN yum -y update && \
    yum -y install \
        gcc-c++ \
        make \
        wget \
    && yum clean all

# NOTE(review): presumably required by `accumulo-util build-native` to locate
# the JDK — confirm.
ENV JAVA_HOME=/usr/lib/jvm/java

# Every step below works with paths relative to /, and the final stage copies
# the results from /. Make that explicit instead of relying on the base image's
# working directory.
WORKDIR /

# Allow users to provide their own builds of Accumulo, ZooKeeper and Hadoop
COPY ./files/ ./

# Otherwise, download official distributions (primary URL first, then backup)
RUN if [ ! -f "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" ]; then \
        wget -nv -O "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" "${ACCUMULO_DOWNLOAD_URL}" \
        || wget -nv -O "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" "${ACCUMULO_BACKUP_DOWNLOAD_URL}"; \
    fi
RUN if [ ! -f "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" ]; then \
        wget -nv -O "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" "${ZOOKEEPER_DOWNLOAD_URL}" \
        || wget -nv -O "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" "${ZOOKEEPER_BACKUP_DOWNLOAD_URL}"; \
    fi

# Extract only bin/ and lib/ from the Accumulo tarball, then build the native
# libraries in place.
RUN tar -xf "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" \
        "accumulo-${ACCUMULO_VERSION}/bin/" \
        "accumulo-${ACCUMULO_VERSION}/lib/" \
    && rm -f "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" \
    && "accumulo-${ACCUMULO_VERSION}/bin/accumulo-util" build-native

# Extract only the zookeeper*.jar files from the ZooKeeper tarball's lib/ directory.
RUN tar -xf "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" \
        --wildcards "apache-zookeeper-${ZOOKEEPER_VERSION}-bin/lib/zookeeper*.jar" \
    && rm -f "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"

# ---------------------------------------------------------------------------
# Final stage: the Accumulo image. Takes the unpacked Accumulo and ZooKeeper
# files from the builder stage, adds the configuration from ./conf, and runs as
# a dedicated non-root user.
# ---------------------------------------------------------------------------
FROM ${BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}

# Re-declare the global ARGs so their values are visible inside this stage.
ARG ACCUMULO_VERSION
ARG ZOOKEEPER_VERSION
# Account that owns the installation and runs the container process.
ARG USER=accumulo
ARG GROUP=accumulo

USER root

RUN yum -y update && \
    yum -y install \
        wget \
    && yum clean all

# Download dumb-init and mark it executable in a single layer.
# NOTE(review): the binary is x86_64-only and is not checksum-verified —
# consider pinning a sha256 for it.
RUN wget -nv -O /usr/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
    && chmod +x /usr/bin/dumb-init

COPY --from=builder /accumulo-${ACCUMULO_VERSION} /opt/accumulo-${ACCUMULO_VERSION}/
COPY ./conf /opt/accumulo-${ACCUMULO_VERSION}/conf
COPY --from=builder /apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper-${ZOOKEEPER_VERSION}/

# The supplementary group `hadoop` must already exist in the base image.
RUN groupadd ${GROUP} && useradd --gid ${GROUP} -G hadoop --no-create-home --shell /bin/bash ${USER}

# Create version-independent symlinks under /opt, create the log directory, and
# hand ownership of the installations (targets and the symlinks themselves) to
# the service account. `cd` is used inside the RUN on purpose: a WORKDIR
# instruction would change the working directory of the resulting image.
RUN cd /opt \
    && ln -s ./accumulo-${ACCUMULO_VERSION} ./accumulo \
    && ln -s /usr/local/hadoop ./hadoop \
    && ln -s ./zookeeper-${ZOOKEEPER_VERSION} ./zookeeper \
    && mkdir -p -m 755 /var/log/accumulo \
    && chown ${USER}:${GROUP} /var/log/accumulo \
    && chown -LR ${USER}:${GROUP} ./accumulo \
    && chown -LR ${USER}:${GROUP} ./zookeeper \
    && chown -h ${USER}:${GROUP} ./accumulo \
    && chown -h ${USER}:${GROUP} ./zookeeper \
    && usermod -d /opt/accumulo ${USER}

USER ${USER}

# ACCUMULO_HOME gets its own instruction: variables referenced inside an ENV
# instruction resolve to the values set by earlier instructions, so it must be
# defined before ACCUMULO_CONF_DIR and PATH use it.
ENV ACCUMULO_HOME=/opt/accumulo
ENV ACCUMULO_CONF_DIR=${ACCUMULO_HOME}/conf \
    ACCUMULO_LOG_DIR=/var/log/accumulo \
    ZOOKEEPER_HOME=/opt/zookeeper \
    PATH=${ACCUMULO_HOME}/bin:${PATH}
ENV HADOOP_HOME=/usr/local/hadoop \
    HADOOP_COMMON_HOME=/usr/local/hadoop \
    HADOOP_HDFS_HOME=/usr/local/hadoop \
    HADOOP_MAPRED_HOME=/usr/local/hadoop \
    HADOOP_YARN_HOME=/usr/local/hadoop \
    HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop \
    YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop \
    JAVA_HOME=/usr/lib/jvm/java \
    HADOOP_CLASSPATH=/usr/local/hadoop/*:/usr/local/hadoop/lib/*:/usr/local/hadoop-hdfs/*:/usr/local/hadoop-hdfs/lib/*

# The entrypoint receives "accumulo" followed by CMD (or the arguments given to
# `docker run`), so the default invocation is `/entrypoint.sh accumulo help`.
COPY ./entrypoint.sh /
ENTRYPOINT ["/entrypoint.sh", "accumulo"]
CMD ["help"]
4 changes: 4 additions & 0 deletions datawave-accumulo/conf/accumulo-client.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Accumulo client connection settings.
# Name of the Accumulo instance to connect to.
instance.name=dev
# ZooKeeper host:port used to locate the instance.
instance.zookeepers=zookeeper:2181
# Credentials the client authenticates with.
# NOTE(review): root/root looks like a development-only default — confirm it is never used outside a dev environment.
auth.principal=root
auth.token=root
146 changes: 146 additions & 0 deletions datawave-accumulo/conf/accumulo-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#! /usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

## Before accumulo-env.sh is loaded, these environment variables are set and can be used in this file:

# cmd - Command that is being called such as tserver, manager, etc.
# basedir - Root of Accumulo installation
# bin - Directory containing Accumulo scripts
# conf - Directory containing Accumulo configuration
# lib - Directory containing Accumulo libraries

############################
# Variables that must be set
############################

## Each variable below keeps a non-empty value inherited from the environment
## and otherwise falls back to the default shown.

## Where Accumulo writes its logs; the logger configuration refers to this.
: "${ACCUMULO_LOG_DIR:=${basedir}/logs}"
## Root of the Hadoop installation
: "${HADOOP_HOME:=/opt/hadoop}"
## Directory holding the Hadoop configuration files
: "${HADOOP_CONF_DIR:=${HADOOP_HOME}/etc/hadoop}"
## Root of the Zookeeper installation
: "${ZOOKEEPER_HOME:=/opt/zookeeper}"

##########################
# Build CLASSPATH variable
##########################

## Verify that Hadoop & Zookeeper installation directories exist.
## Diagnostics go to stderr so they never mix into a command's stdout; because
## this file is sourced, `exit 1` terminates the calling script.
if [[ ! -d $ZOOKEEPER_HOME ]]; then
  echo "ZOOKEEPER_HOME=$ZOOKEEPER_HOME is not set to a valid directory in accumulo-env.sh" >&2
  exit 1
fi
if [[ ! -d $HADOOP_HOME ]]; then
  echo "HADOOP_HOME=$HADOOP_HOME is not set to a valid directory in accumulo-env.sh" >&2
  exit 1
fi

## Build using existing CLASSPATH, conf/ directory, dependencies in lib/, and external Hadoop & Zookeeper dependencies
if [[ -n $CLASSPATH ]]; then
  # conf is set by calling script that sources this env file
  #shellcheck disable=SC2154
  CLASSPATH="${CLASSPATH}:${conf}"
else
  CLASSPATH="${conf}"
fi
# Colon-joined list of the jars directly under $ZOOKEEPER_HOME/lib, leaving out
# any whose name contains slf4j or log4j. `-` names stdin explicitly, which
# paste implementations that require a file operand need.
ZK_JARS=$(find "$ZOOKEEPER_HOME/lib/" -maxdepth 1 -name '*.jar' -not -name '*slf4j*' -not -name '*log4j*' | paste -sd: -)
# lib is set by calling script that sources this env file
#shellcheck disable=SC2154
CLASSPATH="${CLASSPATH}:${lib}/*:${HADOOP_CONF_DIR}:${ZOOKEEPER_HOME}/*:${ZK_JARS}:${HADOOP_HOME}/share/hadoop/client/*"
export CLASSPATH

##################################################################
# Build JAVA_OPTS variable. Defaults below work but can be edited.
##################################################################

## JVM options set for all processes. Extra options can be passed in by setting ACCUMULO_JAVA_OPTS to an array of options.
# ACCUMULO_JAVA_OPTS is split on whitespace into an array so that each option
# becomes its own element of JAVA_OPTS. `read` consumes a single line, so only
# the first line of the variable is used.
read -r -a accumulo_initial_opts < <(echo "$ACCUMULO_JAVA_OPTS")
# The user-supplied options are placed last in the array.
JAVA_OPTS=(
'-XX:OnOutOfMemoryError=kill -9 %p'
'-XX:-OmitStackTraceInFastThrow'
'-Djava.net.preferIPv4Stack=true'
"-Daccumulo.native.lib.path=${lib}/native"
"${accumulo_initial_opts[@]}"
)

## Make sure Accumulo native libraries are built since they are enabled by default
# All output of the build is discarded and its exit status is not checked, so a
# failed build does not stop this script.
# bin is set by calling script that sources this env file
#shellcheck disable=SC2154
"${bin}"/accumulo-util build-native &>/dev/null

## Per-application heap sizing: pick the maximum (-Xmx) and initial (-Xms) heap
## for the command being run, then put both in front of the options gathered so far.
# cmd is set by calling script that sources this env file
#shellcheck disable=SC2154
case "$cmd" in
  monitor)
    accumulo_heap_max='1g'
    accumulo_heap_min='1g'
    ;;
  tserver)
    accumulo_heap_max='768m'
    accumulo_heap_min='768m'
    ;;
  manager | master | compaction-coordinator | sserver)
    accumulo_heap_max='512m'
    accumulo_heap_min='512m'
    ;;
  gc | compactor)
    accumulo_heap_max='256m'
    accumulo_heap_min='256m'
    ;;
  *)
    # Everything else (shell, admin commands, ...) starts small and may grow.
    accumulo_heap_max='256m'
    accumulo_heap_min='64m'
    ;;
esac
JAVA_OPTS=("-Xmx${accumulo_heap_max}" "-Xms${accumulo_heap_min}" "${JAVA_OPTS[@]}")
# The helpers are only needed for the line above; do not leak them to the caller.
unset accumulo_heap_max accumulo_heap_min

## JVM options set for logging. Review log4j2.properties file to see how they are used.
# These options are placed in front of everything already in JAVA_OPTS.
# ACCUMULO_SERVICE_INSTANCE is not set in this file; when it is empty the
# application name reduces to "<cmd>_<hostname>".
JAVA_OPTS=("-Daccumulo.log.dir=${ACCUMULO_LOG_DIR}"
"-Daccumulo.application=${cmd}${ACCUMULO_SERVICE_INSTANCE}_$(hostname)"
"-Daccumulo.metrics.service.instance=${ACCUMULO_SERVICE_INSTANCE}"
"-Dlog4j2.contextSelector=org.apache.logging.log4j.core.async.AsyncLoggerContextSelector"
"-Dotel.service.name=${cmd}${ACCUMULO_SERVICE_INSTANCE}"
"${JAVA_OPTS[@]}"
)

## Optionally setup OpenTelemetry SDK AutoConfigure
## See https://github.com/open-telemetry/opentelemetry-java/tree/main/sdk-extensions/autoconfigure
#JAVA_OPTS=('-Dotel.traces.exporter=jaeger' '-Dotel.metrics.exporter=none' '-Dotel.logs.exporter=none' "${JAVA_OPTS[@]}")

## Optionally setup OpenTelemetry Java Agent
## See https://github.com/open-telemetry/opentelemetry-java-instrumentation for more options
#JAVA_OPTS=('-javaagent:path/to/opentelemetry-javaagent-all.jar' "${JAVA_OPTS[@]}")

# The listed server commands are pointed at log4j2-service.properties; every
# other command gets no explicit log4j configuration file.
case "$cmd" in
monitor | gc | manager | master | tserver | compaction-coordinator | compactor | sserver)
JAVA_OPTS=('-Dlog4j.configurationFile=log4j2-service.properties' "${JAVA_OPTS[@]}")
;;
*)
# let log4j use its default behavior (log4j2.properties, etc.)
true
;;
esac

############################
# Variables set to a default
############################

## Keep an inherited MALLOC_ARENA_MAX; otherwise default it to 1.
export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
## Add Hadoop native libraries to shared library paths given operating system.
## The existing value is appended only when it is non-empty: a bare trailing ':'
## would add an empty entry, which the dynamic loader treats as the current
## working directory.
case "$(uname)" in
  Darwin) export DYLD_LIBRARY_PATH="${HADOOP_HOME}/lib/native${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}}" ;;
  *) export LD_LIBRARY_PATH="${HADOOP_HOME}/lib/native${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" ;;
esac

###############################################
# Variables that are optional. Uncomment to set
###############################################

## Specifies command that will be placed before calls to Java in accumulo script
# export ACCUMULO_JAVA_PREFIX=""
23 changes: 23 additions & 0 deletions datawave-accumulo/conf/accumulo.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
## Sets location in HDFS where Accumulo will store data
instance.volumes=hdfs://hdfs-nn:9000/accumulo

## Sets location of Zookeepers.
## `instance.zookeepers` is the Accumulo 2.x property name and matches the key
## used in accumulo-client.properties.
instance.zookeepers=zookeeper:2181
## Legacy (Accumulo 1.x) spelling, retained so anything that still reads or
## rewrites this key keeps working.
## NOTE(review): remove once nothing depends on it.
instance.zookeeper.host=zookeeper:2181

## Change secret before initialization. All Accumulo servers must have same secret
instance.secret=DEFAULT
trace.user=root
trace.token.property.password=secret

## Set to false if 'accumulo-util build-native' fails
tserver.memory.maps.native.enabled=true
tserver.memory.maps.max=128M
tserver.cache.data.size=15M
tserver.cache.index.size=40M
tserver.total.mutation.queue.max=16M
tserver.sort.buffer.size=50M
tserver.walog.max.size=128M

## Classpath entries given as comma-separated paths; the file-name parts are regular expressions
general.classpaths=$ACCUMULO_HOME/lib/accumulo-server.jar,$ACCUMULO_HOME/lib/accumulo-core.jar,$ACCUMULO_HOME/lib/accumulo-start.jar,$ACCUMULO_HOME/lib/accumulo-fate.jar,$ACCUMULO_HOME/lib/accumulo-proxy.jar,$ACCUMULO_HOME/lib/[^.].*.jar,$ZOOKEEPER_HOME/lib/zookeeper[^.].*.jar,$HADOOP_CONF_DIR,$HADOOP_HOME/share/hadoop/client/[^.].*.jar,$HADOOP_HOME/share/hadoop/common/lib/(?!slf4j)[^.].*.jar

## Classpath context named "datawave": jars are loaded from this HDFS directory
general.vfs.context.classpath.datawave=hdfs://hdfs-nn:9000/datawave/accumulo-vfs-classpath/.*.jar
22 changes: 22 additions & 0 deletions datawave-accumulo/conf/client.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ZooKeeper host:port used to locate the Accumulo instance.
instance.zookeeper.host=zookeeper:2181
# instance.rpc.ssl.enabled=false

# instance.rpc.sasl.enabled=false
# rpc.sasl.qop=auth
# Credentials the client authenticates with.
# NOTE(review): root/root looks like a development-only default — confirm.
auth.principal=root
auth.token=root
1 change: 1 addition & 0 deletions datawave-accumulo/conf/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<!-- Hadoop core-site.xml with no properties set. -->
<!-- NOTE(review): this file sits in Accumulo's conf directory, which accumulo-env.sh puts on the CLASSPATH ahead of HADOOP_CONF_DIR; confirm that an empty file here is intended. -->
<configuration></configuration>
45 changes: 45 additions & 0 deletions datawave-accumulo/conf/generic_logger.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE(review): this file uses log4j 1.x syntax, while accumulo-env.sh selects
# log4j2-service.properties for the server processes — confirm this file is
# still read by anything.

# Console appender: ISO8601 timestamp, logger name shortened to its last two
# components, level, then the message.
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%-8c{2}] %-5p: %m%n

# Send all logging data to a centralized logger
# If the centralized logger is down, buffer the log events, but drop them if it stays down
# Host, port and application name come from system properties; only events at
# WARN or above pass this appender's threshold.
log4j.appender.ASYNC=org.apache.accumulo.core.util.AsyncSocketAppender
log4j.appender.ASYNC.RemoteHost=${org.apache.accumulo.core.host.log}
log4j.appender.ASYNC.Port=${org.apache.accumulo.core.host.log.port}
log4j.appender.ASYNC.Application=${org.apache.accumulo.core.application}:${org.apache.accumulo.core.ip.localhost.hostname}
log4j.appender.ASYNC.Threshold=WARN

# Log Accumulo events at DEBUG and above to the CONSOLE and ASYNC appenders.
# Additivity is off, so these events are not also passed to the root logger.
log4j.logger.org.apache.accumulo=DEBUG, CONSOLE, ASYNC
log4j.additivity.org.apache.accumulo=false

# change to INFO for authorization events
log4j.logger.org.apache.accumulo.server.security.Auditor=WARN

# Per-package level overrides.
log4j.logger.org.apache.accumulo.core.file.rfile.bcfile=INFO

log4j.logger.org.apache.accumulo.examples.wikisearch=INFO

log4j.logger.org.mortbay.log=WARN

log4j.logger.com.yahoo.zookeeper=ERROR

# Log everything else at INFO and above to the console only.
log4j.rootLogger=INFO, CONSOLE
24 changes: 24 additions & 0 deletions datawave-accumulo/conf/log4j2-service.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Log4j 2 configuration for the Accumulo server processes; accumulo-env.sh
# selects this file through -Dlog4j.configurationFile.

# Log4j's own status messages: level info, written to stderr.
status = info
dest = err
name = AccumuloCoreTestLoggingProperties
# Re-read this file for changes every 30 seconds.
monitorInterval = 30

# Console appender writing to stdout.
appender.console.type = Console
appender.console.name = STDOUT
appender.console.target = SYSTEM_OUT
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = [%t] %-5p %c %x - %m%n

# Only events at warn or above are written by this appender, whatever the
# logger levels below allow.
appender.console.filter.threshold.type = ThresholdFilter
appender.console.filter.threshold.level = warn

logger.accumulo.name = org.apache.accumulo
logger.accumulo.level = debug

logger.zookeeper.name = org.apache.zookeeper
logger.zookeeper.level = error

logger.hadoop.name = org.apache.hadoop
logger.hadoop.level = warn

rootLogger.level = info
# Attach the console appender to the root logger. The named loggers above
# declare no appenders of their own and reach it through additivity; without
# this reference no logger has an appender and nothing is written.
rootLogger.appenderRef.console.ref = STDOUT
Loading

0 comments on commit d686543

Please sign in to comment.