From d686543504a82e557476af907b8d273c3f47bfe6 Mon Sep 17 00:00:00 2001
From: Adam Lerman awlerma
Date: Mon, 11 Dec 2023 13:20:48 -0500
Subject: [PATCH] init

---
Review notes (text in this area is ignored when the patch is applied):
 - Hunk line counts and the diffstat below were recomputed after the review
   fixes; the "index" blob ids of the edited files are therefore stale.
 - Behavioural fixes: log4j2-service.properties attaches the console appender
   to the root logger; entrypoint.sh also initializes for "accumulo manager"
   and stops when "accumulo init" fails; the accumulo builder stage pins
   WORKDIR / so COPY and the later RUN steps agree on where tarballs live.

 datawave-accumulo/.env                        |   5 +
 datawave-accumulo/Dockerfile                  | 113 ++++++++++++++
 .../conf/accumulo-client.properties           |   4 +
 datawave-accumulo/conf/accumulo-env.sh        | 146 ++++++++++++++++++
 datawave-accumulo/conf/accumulo.properties    |  23 +++
 datawave-accumulo/conf/client.conf            |  22 +++
 datawave-accumulo/conf/core-site.xml          |   1 +
 .../conf/generic_logger.properties            |  45 ++++++
 .../conf/log4j2-service.properties            |  26 +++
 .../conf/monitor_logger.properties            |  31 ++++
 datawave-accumulo/entrypoint.sh               |  69 +++++++++
 datawave-accumulo/files/.gitkeep              |   0
 datawave-base/Dockerfile                      |  25 +++
 datawave-hadoop/.gitignore                    |   4 +
 datawave-hadoop/Dockerfile                    |  67 +++++++++
 datawave-hadoop/bigtop.repo                   |   6 +
 16 files changed, 587 insertions(+)
 create mode 100644 datawave-accumulo/.env
 create mode 100644 datawave-accumulo/Dockerfile
 create mode 100644 datawave-accumulo/conf/accumulo-client.properties
 create mode 100755 datawave-accumulo/conf/accumulo-env.sh
 create mode 100644 datawave-accumulo/conf/accumulo.properties
 create mode 100644 datawave-accumulo/conf/client.conf
 create mode 100644 datawave-accumulo/conf/core-site.xml
 create mode 100644 datawave-accumulo/conf/generic_logger.properties
 create mode 100644 datawave-accumulo/conf/log4j2-service.properties
 create mode 100644 datawave-accumulo/conf/monitor_logger.properties
 create mode 100755 datawave-accumulo/entrypoint.sh
 create mode 100644 datawave-accumulo/files/.gitkeep
 create mode 100644 datawave-base/Dockerfile
 create mode 100644 datawave-hadoop/.gitignore
 create mode 100644 datawave-hadoop/Dockerfile
 create mode 100644 datawave-hadoop/bigtop.repo

diff --git a/datawave-accumulo/.env b/datawave-accumulo/.env
new file mode 100644
index 0000000..07257c2
--- /dev/null
+++ b/datawave-accumulo/.env
@@ -0,0 +1,5 @@
+ACCUMULO_VERSION=2.1.2
+HADOOP_VERSION=3.3.6
+ZOOKEEPER_VERSION=3.7.2
+ACCUMULO_CONF_DIR=/opt/accumulo/conf
+HADOOP_CONF_DIR=/opt/hadoop/conf
diff --git a/datawave-accumulo/Dockerfile b/datawave-accumulo/Dockerfile
new file mode 100644
index 0000000..30cb305
--- /dev/null
+++ b/datawave-accumulo/Dockerfile
@@ -0,0 +1,113 @@
+# Accumulo image built on top of the DataWave Hadoop image.
+# The "builder" stage obtains the Accumulo and ZooKeeper distributions and runs
+# "accumulo-util build-native"; the final stage copies the results into /opt.
+
+ARG BUILDER_IMAGE_NAME=ghcr.io/nationalsecurityagency/datawave-hadoop
+ARG BUILDER_IMAGE_TAG=docker-images
+
+ARG BASE_IMAGE_NAME=ghcr.io/nationalsecurityagency/datawave-hadoop
+ARG BASE_IMAGE_TAG=docker-images
+
+ARG ACCUMULO_VERSION=2.1.2
+ARG ZOOKEEPER_VERSION=3.7.2
+
+FROM ${BUILDER_IMAGE_NAME}:${BUILDER_IMAGE_TAG} AS builder
+
+ARG ACCUMULO_VERSION
+ARG ZOOKEEPER_VERSION
+
+#ARG ACCUMULO_DOWNLOAD_URL="https://www.apache.org/dyn/closer.cgi?action=download&filename=accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz"
+ARG ACCUMULO_DOWNLOAD_URL="https://dlcdn.apache.org/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz"
+ARG ACCUMULO_BACKUP_DOWNLOAD_URL="https://archive.apache.org/dist/accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz"
+
+#ARG ZOOKEEPER_DOWNLOAD_URL="https://www.apache.org/dyn/closer.cgi?action=download&filename=zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
+ARG ZOOKEEPER_DOWNLOAD_URL="https://dlcdn.apache.org/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
+ARG ZOOKEEPER_BACKUP_DOWNLOAD_URL="https://archive.apache.org/dist/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
+
+USER root
+
+RUN yum -y update && \
+    yum -y install \
+        gcc-c++ \
+        make \
+        wget \
+    && yum clean all
+
+ENV JAVA_HOME=/usr/lib/jvm/java
+
+# The steps below and the final stage's COPY --from=builder paths expect the
+# distributions to live directly under /, so pin the working directory.
+WORKDIR /
+
+# Allow users to provide their own builds of Accumulo, ZooKeeper and Hadoop
+COPY ./files/ .
+# Otherwise, download official distributions
+RUN if [ ! -f "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" ]; then \
+        (wget -nv -O "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" "${ACCUMULO_DOWNLOAD_URL}" || wget -nv -O "./accumulo-${ACCUMULO_VERSION}-bin.tar.gz" "${ACCUMULO_BACKUP_DOWNLOAD_URL}"); \
+    fi
+RUN if [ ! -f "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" ]; then \
+        (wget -nv -O "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" "${ZOOKEEPER_DOWNLOAD_URL}" || wget -nv -O "./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" "${ZOOKEEPER_BACKUP_DOWNLOAD_URL}"); \
+    fi
+
+# Extract required files
+RUN tar -xf ./accumulo-${ACCUMULO_VERSION}-bin.tar.gz accumulo-${ACCUMULO_VERSION}/bin/ accumulo-${ACCUMULO_VERSION}/lib/ && \
+    rm -f ./accumulo-${ACCUMULO_VERSION}-bin.tar.gz && \
+    accumulo-${ACCUMULO_VERSION}/bin/accumulo-util build-native
+
+RUN tar -xf ./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz --wildcards "apache-zookeeper-${ZOOKEEPER_VERSION}-bin/lib/zookeeper*.jar" && \
+    rm -f ./apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
+
+FROM ${BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
+ARG ACCUMULO_VERSION
+ARG ZOOKEEPER_VERSION
+ARG USER=accumulo
+ARG GROUP=accumulo
+
+USER root
+
+RUN yum -y update && \
+    yum -y install \
+        wget \
+    && yum clean all
+# Download dumb-init and mark it executable in a single layer.
+RUN wget -O /usr/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 && \
+    chmod +x /usr/bin/dumb-init
+
+COPY --from=builder /accumulo-${ACCUMULO_VERSION} /opt/accumulo-${ACCUMULO_VERSION}/
+COPY ./conf /opt/accumulo-${ACCUMULO_VERSION}/conf
+COPY --from=builder /apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper-${ZOOKEEPER_VERSION}/
+
+RUN groupadd ${GROUP} && useradd --gid ${GROUP} -G hadoop --no-create-home --shell /bin/bash ${USER}
+
+RUN cd /opt \
+    && ln -s ./accumulo-${ACCUMULO_VERSION} ./accumulo \
+    && ln -s /usr/local/hadoop ./hadoop \
+    && ln -s ./zookeeper-${ZOOKEEPER_VERSION} ./zookeeper \
+    && mkdir -p -m 755 /var/log/accumulo \
+    && chown ${USER}:${GROUP} /var/log/accumulo \
+    && chown -LR ${USER}:${GROUP} ./accumulo \
+    && chown -LR ${USER}:${GROUP} ./zookeeper \
+    && chown -h ${USER}:${GROUP} ./accumulo \
+    && chown -h ${USER}:${GROUP} ./zookeeper \
+    && usermod -d /opt/accumulo ${USER}
+
+USER ${USER}
+
+ENV ACCUMULO_HOME=/opt/accumulo
+ENV ACCUMULO_CONF_DIR=${ACCUMULO_HOME}/conf
+ENV ACCUMULO_LOG_DIR=/var/log/accumulo
+ENV ZOOKEEPER_HOME=/opt/zookeeper
+ENV PATH=$ACCUMULO_HOME/bin:$PATH
+ENV HADOOP_HOME=/usr/local/hadoop \
+    HADOOP_COMMON_HOME=/usr/local/hadoop \
+    HADOOP_HDFS_HOME=/usr/local/hadoop \
+    HADOOP_MAPRED_HOME=/usr/local/hadoop \
+    HADOOP_YARN_HOME=/usr/local/hadoop \
+    HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop \
+    YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop \
+    JAVA_HOME=/usr/lib/jvm/java \
+    HADOOP_CLASSPATH=/usr/local/hadoop/*:/usr/local/hadoop/lib/*:/usr/local/hadoop-hdfs/*:/usr/local/hadoop-hdfs/lib/*
+
+COPY ./entrypoint.sh /
+ENTRYPOINT ["/entrypoint.sh", "accumulo"]
+CMD ["help"]
diff --git a/datawave-accumulo/conf/accumulo-client.properties b/datawave-accumulo/conf/accumulo-client.properties
new file mode 100644
index 0000000..bc3e4c3
--- /dev/null
+++ b/datawave-accumulo/conf/accumulo-client.properties
@@ -0,0 +1,4 @@
+instance.name=dev
+instance.zookeepers=zookeeper:2181
+auth.principal=root
+auth.token=root
\ No newline at end of file
diff --git a/datawave-accumulo/conf/accumulo-env.sh b/datawave-accumulo/conf/accumulo-env.sh
new file mode 100755
index 0000000..fa04b94
--- /dev/null
+++ b/datawave-accumulo/conf/accumulo-env.sh
@@ -0,0 +1,146 @@
+#! /usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+## Before accumulo-env.sh is loaded, these environment variables are set and can be used in this file:
+
+# cmd - Command that is being called such as tserver, manager, etc.
+# basedir - Root of Accumulo installation
+# bin - Directory containing Accumulo scripts
+# conf - Directory containing Accumulo configuration
+# lib - Directory containing Accumulo libraries
+
+############################
+# Variables that must be set
+############################
+
+## Accumulo logs directory. Referenced by logger config.
+ACCUMULO_LOG_DIR="${ACCUMULO_LOG_DIR:-${basedir}/logs}"
+## Hadoop installation
+HADOOP_HOME="${HADOOP_HOME:-/opt/hadoop}"
+## Hadoop configuration
+HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_HOME}/etc/hadoop}"
+## Zookeeper installation
+ZOOKEEPER_HOME="${ZOOKEEPER_HOME:-/opt/zookeeper}"
+
+##########################
+# Build CLASSPATH variable
+##########################
+
+## Verify that Hadoop & Zookeeper installation directories exist
+if [[ ! -d $ZOOKEEPER_HOME ]]; then
+  echo "ZOOKEEPER_HOME=$ZOOKEEPER_HOME is not set to a valid directory in accumulo-env.sh"
+  exit 1
+fi
+if [[ ! -d $HADOOP_HOME ]]; then
+  echo "HADOOP_HOME=$HADOOP_HOME is not set to a valid directory in accumulo-env.sh"
+  exit 1
+fi
+
+## Build using existing CLASSPATH, conf/ directory, dependencies in lib/, and external Hadoop & Zookeeper dependencies
+if [[ -n $CLASSPATH ]]; then
+  # conf is set by calling script that sources this env file
+  #shellcheck disable=SC2154
+  CLASSPATH="${CLASSPATH}:${conf}"
+else
+  CLASSPATH="${conf}"
+fi
+ZK_JARS=$(find "$ZOOKEEPER_HOME/lib/" -maxdepth 1 -name '*.jar' -not -name '*slf4j*' -not -name '*log4j*' | paste -sd:)
+# lib is set by calling script that sources this env file
+#shellcheck disable=SC2154
+CLASSPATH="${CLASSPATH}:${lib}/*:${HADOOP_CONF_DIR}:${ZOOKEEPER_HOME}/*:${ZK_JARS}:${HADOOP_HOME}/share/hadoop/client/*"
+export CLASSPATH
+
+##################################################################
+# Build JAVA_OPTS variable. Defaults below work but can be edited.
+##################################################################
+
+## JVM options set for all processes. Extra options can be passed in by setting ACCUMULO_JAVA_OPTS to an array of options.
+read -r -a accumulo_initial_opts < <(echo "$ACCUMULO_JAVA_OPTS")
+JAVA_OPTS=(
+  '-XX:OnOutOfMemoryError=kill -9 %p'
+  '-XX:-OmitStackTraceInFastThrow'
+  '-Djava.net.preferIPv4Stack=true'
+  "-Daccumulo.native.lib.path=${lib}/native"
+  "${accumulo_initial_opts[@]}"
+)
+
+## Make sure Accumulo native libraries are built since they are enabled by default
+# bin is set by calling script that sources this env file
+#shellcheck disable=SC2154
+"${bin}"/accumulo-util build-native &>/dev/null
+
+## JVM options set for individual applications
+# cmd is set by calling script that sources this env file
+#shellcheck disable=SC2154
+case "$cmd" in
+  manager | master) JAVA_OPTS=('-Xmx512m' '-Xms512m' "${JAVA_OPTS[@]}") ;;
+  monitor) JAVA_OPTS=('-Xmx1g' '-Xms1g' "${JAVA_OPTS[@]}") ;;
+  gc) JAVA_OPTS=('-Xmx256m' '-Xms256m' "${JAVA_OPTS[@]}") ;;
+  tserver) JAVA_OPTS=('-Xmx768m' '-Xms768m' "${JAVA_OPTS[@]}") ;;
+  compaction-coordinator) JAVA_OPTS=('-Xmx512m' '-Xms512m' "${JAVA_OPTS[@]}") ;;
+  compactor) JAVA_OPTS=('-Xmx256m' '-Xms256m' "${JAVA_OPTS[@]}") ;;
+  sserver) JAVA_OPTS=('-Xmx512m' '-Xms512m' "${JAVA_OPTS[@]}") ;;
+  *) JAVA_OPTS=('-Xmx256m' '-Xms64m' "${JAVA_OPTS[@]}") ;;
+esac
+
+## JVM options set for logging. Review log4j2.properties file to see how they are used.
+JAVA_OPTS=("-Daccumulo.log.dir=${ACCUMULO_LOG_DIR}"
+  "-Daccumulo.application=${cmd}${ACCUMULO_SERVICE_INSTANCE}_$(hostname)"
+  "-Daccumulo.metrics.service.instance=${ACCUMULO_SERVICE_INSTANCE}"
+  "-Dlog4j2.contextSelector=org.apache.logging.log4j.core.async.AsyncLoggerContextSelector"
+  "-Dotel.service.name=${cmd}${ACCUMULO_SERVICE_INSTANCE}"
+  "${JAVA_OPTS[@]}"
+)
+
+## Optionally setup OpenTelemetry SDK AutoConfigure
+## See https://github.com/open-telemetry/opentelemetry-java/tree/main/sdk-extensions/autoconfigure
+#JAVA_OPTS=('-Dotel.traces.exporter=jaeger' '-Dotel.metrics.exporter=none' '-Dotel.logs.exporter=none' "${JAVA_OPTS[@]}")
+
+## Optionally setup OpenTelemetry Java Agent
+## See https://github.com/open-telemetry/opentelemetry-java-instrumentation for more options
+#JAVA_OPTS=('-javaagent:path/to/opentelemetry-javaagent-all.jar' "${JAVA_OPTS[@]}")
+
+case "$cmd" in
+  monitor | gc | manager | master | tserver | compaction-coordinator | compactor | sserver)
+    JAVA_OPTS=('-Dlog4j.configurationFile=log4j2-service.properties' "${JAVA_OPTS[@]}")
+    ;;
+  *)
+    # let log4j use its default behavior (log4j2.properties, etc.)
+    true
+    ;;
+esac
+
+############################
+# Variables set to a default
+############################
+
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1}
+## Add Hadoop native libraries to shared library paths given operating system
+case "$(uname)" in
+  Darwin) export DYLD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${DYLD_LIBRARY_PATH}" ;;
+  *) export LD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${LD_LIBRARY_PATH}" ;;
+esac
+
+###############################################
+# Variables that are optional. Uncomment to set
+###############################################
+
+## Specifies command that will be placed before calls to Java in accumulo script
+# export ACCUMULO_JAVA_PREFIX=""
\ No newline at end of file
diff --git a/datawave-accumulo/conf/accumulo.properties b/datawave-accumulo/conf/accumulo.properties
new file mode 100644
index 0000000..a7a41e7
--- /dev/null
+++ b/datawave-accumulo/conf/accumulo.properties
@@ -0,0 +1,23 @@
+## Sets location in HDFS where Accumulo will store data
+instance.volumes=hdfs://hdfs-nn:9000/accumulo
+
+## Sets location of Zookeepers
+instance.zookeeper.host=zookeeper:2181
+
+## Change secret before initialization. All Accumulo servers must have same secret
+instance.secret=DEFAULT
+trace.user=root
+trace.token.property.password=secret
+
+## Set to false if 'accumulo-util build-native' fails
+tserver.memory.maps.native.enabled=true
+tserver.memory.maps.max=128M
+tserver.cache.data.size=15M
+tserver.cache.index.size=40M
+tserver.total.mutation.queue.max=16M
+tserver.sort.buffer.size=50M
+tserver.walog.max.size=128M
+
+general.classpaths=$ACCUMULO_HOME/lib/accumulo-server.jar,$ACCUMULO_HOME/lib/accumulo-core.jar,$ACCUMULO_HOME/lib/accumulo-start.jar,$ACCUMULO_HOME/lib/accumulo-fate.jar,$ACCUMULO_HOME/lib/accumulo-proxy.jar,$ACCUMULO_HOME/lib/[^.].*.jar,$ZOOKEEPER_HOME/lib/zookeeper[^.].*.jar,$HADOOP_CONF_DIR,$HADOOP_HOME/share/hadoop/client/[^.].*.jar,$HADOOP_HOME/share/hadoop/common/lib/(?!slf4j)[^.].*.jar
+
+general.vfs.context.classpath.datawave=hdfs://hdfs-nn:9000/datawave/accumulo-vfs-classpath/.*.jar
\ No newline at end of file
diff --git a/datawave-accumulo/conf/client.conf b/datawave-accumulo/conf/client.conf
new file mode 100644
index 0000000..59825d8
--- /dev/null
+++ b/datawave-accumulo/conf/client.conf
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+instance.zookeeper.host=zookeeper:2181
+# instance.rpc.ssl.enabled=false
+
+# instance.rpc.sasl.enabled=false
+# rpc.sasl.qop=auth
+auth.principal=root
+auth.token=root
diff --git a/datawave-accumulo/conf/core-site.xml b/datawave-accumulo/conf/core-site.xml
new file mode 100644
index 0000000..636172f
--- /dev/null
+++ b/datawave-accumulo/conf/core-site.xml
@@ -0,0 +1 @@
+<configuration/>
diff --git a/datawave-accumulo/conf/generic_logger.properties b/datawave-accumulo/conf/generic_logger.properties
new file mode 100644
index 0000000..474e7a6
--- /dev/null
+++ b/datawave-accumulo/conf/generic_logger.properties
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Send most things to the console
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%-8c{2}] %-5p: %m%n
+
+# Send all logging data to a centralized logger
+# If the centralized logger is down, buffer the log events, but drop them if it stays down
+log4j.appender.ASYNC=org.apache.accumulo.core.util.AsyncSocketAppender
+log4j.appender.ASYNC.RemoteHost=${org.apache.accumulo.core.host.log}
+log4j.appender.ASYNC.Port=${org.apache.accumulo.core.host.log.port}
+log4j.appender.ASYNC.Application=${org.apache.accumulo.core.application}:${org.apache.accumulo.core.ip.localhost.hostname}
+log4j.appender.ASYNC.Threshold=WARN
+
+# Log accumulo events to the debug, normal and remote logs.
+log4j.logger.org.apache.accumulo=DEBUG, CONSOLE, ASYNC
+log4j.additivity.org.apache.accumulo=false
+
+# change to INFO for authorization events
+log4j.logger.org.apache.accumulo.server.security.Auditor=WARN
+
+log4j.logger.org.apache.accumulo.core.file.rfile.bcfile=INFO
+
+log4j.logger.org.apache.accumulo.examples.wikisearch=INFO
+
+log4j.logger.org.mortbay.log=WARN
+
+log4j.logger.com.yahoo.zookeeper=ERROR
+
+# Log non-accumulo events to the debug and normal logs
+log4j.rootLogger=INFO, CONSOLE
diff --git a/datawave-accumulo/conf/log4j2-service.properties b/datawave-accumulo/conf/log4j2-service.properties
new file mode 100644
index 0000000..843235b
--- /dev/null
+++ b/datawave-accumulo/conf/log4j2-service.properties
@@ -0,0 +1,26 @@
+status = info
+dest = err
+name = AccumuloCoreTestLoggingProperties
+monitorInterval = 30
+
+appender.console.type = Console
+appender.console.name = STDOUT
+appender.console.target = SYSTEM_OUT
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = [%t] %-5p %c %x - %m%n
+
+appender.console.filter.threshold.type = ThresholdFilter
+appender.console.filter.threshold.level = warn
+
+logger.accumulo.name = org.apache.accumulo
+logger.accumulo.level = debug
+
+logger.zookeeper.name = org.apache.zookeeper
+logger.zookeeper.level = error
+
+logger.hadoop.name = org.apache.hadoop
+logger.hadoop.level = warn
+
+rootLogger.level = info
+# Attach the console appender; no logger in this file references STDOUT otherwise.
+rootLogger.appenderRef.console.ref = STDOUT
diff --git a/datawave-accumulo/conf/monitor_logger.properties b/datawave-accumulo/conf/monitor_logger.properties
new file mode 100644
index 0000000..7d2332f
--- /dev/null
+++ b/datawave-accumulo/conf/monitor_logger.properties
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Write out INFO and higher to the console
+log4j.appender.A3=org.apache.log4j.ConsoleAppender
+log4j.appender.A3.layout=org.apache.log4j.PatternLayout
+log4j.appender.A3.layout.ConversionPattern=%d{ISO8601} [%-8c{2}] %-5p: %X{application} %m%n
+
+# Keep the last few log messages for display to the user
+log4j.appender.GUI=org.apache.accumulo.server.monitor.LogService
+log4j.appender.GUI.Keep=50
+log4j.appender.GUI.Threshold=WARN
+
+# Log accumulo messages to debug, normal and GUI
+log4j.logger.org.apache.accumulo=DEBUG, A3, GUI
+log4j.additivity.org.apache.accumulo=false
+
+# Log non-accumulo messages to debug, normal logs.
+log4j.rootLogger=INFO, A3
diff --git a/datawave-accumulo/entrypoint.sh b/datawave-accumulo/entrypoint.sh
new file mode 100755
index 0000000..97b12de
--- /dev/null
+++ b/datawave-accumulo/entrypoint.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Container entrypoint for the Accumulo image.
+#
+# When started as "accumulo manager" (or the older "accumulo master") it
+# waits for HDFS to leave safe mode, initializes the Accumulo instance if it
+# is not listed yet, and then runs the requested command under dumb-init.
+# Every other command is passed straight through to dumb-init.
+#
+# Environment:
+#   ACCUMULO_INSTANCE_NAME  instance to check/create (default: dev)
+#   ACCUMULO_ROOT_PASSWORD  root password given to "accumulo init"
+#   ACCUMULO_VOLUMES        comma-separated HDFS volumes to wait for
+#                           (default: hdfs://hdfs-nn:9000/accumulo)
+
+ACCUMULO_INSTANCE_NAME="${ACCUMULO_INSTANCE_NAME:-dev}"
+
+if [ "$1" = "accumulo" ] && { [ "$2" = "manager" ] || [ "$2" = "master" ]; }; then
+    # Root password comes from the environment; a pre-set PASSWORD is the fallback.
+    PASSWORD="${ACCUMULO_ROOT_PASSWORD:-${PASSWORD}}"
+
+    if [ -z "${PASSWORD}" ]; then
+        echo "Unable to determine what the Accumulo root user's password should be."
+        echo "Please set:"
+        echo "- ACCUMULO_ROOT_PASSWORD environment variable"
+        exit 1
+    fi
+
+    # Wait until all the HDFS instances that Accumulo will be using are available i.e. not in Safe Mode and directory is writeable
+    ACCUMULO_VOLUMES="${ACCUMULO_VOLUMES:-hdfs://hdfs-nn:9000/accumulo}"
+    ATTEMPTS=0
+    ALL_VOLUMES_READY="false"
+
+    until [ "${ALL_VOLUMES_READY}" == "true" ] || [ $(( ATTEMPTS++ )) -gt 6 ]; do
+        echo "$(date) - Waiting for all HDFS instances to be ready..."
+        ALL_VOLUMES_READY="true"
+        for ACCUMULO_VOLUME in ${ACCUMULO_VOLUMES//,/ }; do
+            SAFE_MODE_CHECK="OFF"
+            if ! hdfs dfsadmin -safemode get | grep -q "Safe mode is OFF"; then
+                ALL_VOLUMES_READY="false"
+                SAFE_MODE_CHECK="ON"
+            fi
+
+            WRITE_CHECK="writeable"
+            hdfs dfs -mkdir -p "${ACCUMULO_VOLUME}"
+            if ! hdfs dfs -test -w "${ACCUMULO_VOLUME}"; then
+                ALL_VOLUMES_READY="false"
+                WRITE_CHECK="not writeable"
+            fi
+
+            echo "${ACCUMULO_VOLUME} - Safe mode is ${SAFE_MODE_CHECK} - ${WRITE_CHECK}"
+        done
+        [ "${ALL_VOLUMES_READY}" == "true" ] || sleep 10
+    done
+    if [ "${ALL_VOLUMES_READY}" != "true" ]; then
+        echo "$(date) - ERROR: Timed out waiting for HDFS instances to be ready..."
+        exit 1
+    fi
+
+    echo "Initializing Accumulo..."
+    ALREADY_INIT=$(/opt/accumulo/bin/accumulo org.apache.accumulo.server.util.ListInstances | grep -c -- "${ACCUMULO_INSTANCE_NAME}")
+    echo "Checked init. Was: ${ALREADY_INIT}"
+    if [ "${ALREADY_INIT}" -eq 0 ]; then
+        echo "Initializing"
+        accumulo init --instance-name "${ACCUMULO_INSTANCE_NAME}" --password "${PASSWORD}" || exit 1
+    fi
+fi
+
+exec /usr/bin/dumb-init -- "$@"
diff --git a/datawave-accumulo/files/.gitkeep b/datawave-accumulo/files/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/datawave-base/Dockerfile b/datawave-base/Dockerfile
new file mode 100644
index 0000000..1803ce3
--- /dev/null
+++ b/datawave-base/Dockerfile
@@ -0,0 +1,25 @@
+ARG BUILDER_IMAGE_NAME=registry.access.redhat.com/ubi8/ubi
+ARG BUILDER_IMAGE_TAG=8.7
+ARG CORRETTO_VERSION=11.0.19.7
+
+FROM ${BUILDER_IMAGE_NAME}:${BUILDER_IMAGE_TAG}
+
+ARG CORRETTO_VERSION
+
+# add amazon yum repo
+RUN rpm --import https://yum.corretto.aws/corretto.key && \
+    curl -L -o /etc/yum.repos.d/corretto.repo https://yum.corretto.aws/corretto.repo && \
+    rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && \
+    yum install -y java-11-amazon-corretto-devel-${CORRETTO_VERSION} \
+        gcc-c++ \
+        make \
+        ncurses \
+        wget \
+        pdsh \
+        genders \
+        pdsh-mod-genders \
+        hostname \
+    && yum clean all && rm -Rf /var/cache/yum
+
+ENV JAVA_HOME=/usr/lib/jvm/java
+
diff --git a/datawave-hadoop/.gitignore b/datawave-hadoop/.gitignore
new file mode 100644
index 0000000..a8cae4a
--- /dev/null
+++ b/datawave-hadoop/.gitignore
@@ -0,0 +1,4 @@
+*.tar
+*.tar.gz
+*.tar.bz2
+*.rpm
diff --git a/datawave-hadoop/Dockerfile b/datawave-hadoop/Dockerfile
new file mode 100644
index 0000000..a18d571
--- /dev/null
+++ b/datawave-hadoop/Dockerfile
@@ -0,0 +1,67 @@
+FROM ghcr.io/nationalsecurityagency/datawave-base-jdk-11:docker-images
+
+# Add native libs
+ARG HADOOP_VERSION=3.3.6
+COPY bigtop.repo /etc/yum.repos.d/
+
+RUN yum clean all && \
+    rm -rf /var/cache/yum && \
+    yum clean expire-cache && \
+    yum makecache && \
+    yum update -y && \
+    yum install -y bigtop-utils \
+        openssl \
+        glibc \
+        compat-openssl10 \
+        lzo \
+        protobuf-c \
+        hadoop hadoop-hdfs hadoop-libhdfs hadoop-client \
+        hadoop-yarn hadoop-yarn-router hadoop-yarn-proxyserver hadoop-debuginfo \
+        hadoop-yarn-timelineserver hadoop-yarn-nodemanager hadoop-hdfs-journalnode \
+        hadoop-mapreduce hadoop-mapreduce-historyserver && yum clean all && \
+    rm -rf /var/cache/yum && \
+    yum clean expire-cache && \
+    usermod -u 1000 hdfs && \
+    ln -fns /lib/hadoop /usr/local/hadoop && \
+    ln -fns /lib/hadoop-hdfs /usr/local/hadoop-hdfs && \
+    ln -fns /lib/hadoop-mapreduce /usr/local/hadoop-mapreduce && \
+    ln -fns /lib/hadoop-yarn /usr/local/hadoop-yarn && \
+    mkdir -p /usr/local/hadoop/logs && \
+    mkdir -p /usr/local/hadoop-hdfs/logs && \
+    mkdir -p /usr/local/hadoop-yarn/logs && \
+    mkdir -p /var/lib/hadoop-yarn && \
+    mkdir -p /var/log/hadoop-yarn && \
+    mkdir -p /usr/local/hadoop/etc/hadoop && \
+    mkdir -p /opt/hdfs/hadoop-yarn/cache/hadoop/nm-local-dir && \
+    mkdir -p /usr/local/hadoop/hdfs && \
+    mkdir -p /usr/lib/hadoop/data && \
+    chown hdfs:hdfs -R /usr/local/hadoop/* && \
+    chown hdfs:hdfs -R /lib/hadoop/* && \
+    chown hdfs:hdfs -R /lib/hadoop-hdfs/* && \
+    chown hdfs:hdfs -R /lib/hadoop-yarn/* && \
+    chown hdfs:hdfs -R /lib/hadoop-mapreduce/* && \
+    chown hdfs:hdfs -R /var/lib/hadoop* && \
+    chown hdfs:hdfs -R /var/log/hadoop* && \
+    chmod 777 -R /etc/hadoop/ && \
+    chmod 777 -R /opt/hdfs/hadoop-yarn && \
+    rm -Rf /usr/lib/hadoop/tools
+
+ENV HADOOP_HOME=/usr/local/hadoop \
+    HADOOP_COMMON_HOME=/usr/local/hadoop \
+    HADOOP_HDFS_HOME=/usr/local/hadoop-hdfs \
+    HADOOP_MAPRED_HOME=/usr/local/hadoop-mapreduce \
+    HADOOP_YARN_HOME=/usr/local/hadoop-yarn \
+    HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop \
+    YARN_CONF_DIR=/usr/local/hadoop-yarn/etc/hadoop \
+    PATH=${PATH}:/usr/local/hadoop/bin:/usr/local/hadoop-hdfs/bin
+
+# Hdfs ports
+EXPOSE 50010 50020 50070 50075 50090 8020 9000
+# Mapred ports
+EXPOSE 19888
+#Yarn ports
+EXPOSE 8030 8031 8032 8033 8040 8042 8088
+#Other ports
+EXPOSE 49707 2122
+
+USER hdfs
diff --git a/datawave-hadoop/bigtop.repo b/datawave-hadoop/bigtop.repo
new file mode 100644
index 0000000..ab262fa
--- /dev/null
+++ b/datawave-hadoop/bigtop.repo
@@ -0,0 +1,6 @@
+[bigtop]
+name=Bigtop
+enabled=1
+gpgcheck=1
+baseurl=http://repos.bigtop.apache.org/releases/3.2.1/centos/7/$basearch
+gpgkey=https://dist.apache.org/repos/dist/release/bigtop/KEYS
\ No newline at end of file