Skip to content

Commit

Permalink
Merge pull request #152 from DataONEorg/develop
Browse files Browse the repository at this point in the history
3.1.0 release
  • Loading branch information
artntek authored Nov 22, 2024
2 parents 1004701 + e2a9215 commit f44bf49
Show file tree
Hide file tree
Showing 24 changed files with 773 additions and 820 deletions.
22 changes: 22 additions & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
# dataone-indexer Release Notes

## dataone-indexer version 3.1.0 & helm chart version 1.1.0

* Release date: 2024-11-21
* dataone-indexer version 3.1.0
* Integrate with the new DataONE hash-based storage library
[`hashstore-java`](https://github.com/DataONEorg/hashstore-java).
* Indexer no longer needs access to an auth token in order to index private datasets.
* Update Docker base image to eclipse-temurin:17.0.12_7-jre-jammy
* Upgrade log4j-core to 2.24.0 to fix "method can't be found" issue
* Bump dependencies:
* org.apache.commons:commons-lang3 from 3.4 to 3.17.0
* org.slf4j:slf4j-api from 1.7.36 to 2.0.16
* org.springframework.data:spring-data-commons from 1.6.5.RELEASE to 3.3.4
* org.apache.maven.plugins:maven-compiler-plugin from 2.0.1 to 3.13.0
* com.coderplus.maven.plugins:copy-rename-maven-plugin from 1.0 to 1.0.1
* org.apache.logging.log4j:log4j-jcl from 2.17.1 to 2.24.0
* org.apache.maven.plugins:maven-clean-plugin from 3.2.0 to 3.4.0
* com.fasterxml.jackson.core:jackson-annotations from 2.13.3 to 2.17.2
* helm chart version 1.1.0
* Bump Application version to 3.1.0
* Add `storage` to values.yaml for new hashstore integration

## dataone-indexer version 3.0.2 & helm chart version 1.0.2

* Release date: 2024-07-29
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# `nerdctl build -t dataone-index-worker:2.4.0 -f docker/Dockerfile --build-arg TAG=2.4.0 .`
# Use an OpenJDK runtime as a parent image
# Note: the prior alpine-based openjdk image had network DNS issues, so replacing with Eclipse Temurin
FROM eclipse-temurin:17.0.8.1_1-jre-jammy
FROM eclipse-temurin:17.0.12_7-jre-jammy

ARG TAG=3.0.0-SNAPSHOT
ENV TAG=${TAG}
Expand All @@ -23,7 +23,7 @@ RUN groupadd -g 1000 d1indexer && useradd -u 1000 -g 1000 d1indexer \

# The most recently built jar file is copied from the maven build directory to this dir by maven, so that
# it can be copied to the image.
COPY ../target/dataone-index-worker-${TAG}-shaded.jar .
COPY ./target/dataone-index-worker-${TAG}-shaded.jar .
COPY ./docker/entrypoint.sh .

# Change the ownership of the jar and sh files
Expand Down
4 changes: 2 additions & 2 deletions helm/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0.2
version: "1.1.0"

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "3.0.2"
appVersion: "3.1.0"

# Chart dependencies
dependencies:
Expand Down
9 changes: 9 additions & 0 deletions helm/config/dataone-indexer.properties
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,12 @@ index.resourcemap.waitingComponent.time={{ default 800 .Values.idxworker.resourc
index.resourcemap.waitingComponent.max.attempts={{ default 25 .Values.idxworker.resourcemapMaxTries }}
index.solr.versionConflict.waiting.time={{ default 1000 .Values.idxworker.solrVerConflictWaitMs }}
index.solr.versionConflict.max.attempts={{ default 50 .Values.idxworker.solrVerConflictMaxTries }}

# Storage properties
storage.className={{ default "org.dataone.hashstore.filehashstore.FileHashStore" .Values.idxworker.storage.hashStoreClassName }}
storage.hashstore.rootDirectory={{ default "/var/metacat/hashstore" .Values.idxworker.storage.hashStoreRootDir }}
storage.hashstore.defaultNamespace={{ default "https://ns.dataone.org/service/types/v2.0#SystemMetadata" .Values.idxworker.storage.hashStoreDefaultNamespace }}
# The following three properties must NOT be modified after the hash store is initialized
storage.hashstore.fileNameAlgorithm={{ default "SHA-256" .Values.idxworker.storage.hashStoreAlgorithm }}
storage.hashstore.directory.width={{ default 2 .Values.idxworker.storage.hashStoreDirWidth }}
storage.hashstore.directory.depth={{ default 3 .Values.idxworker.storage.hashStoreDirDepth }}
28 changes: 25 additions & 3 deletions helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,23 @@ global:
## @section Dataone-Indexer Application-Specific Properties

image:
## @param image.repository repository that the image will be pulled from
##
repository: ghcr.io/dataoneorg/dataone-index-worker

## @param image.pullPolicy image pull policy - Always, Never, or IfNotPresent
##
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
#tag: ""

## @param image.tag Overrides the image tag. Will default to the chart appVersion if set to ""
##
tag: ""

## @param image.debug Specify if container debugging should be enabled (sets log level to "DEBUG")
## Set to true if you would like to see extra information in the index worker logs.
## * * WARNING - FOR TESTING ONLY! * * May result in secrets being printed to logs in plain text.
##
debug: false

imagePullSecrets: []

Expand Down Expand Up @@ -181,6 +194,15 @@ idxworker:
##
tripleDbDirectory: /etc/dataone/tdb-cache

## Settings for the hashstore-backed object storage. Each value below is rendered into
## helm/config/dataone-indexer.properties; the template applies the same default when a
## value is left unset.
##
## @param idxworker.storage.hashStoreClassName rendered as the `storage.className` property
## @param idxworker.storage.hashStoreRootDir rendered as `storage.hashstore.rootDirectory`
## @param idxworker.storage.hashStoreDefaultNamespace rendered as `storage.hashstore.defaultNamespace`
## @param idxworker.storage.hashStoreAlgorithm rendered as `storage.hashstore.fileNameAlgorithm`
## @param idxworker.storage.hashStoreDirWidth rendered as `storage.hashstore.directory.width`
## @param idxworker.storage.hashStoreDirDepth rendered as `storage.hashstore.directory.depth`
##
storage:
hashStoreClassName: "org.dataone.hashstore.filehashstore.FileHashStore"
hashStoreRootDir: "/var/metacat/hashstore"
hashStoreDefaultNamespace: "https://ns.dataone.org/service/types/v2.0#SystemMetadata"
# The following three properties must NOT be modified after the hashstore is initialized
hashStoreAlgorithm: "SHA-256"
hashStoreDirWidth: 2
hashStoreDirDepth: 3

## @section RabbitMQ Bitnami Sub-Chart Configuration
##
rabbitmq:
Expand All @@ -200,7 +222,7 @@ rabbitmq:
## @param rabbitmq.existingPasswordSecret the k8s secret holding the rabbitmq password
## (must be associated with key: 'rabbitmq-password')
##
existingPasswordSecret: ""
existingPasswordSecret: "ssss"


## @section Solr Bitnami Sub-Chart Configuration
Expand Down
85 changes: 67 additions & 18 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.dataone</groupId>
<artifactId>dataone-index-worker</artifactId>
<version>3.0.2</version>
<version>3.1.0</version>
<packaging>jar</packaging>
<name>dataone-index-worker</name>
<url>http://maven.apache.org</url>
Expand All @@ -12,7 +12,7 @@
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<spring.version>5.3.33</spring.version>
<spring.version>5.3.39</spring.version>
<d1_libclient_java.version>2.3.1</d1_libclient_java.version>
<solr.version>8.11.2</solr.version>
<solr.test.home>solr8home</solr.test.home>
Expand Down Expand Up @@ -67,7 +67,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<dependency>
Expand All @@ -79,6 +79,18 @@
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
</exclusions>
<scope>test</scope>
</dependency>
Expand All @@ -91,18 +103,30 @@
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
</exclusions>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
<version>1.8.3</version>
<version>1.9.4</version>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>1.3.163</version>
<version>2.3.232</version>
<scope>test</scope>
</dependency>
<dependency>
Expand All @@ -128,12 +152,12 @@
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-jpa</artifactId>
<version>1.4.5.RELEASE</version>
<version>3.3.4</version>
</dependency>
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-commons</artifactId>
<version>1.6.5.RELEASE</version>
<version>3.3.4</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
Expand Down Expand Up @@ -188,12 +212,12 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
<version>3.17.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.36</version>
<version>2.0.16</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
Expand All @@ -209,7 +233,7 @@
<dependency>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
<version>1.0.9</version>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
Expand All @@ -229,7 +253,7 @@
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
<version>3.2.2</version>
</dependency>
<dependency>
<groupId>ch.hsr</groupId>
Expand All @@ -254,17 +278,17 @@
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<version>2.17.1</version>
<version>2.24.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.17.1</version>
<version>2.24.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-jcl</artifactId>
<version>2.17.1</version>
<version>2.24.0</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
Expand All @@ -274,7 +298,7 @@
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.13.3</version>
<version>2.17.2</version>
</dependency>
<!-- API, java.xml.bind module -->
<dependency>
Expand All @@ -288,12 +312,17 @@
<artifactId>jaxb-runtime</artifactId>
<version>2.3.2</version>
</dependency>
<dependency>
<groupId>org.dataone</groupId>
<artifactId>hashstore</artifactId>
<version>1.1.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.2.0</version>
<version>3.4.0</version>
<configuration>
<filesets>
<fileset>
Expand All @@ -318,7 +347,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.1</version>
<version>3.13.0</version>
</plugin>
<plugin>
<groupId>com.mycila.maven-license-plugin</groupId>
Expand Down Expand Up @@ -398,7 +427,7 @@
<plugin>
<groupId>com.coderplus.maven.plugins</groupId>
<artifactId>copy-rename-maven-plugin</artifactId>
<version>1.0</version>
<version>1.0.1</version>
<executions>
<execution>
<id>copy-file</id>
Expand Down Expand Up @@ -430,6 +459,15 @@
</includes>
</resource>
</resources>

<extensions>
<!-- Enabling the use of SSH; see `<distributionManagement>` section -->
<extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-ssh-external</artifactId>
<version>3.5.3</version>
</extension>
</extensions>
</build>
<scm>
<url>https://repository.dataone.org/software/cicore</url>
Expand All @@ -443,4 +481,15 @@
<url>LICENSE.txt</url>
</license>
</licenses>
<!-- Config for mvn deploy to upload to repo. Also see `<build><extensions>` section -->
<!-- Also requires authentication info in ~/.m2/settings.xml; -->
<!-- see: https://maven.apache.org/guides/mini/guide-deployment-security-settings.html -->
<!-- ...but note pvt key location should be an absolute path (/home/myname/...), NOT ~/... -->
<distributionManagement>
<repository>
<id>dataone.org</id>
<name>DataONE Repository</name>
<url>scpexe://maven.dataone.org/var/www/maven</url>
</repository>
</distributionManagement>
</project>
7 changes: 2 additions & 5 deletions src/main/java/org/dataone/cn/indexer/IndexWorker.java
Original file line number Diff line number Diff line change
Expand Up @@ -427,23 +427,21 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread)
Identifier pid = parser.getIdentifier();
String indexType = parser.getIndexType();
int priority = parser.getPriority();
String finalFilePath = parser.getObjectPath();
try {
long threadId = Thread.currentThread().getId();
logger.info("IndexWorker.consumer.indexObject by multiple thread? " + multipleThread
+ ", with the thread id " + threadId
+ " - Received the index task from the index queue with the identifier: "
+ pid.getValue() + " , the index type: " + indexType
+ ", the file path (null means not to have): " + finalFilePath
+ ", the priority: " + priority);
switch (indexType) {
case CREATE_INDEXT_TYPE -> {
boolean sysmetaOnly = false;
solrIndex.update(pid, finalFilePath, sysmetaOnly);
solrIndex.update(pid, sysmetaOnly);
}
case SYSMETA_CHANGE_TYPE -> {
boolean sysmetaOnly = true;
solrIndex.update(pid, finalFilePath, sysmetaOnly);
solrIndex.update(pid, sysmetaOnly);
}
case DELETE_INDEX_TYPE -> solrIndex.remove(pid);
default -> throw new InvalidRequest(
Expand All @@ -455,7 +453,6 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread)
logger.info("IndexWorker.indexOjbect with the thread id " + threadId
+ " - Completed the index task from the index queue with the identifier: "
+ pid.getValue() + " , the index type: " + indexType
+ ", the file path (null means not to have): " + finalFilePath
+ ", the priority: " + priority + " and the time taking is "
+ (end - start) + " milliseconds");

Expand Down
Loading

0 comments on commit f44bf49

Please sign in to comment.