diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index d6fdf6e4..ea17fce0 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,5 +1,27 @@
# dataone-indexer Release Notes
+## dataone-indexer version 3.1.0 & helm chart version 1.1.0
+
+* Release date: 2024-11-21
+* dataone-indexer version 3.1.0
+ * Integrate with the new Dataone hash-based storage library
+ [`hashstore-java`](https://github.com/DataONEorg/hashstore-java).
+ * Indexer no longer needs access to an auth token in order to index private datasets.
+ * Update Docker base image to eclipse-temurin:17.0.12_7-jre-jammy
+ * Upgrade log4j-core to 2.24.0 to fix "method can't be found" issue
+ * Bump dependencies:
+ * org.apache.commons:commons-lang3 from 3.4 to 3.17.0
+ * org.slf4j:slf4j-api from 1.7.36 to 2.0.16
+ * org.springframework.data:spring-data-commons from 1.6.5.RELEASE to 3.3.4
+ * org.apache.maven.plugins:maven-compiler-plugin from 2.0.1 to 3.13.0
+ * com.coderplus.maven.plugins:copy-rename-maven-plugin from 1.0 to 1.0.1
+ * org.apache.logging.log4j:log4j-jcl from 2.17.1 to 2.24.0
+ * org.apache.maven.plugins:maven-clean-plugin from 3.2.0 to 3.4.0
+ * com.fasterxml.jackson.core:jackson-annotations from 2.13.3 to 2.17.2
+* helm chart version 1.1.0
+ * Bump Application version to 3.1.0
+ * Add `storage` to values.yaml for new hashstore integration
+
## dataone-indexer version 3.0.2 & helm chart version 1.0.2
* Release date: 2024-07-29
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 354a7937..3e012d47 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,7 +2,7 @@
# `nerdctl build -t dataone-index-worker:2.4.0 -f docker/Dockerfile --build-arg TAG=2.4.0 .`
# Use an OpenJDK runtime as a parent image
# Note: the prior alpine-based openjdk image had network DNS issues, so replacing with Eclipse Temurin
-FROM eclipse-temurin:17.0.8.1_1-jre-jammy
+FROM eclipse-temurin:17.0.12_7-jre-jammy
ARG TAG=3.0.0-SNAPSHOT
ENV TAG=${TAG}
@@ -23,7 +23,7 @@ RUN groupadd -g 1000 d1indexer && useradd -u 1000 -g 1000 d1indexer \
# The most recently built jar file is copied from the maven build directory to this dir by maven, so that
# it can be copied to the image.
-COPY ../target/dataone-index-worker-${TAG}-shaded.jar .
+COPY ./target/dataone-index-worker-${TAG}-shaded.jar .
COPY ./docker/entrypoint.sh .
# Change the ownership of the jar and sh files
diff --git a/helm/Chart.yaml b/helm/Chart.yaml
index 2f92eb41..44f72768 100644
--- a/helm/Chart.yaml
+++ b/helm/Chart.yaml
@@ -21,13 +21,13 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.0.2
+version: "1.1.0"
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
-appVersion: "3.0.2"
+appVersion: "3.1.0"
# Chart dependencies
dependencies:
diff --git a/helm/config/dataone-indexer.properties b/helm/config/dataone-indexer.properties
index c51ce0aa..5c72e7c3 100644
--- a/helm/config/dataone-indexer.properties
+++ b/helm/config/dataone-indexer.properties
@@ -40,3 +40,12 @@ index.resourcemap.waitingComponent.time={{ default 800 .Values.idxworker.resourc
index.resourcemap.waitingComponent.max.attempts={{ default 25 .Values.idxworker.resourcemapMaxTries }}
index.solr.versionConflict.waiting.time={{ default 1000 .Values.idxworker.solrVerConflictWaitMs }}
index.solr.versionConflict.max.attempts={{ default 50 .Values.idxworker.solrVerConflictMaxTries }}
+
+# Storage properties
+storage.className={{ default "org.dataone.hashstore.filehashstore.FileHashStore" .Values.idxworker.storage.hashStoreClassName }}
+storage.hashstore.rootDirectory={{ default "/var/metacat/hashstore" .Values.idxworker.storage.hashStoreRootDir }}
+storage.hashstore.defaultNamespace={{ default "https://ns.dataone.org/service/types/v2.0#SystemMetadata" .Values.idxworker.storage.hashStoreDefaultNamespace }}
+# The following three properties must NOT be modified after the hash store is initialized
+storage.hashstore.fileNameAlgorithm={{ default "SHA-256" .Values.idxworker.storage.hashStoreAlgorithm }}
+storage.hashstore.directory.width={{ default 2 .Values.idxworker.storage.hashStoreDirWidth }}
+storage.hashstore.directory.depth={{ default 3 .Values.idxworker.storage.hashStoreDirDepth }}
diff --git a/helm/values.yaml b/helm/values.yaml
index fe5f099b..983dff71 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -32,10 +32,23 @@ global:
## @section Dataone-Indexer Application-Specific Properties
image:
+ ## @param image.repository repository that the image will be pulled from
+ ##
repository: ghcr.io/dataoneorg/dataone-index-worker
+
+ ## @param image.pullPolicy image pull policy - Always, Never, or IfNotPresent
+ ##
pullPolicy: IfNotPresent
- # Overrides the image tag whose default is the chart appVersion.
- #tag: ""
+
+ ## @param image.tag Overrides the image tag. Will default to the chart appVersion if set to ""
+ ##
+ tag: ""
+
+ ## @param image.debug Specify if container debugging should be enabled (sets log level to "DEBUG")
+ ## Set to true if you would like to see extra information in metacat/tomcat logs.
+ ## * * WARNING - FOR TESTING ONLY! * * May result in secrets being printed to logs in plain text.
+ ##
+ debug: false
imagePullSecrets: []
@@ -181,6 +194,15 @@ idxworker:
##
tripleDbDirectory: /etc/dataone/tdb-cache
+ storage:
+ hashStoreClassName: "org.dataone.hashstore.filehashstore.FileHashStore"
+ hashStoreRootDir: "/var/metacat/hashstore"
+ hashStoreDefaultNamespace: "https://ns.dataone.org/service/types/v2.0#SystemMetadata"
+ # The following three properties must NOT be modified after the hashstore is initialized
+ hashStoreAlgorithm: "SHA-256"
+ hashStoreDirWidth: 2
+ hashStoreDirDepth: 3
+
## @section RabbitMQ Bitnami Sub-Chart Configuration
##
rabbitmq:
@@ -200,7 +222,7 @@ rabbitmq:
## @param rabbitmq.existingPasswordSecret the k8s secret holding the rabbitmq password
## (must be associated with key: 'rabbitmq-password')
##
- existingPasswordSecret: ""
+ existingPasswordSecret: ""
## @section Solr Bitnami Sub-Chart Configuration
diff --git a/pom.xml b/pom.xml
index b78f79c4..e635f78f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
4.0.0
org.dataone
dataone-index-worker
- 3.0.2
+ 3.1.0
jar
dataone-index-worker
http://maven.apache.org
@@ -12,7 +12,7 @@
17
17
UTF-8
- 5.3.33
+ 5.3.39
2.3.1
8.11.2
solr8home
@@ -67,7 +67,7 @@
junit
junit
- 4.12
+ 4.13.2
test
@@ -79,6 +79,18 @@
log4j
log4j
+
+ org.apache.logging.log4j
+ log4j-core
+
+
+ org.apache.logging.log4j
+ log4j-api
+
+
+ org.apache.logging.log4j
+ log4j-1.2-api
+
test
@@ -91,18 +103,30 @@
log4j
log4j
+
+ org.apache.logging.log4j
+ log4j-core
+
+
+ org.apache.logging.log4j
+ log4j-api
+
+
+ org.apache.logging.log4j
+ log4j-1.2-api
+
test
commons-beanutils
commons-beanutils
- 1.8.3
+ 1.9.4
com.h2database
h2
- 1.3.163
+ 2.3.232
test
@@ -128,12 +152,12 @@
org.springframework.data
spring-data-jpa
- 1.4.5.RELEASE
+ 3.3.4
org.springframework.data
spring-data-commons
- 1.6.5.RELEASE
+ 3.3.4
org.springframework
@@ -188,12 +212,12 @@
org.apache.commons
commons-lang3
- 3.4
+ 3.17.0
org.slf4j
slf4j-api
- 1.7.36
+ 2.0.16
org.slf4j
@@ -209,7 +233,7 @@
net.minidev
json-smart
- 1.0.9
+ 2.5.1
org.apache.jena
@@ -229,7 +253,7 @@
commons-collections
commons-collections
- 3.2.1
+ 3.2.2
ch.hsr
@@ -254,17 +278,17 @@
org.apache.logging.log4j
log4j-1.2-api
- 2.17.1
+ 2.24.0
org.apache.logging.log4j
log4j-core
- 2.17.1
+ 2.24.0
org.apache.logging.log4j
log4j-jcl
- 2.17.1
+ 2.24.0
org.apache.solr
@@ -274,7 +298,7 @@
com.fasterxml.jackson.core
jackson-annotations
- 2.13.3
+ 2.17.2
@@ -288,12 +312,17 @@
jaxb-runtime
2.3.2
+
+ org.dataone
+ hashstore
+ 1.1.0
+
maven-clean-plugin
- 3.2.0
+ 3.4.0
@@ -318,7 +347,7 @@
org.apache.maven.plugins
maven-compiler-plugin
- 2.0.1
+ 3.13.0
com.mycila.maven-license-plugin
@@ -398,7 +427,7 @@
com.coderplus.maven.plugins
copy-rename-maven-plugin
- 1.0
+ 1.0.1
copy-file
@@ -430,6 +459,15 @@
+
+
+
+
+ org.apache.maven.wagon
+ wagon-ssh-external
+ 3.5.3
+
+
https://repository.dataone.org/software/cicore
@@ -443,4 +481,15 @@
LICENSE.txt
+
+
+
+
+
+
+ dataone.org
+ DataONE Repository
+ scpexe://maven.dataone.org/var/www/maven
+
+
diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java
index 754040b6..3f5c6094 100644
--- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java
+++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java
@@ -427,23 +427,21 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread)
Identifier pid = parser.getIdentifier();
String indexType = parser.getIndexType();
int priority = parser.getPriority();
- String finalFilePath = parser.getObjectPath();
try {
long threadId = Thread.currentThread().getId();
logger.info("IndexWorker.consumer.indexObject by multiple thread? " + multipleThread
+ ", with the thread id " + threadId
+ " - Received the index task from the index queue with the identifier: "
+ pid.getValue() + " , the index type: " + indexType
- + ", the file path (null means not to have): " + finalFilePath
+ ", the priority: " + priority);
switch (indexType) {
case CREATE_INDEXT_TYPE -> {
boolean sysmetaOnly = false;
- solrIndex.update(pid, finalFilePath, sysmetaOnly);
+ solrIndex.update(pid, sysmetaOnly);
}
case SYSMETA_CHANGE_TYPE -> {
boolean sysmetaOnly = true;
- solrIndex.update(pid, finalFilePath, sysmetaOnly);
+ solrIndex.update(pid, sysmetaOnly);
}
case DELETE_INDEX_TYPE -> solrIndex.remove(pid);
default -> throw new InvalidRequest(
@@ -455,7 +453,6 @@ private void indexObject(IndexQueueMessageParser parser, boolean multipleThread)
logger.info("IndexWorker.indexOjbect with the thread id " + threadId
+ " - Completed the index task from the index queue with the identifier: "
+ pid.getValue() + " , the index type: " + indexType
- + ", the file path (null means not to have): " + finalFilePath
+ ", the priority: " + priority + " and the time taking is "
+ (end - start) + " milliseconds");
diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java
index 104a158a..b07211e0 100644
--- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java
+++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java
@@ -24,6 +24,7 @@
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
@@ -109,8 +110,10 @@ public class SolrIndex {
* @throws SAXException
* @throws IOException
*/
- public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, BaseXPathDocumentSubprocessor systemMetadataProcessor, HTTPService httpService)
- throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
+ public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig,
+ BaseXPathDocumentSubprocessor systemMetadataProcessor, HTTPService httpService)
+ throws XPathExpressionException, ParserConfigurationException,
+ IOException, SAXException {
this.xmlNamespaceConfig = xmlNamespaceConfig;
this.systemMetadataProcessor = systemMetadataProcessor;
this.httpService = httpService;
@@ -121,7 +124,8 @@ private void init() throws ParserConfigurationException, XPathExpressionExceptio
sysmetaSolrFields = systemMetadataProcessor.getFieldList();
copyFields = httpService.getSolrCopyFields();
if (copyFields != null) {
- log.info("SolrIndex.init - the size of the copy fields from the solr schema is : " + copyFields.size());
+ log.info("SolrIndex.init - the size of the copy fields from the solr schema is : "
+ + copyFields.size());
for(String copyField : copyFields) {
log.debug("SolrIndex.init - the copy field from the solr schema: " + copyField);
}
@@ -166,10 +170,9 @@ public void setDeleteSubprocessors(
/**
* Generate the index for the given information
- * @param id
- * @param systemMetadata
- * @param dataStream
- * @return
+ * @param id the id which will be indexed
+ * @param isSysmetaChangeOnly if this is a change on the system metadata only
+ * @return a map of solr doc with ids
* @throws IOException
* @throws SAXException
* @throws ParserConfigurationException
@@ -181,24 +184,21 @@ public void setDeleteSubprocessors(
* @throws NotFound
* @throws NotImplemented
*/
- private Map process(String id, SystemMetadata systemMetadata, String objectPath, boolean isSysmetaChangeOnly)
- throws IOException, SAXException, ParserConfigurationException,
- XPathExpressionException, MarshallingException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
+ private Map process(String id, boolean isSysmetaChangeOnly)
+ throws IOException, SAXException, ParserConfigurationException,
+ XPathExpressionException, MarshallingException, EncoderException,
+ SolrServerException, NotImplemented, NotFound, UnsupportedType{
log.debug("SolrIndex.process - trying to generate the solr doc object for the pid "+id);
long start = System.currentTimeMillis();
Map docs = new HashMap();
// Load the System Metadata document
- ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
- TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
- ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
- try {
+ try (InputStream systemMetadataStream = ObjectManager.getInstance().getSystemMetadataStream(id)){
docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream);
} catch (Exception e) {
log.error(e.getMessage(), e);
throw new SolrServerException(e.getMessage());
}
long end = System.currentTimeMillis();
- //log.info("SolrIndex.process - the time for processing the system metadata for the pid " + id + " is " + (end-start) + "milliseconds.");
// get the format id for this object
String formatId = docs.get(id).getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT);
boolean skipOtherProcessor = false;
@@ -207,19 +207,19 @@ private Map process(String id, SystemMetadata systemMetadata, S
//we need to make the solr doc exists (means the resource map was processed
SolrDoc existingResourceMapSolrDoc = httpService.getSolrDocumentById(solrQueryUri, id);
if (existingResourceMapSolrDoc != null ) {
- log.info("SolrIndex.process - This is a systemmetadata-change-only event for the resource map " + id +
- ". So we only use the system metadata subprocessor");
+ log.info("SolrIndex.process - This is a systemmetadata-change-only event for the "
+ + "resource map " + id + ". So we only use the system metadata subprocessor");
skipOtherProcessor = true;
} else {
- log.info("SolrIndex.process - There is no solr doc for the resource map " + id +
- ". Even though this is a systemmetadata-change-only event, we can NOT just reindex the systemmeta only.");
+ log.info("SolrIndex.process - There is no solr doc for the resource map " + id
+ + ". Even though this is a systemmetadata-change-only event, we can NOT "
+ + "just reindex the systemmeta only.");
}
-
}
- log.debug("SolrIndex.process - the value of skipOtherProcessors is " + skipOtherProcessor +
- " and the object path is " + objectPath + " for the id " + id);
+ log.debug("SolrIndex.process - the value of skipOtherProcessors is " + skipOtherProcessor
+ + " for the id " + id);
//if the objectPath is null, we should skip the other processes
- if (!skipOtherProcessor && objectPath != null) {
+ if (!skipOtherProcessor) {
log.debug("SolrIndex.process - Start to use subprocessor list to process " + id);
// Determine if subprocessors are available for this ID
if (subprocessors != null) {
@@ -229,28 +229,21 @@ private Map process(String id, SystemMetadata systemMetadata, S
if (subprocessor.canProcess(formatId)) {
// if so, then extract the additional information from the
// document.
- try {
+ try (InputStream dataStream = ObjectManager.getInstance().getObject(id)) {
// docObject = the resource map document or science
// metadata document.
// note that resource map processing touches all objects
// referenced by the resource map.
- //start = System.currentTimeMillis();
- FileInputStream dataStream = new FileInputStream(objectPath);
- //end = System.currentTimeMillis();
- //log.info("SolrIndex.process - the time for reading the file input stream " + " for the pid " + id + " is " + (end-start) + "milliseconds.");
- if (!dataStream.getFD().valid()) {
- log.error("SolrIndex.process - subprocessor "+ subprocessor.getClass().getName() +" couldn't process since it could not load OBJECT file for ID,Path=" + id + ", "
- + objectPath);
- //throw new Exception("Could not load OBJECT for ID " + id );
- } else {
- start = System.currentTimeMillis();
- docs = subprocessor.processDocument(id, docs, dataStream);
- end = System.currentTimeMillis();
- log.info("SolrIndex.process - the time for calling processDocument for the subprocessor " + subprocessor.getClass().getName() +" for the pid " + id + " is " + (end-start) + "milliseconds.");
- log.debug("SolrIndex.process - subprocessor "+ subprocessor.getClass().getName() +" generated solr doc for id "+id);
- }
+ start = System.currentTimeMillis();
+ docs = subprocessor.processDocument(id, docs, dataStream);
+ end = System.currentTimeMillis();
+ log.info("SolrIndex.process - the time for calling processDocument "
+ + "for the subprocessor " + subprocessor.getClass().getName()
+ +" for the pid " + id + " is " + (end-start) + "milliseconds.");
+ log.debug("SolrIndex.process - subprocessor "
+ + subprocessor.getClass().getName()
+ +" generated solr doc for id "+id);
} catch (Exception e) {
- e.printStackTrace();
log.error(e.getMessage(), e);
throw new SolrServerException(e.getMessage());
}
@@ -258,14 +251,6 @@ private Map process(String id, SystemMetadata systemMetadata, S
}
}
}
-
- /*if(docs != null) {
- SolrDoc solrDoc = docs.get(id);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- solrDoc.serialize(baos, "UTF-8");
- log.warn("after process the science metadata, the solr doc is \n"+baos.toString());
- }*/
-
// TODO: in the XPathDocumentParser class in d1_cn_index_process module,
// merge is only for resource map. We need more work here.
for (SolrDoc mergeDoc : docs.values()) {
@@ -273,15 +258,6 @@ private Map process(String id, SystemMetadata systemMetadata, S
mergeWithIndexedDocument(mergeDoc);
}
}
-
- /*if(docs != null) {
- SolrDoc solrDoc = docs.get(id);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- solrDoc.serialize(baos, "UTF-8");
- log.warn("after merge, the solr doc is \n"+baos.toString());
- }*/
- //SolrElementAdd addCommand = getAddCommand(new ArrayList(docs.values()));
-
return docs;
}
@@ -310,16 +286,11 @@ private Map process(String id, SystemMetadata systemMetadata, S
// TODO:combine merge function with resourcemap merge function
private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
- EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
- //Retrieve the existing solr document from the solr server for the id. If it doesn't exist, null or empty solr doc will be returned.
+ EncoderException, XPathExpressionException, SolrServerException,
+ ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
+ //Retrieve the existing solr document from the solr server for the id. If it doesn't exist,
+ //null or empty solr doc will be returned.
SolrDoc indexedDocument = httpService.getSolrDocumentById(solrQueryUri, indexDocument.getIdentifier());
- /*int wait = new Double(Math.random() * 10000).intValue();
- System.out.println("++++++++++++++++++++++++++++ the wait time is " + wait);
- try {
- Thread.sleep(wait);
- } catch (Exception e) {
-
- }*/
if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
return indexDocument;
} else {
@@ -330,17 +301,25 @@ private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOExcepti
.getName().equals(SolrElementField.FIELD_RESOURCEMAP))
&& !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
indexDocument.addField(field);
- } else if (!copyFields.contains(field.getName()) && !indexDocument.hasField(field.getName()) && !isSystemMetadataField(field.getName())) {
+ } else if (!copyFields.contains(field.getName())
+ && !indexDocument.hasField(field.getName())
+ && !isSystemMetadataField(field.getName())) {
// we don't merge the system metadata field since they can be removed.
// we don't merge the copyFields as well
- log.debug("SolrIndex.mergeWithIndexedDocument - put the merge-needed existing solr field "+field.getName()+" with value "+field.getValue()+" from the solr server to a vector. We will merge it later.");
- //indexDocument.addField(field);
- mergeNeededFields.add(field);//record this name since we can have mutiple name/value for the same name. See https://projects.ecoinformatics.org/ecoinfo/issues/7168
+ log.debug("SolrIndex.mergeWithIndexedDocument - put the merge-needed existing solr field "
+ + field.getName() + " with value " + field.getValue()
+ + " from the solr server to a vector. We will merge it later.");
+ //record this name since we can have multiple name/value for the same name.
+ //See https://projects.ecoinformatics.org/ecoinfo/issues/7168
+ mergeNeededFields.add(field);
}
}
if(mergeNeededFields != null) {
for(SolrElementField field: mergeNeededFields) {
- log.debug("SolrIndex.mergeWithIndexedDocument - merge the existing solr field "+field.getName()+" with value "+field.getValue()+" from the solr server to the currently processing document of "+indexDocument.getIdentifier());
+ log.debug("SolrIndex.mergeWithIndexedDocument - merge the existing solr field "
+ + field.getName() + " with value " + field.getValue()
+ +" from the solr server to the currently processing document of "
+ + indexDocument.getIdentifier());
indexDocument.addField(field);
}
}
@@ -348,7 +327,7 @@ private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOExcepti
return indexDocument;
}
}
-
+
/*
* If the given field name is a system metadata field.
*/
@@ -357,7 +336,9 @@ private boolean isSystemMetadataField(String fieldName) {
if (fieldName != null && !fieldName.trim().equals("") && sysmetaSolrFields != null) {
for(ISolrField field : sysmetaSolrFields) {
if(field != null && field.getName() != null && field.getName().equals(fieldName)) {
- log.debug("SolrIndex.isSystemMetadataField - the field name "+fieldName+" matches one record of system metadata field list. It is a system metadata field.");
+ log.debug("SolrIndex.isSystemMetadataField - the field name " + fieldName
+ + " matches one record of system metadata field list. It is a "
+ + "system metadata field.");
is = true;
break;
}
@@ -369,21 +350,13 @@ private boolean isSystemMetadataField(String fieldName) {
/**
* Check the parameters of the insert or update methods.
- * @param pid
- * @param systemMetadata
- * @param data
+ * @param pid the pid which will be indexed
* @throws SolrServerException
*/
- private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws InvalidRequest {
+ private void checkParams(Identifier pid) throws InvalidRequest {
if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) {
throw new InvalidRequest("0000", "The identifier of the indexed document should not be null or blank.");
}
- if(systemMetadata == null) {
- throw new InvalidRequest("0000", "The system metadata of the indexed document "+pid.getValue()+ " should not be null.");
- }
- /*if(objectPath == null) {
- throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null.");
- }*/
}
/**
@@ -391,23 +364,25 @@ private void checkParams(Identifier pid, SystemMetadata systemMetadata, String o
* @param pid the id of this document
* @param systemMetadata the system metadata associated with the data object
* @param data the path to the object file itself
- * @throws SolrServerException
- * @throws MarshallingException
- * @throws EncoderException
- * @throws UnsupportedType
- * @throws NotFound
- * @throws NotImplemented
+ * @throws SolrServerException
+ * @throws MarshallingException
+ * @throws EncoderException
+ * @throws UnsupportedType
+ * @throws NotFound
+ * @throws NotImplemented
* @throws InvalidRequest
*/
- private void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath, boolean isSysmetaChangeOnly)
+ private void insert(Identifier pid, boolean isSysmetaChangeOnly)
throws IOException, SAXException, ParserConfigurationException, InvalidRequest,
- XPathExpressionException, SolrServerException, MarshallingException, EncoderException, NotImplemented, NotFound, UnsupportedType {
- checkParams(pid, systemMetadata, objectPath);
+ XPathExpressionException, SolrServerException, MarshallingException,
+ EncoderException, NotImplemented, NotFound, UnsupportedType {
+ checkParams(pid);
log.debug("SolrIndex.insert - trying to insert the solrDoc for object "+pid.getValue());
long start = System.currentTimeMillis();
- Map docs = process(pid.getValue(), systemMetadata, objectPath, isSysmetaChangeOnly);
+ Map docs = process(pid.getValue(), isSysmetaChangeOnly);
long end = System.currentTimeMillis();
- log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is " + (end-start) + " milliseconds.");
+ log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is "
+ + (end-start) + " milliseconds.");
//transform the Map to the SolrInputDocument which can be used by the solr server
if(docs != null) {
start = System.currentTimeMillis();
@@ -416,18 +391,22 @@ private void insert(Identifier pid, SystemMetadata systemMetadata, String object
if(id != null) {
SolrDoc doc = docs.get(id);
insertToIndex(doc);
- log.debug("SolrIndex.insert - inserted the solr-doc object of pid "+id+", which relates to object "+pid.getValue()+", into the solr server.");
+ log.debug("SolrIndex.insert - inserted the solr-doc object of pid " + id
+ + ", which relates to object " + pid.getValue()
+ + ", into the solr server.");
}
}
end = System.currentTimeMillis();
- log.info("SolrIndex.insert - finished to insert the solrDoc to the solr server for object " + pid.getValue() +
- " and it took " + (end-start) + " milliseconds.");
+ log.info("SolrIndex.insert - finished to insert the solrDoc to the solr server for "
+ + " object " + pid.getValue() + " and it took " + (end-start)
+ + " milliseconds.");
} else {
- log.debug("SolrIndex.insert - the genered solrDoc is null. So we will not index the object "+pid.getValue());
+ log.debug("SolrIndex.insert - the generated solrDoc is null. So we will not index the "
+ + "object "+pid.getValue());
}
}
-
+
/*
* Insert a SolrDoc to the solr server.
*/
@@ -438,44 +417,14 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException
httpService.sendUpdate(solrIndexUri, addCommand, "UTF-8");
}
-
- /*private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
- if(doc != null ) {
- SolrInputDocument solrDoc = new SolrInputDocument();
- List list = doc.getFieldList();
- if(list != null) {
- //solrDoc.addField(METACATPIDFIELD, pid);
- Iterator iterator = list.iterator();
- while (iterator.hasNext()) {
- SolrElementField field = iterator.next();
- if(field != null) {
- String value = field.getValue();
- String name = field.getName();
- log.trace("SolrIndex.insertToIndex - add name/value pair - "+name+"/"+value);
- solrDoc.addField(name, value);
- }
- }
- }
- if(!solrDoc.isEmpty()) {
- try {
- UpdateResponse response = solrServer.add(solrDoc);
- solrServer.commit();
- } catch (SolrServerException e) {
- throw e;
- } catch (IOException e) {
- throw e;
- }
- //System.out.println("=================the response is:\n"+response.toString());
- }
- }
- }*/
-
/**
* Update the solr index. This method handles the three scenarios:
* 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
* remove the index for the previous version(s) and generate new index for the doc.
* 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
* index for the doc.
+ * @param pid the identifier of object which will be indexed
+ * @param isSysmetaChangeOnly the flag indicating if the change is system metadata only
* @throws NotFound
* @throws ServiceFailure
* @throws NotImplemented
@@ -494,34 +443,38 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException
* @throws IllegalAccessException
* @throws InstantiationException
*/
- public void update(Identifier pid, String relativePath, boolean isSysmetaChangeOnly) throws InvalidToken, NotAuthorized,
- NotImplemented, ServiceFailure, NotFound, XPathExpressionException, UnsupportedType,
- SAXException, ParserConfigurationException, SolrServerException, MarshallingException,
- EncoderException, InterruptedException, IOException, InvalidRequest, InstantiationException, IllegalAccessException {
- log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object "+pid.getValue());
- String objectPath = null;
- SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativePath);
- objectPath = ObjectManager.getInstance().getFilePath(relativePath, systemMetadata.getFormatId().getValue());
+ public void update(Identifier pid, boolean isSysmetaChangeOnly)
+ throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound,
+ XPathExpressionException, UnsupportedType, SAXException,
+ ParserConfigurationException, SolrServerException, MarshallingException,
+ EncoderException, InterruptedException, IOException, InvalidRequest,
+ InstantiationException, IllegalAccessException {
+ log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object "
+ + pid.getValue());
try {
- insert(pid, systemMetadata, objectPath, isSysmetaChangeOnly);
+ insert(pid, isSysmetaChangeOnly);
} catch (SolrServerException e) {
if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) {
- log.info("SolrIndex.update - Indexer grabbed an older verion (version conflict) of the solr doc for object " +
- pid.getValue() + ". It will try " + VERSION_CONFLICT_MAX_ATTEMPTS + " to fix the issues");
+ log.info("SolrIndex.update - Indexer grabbed an older version (version conflict) of "
+ + "the solr doc for object " + pid.getValue()
+ + ". It will try " + VERSION_CONFLICT_MAX_ATTEMPTS + " to fix the issues");
for (int i=0; i docsToUpdate = getUpdatedSolrDocsByRemovingResourceMap(pid);
if (docsToUpdate != null && !docsToUpdate.isEmpty()) {
- //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
- //httpService.sendUpdate(solrIndexUri, addCommand);
for(SolrDoc doc : docsToUpdate) {
- //deleteDocFromIndex(doc.getIdentifier());
insertToIndex(doc);
}
}
break;
} catch (SolrServerException e) {
if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) {
- log.info("SolrIndex.removeDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object" +
- ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
+ log.info("SolrIndex.removeDataPackage - Indexer grabbed an older version "
+ + "(version conflict) of the solr doc for object"
+ + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )
+ + " to fix the issues");
} else {
throw e;
}
@@ -650,12 +607,13 @@ private void removeDataPackage(String pid) throws IOException, UnsupportedType,
* Get the list of the solr doc which need to be updated because the removal of the resource map
*/
private List getUpdatedSolrDocsByRemovingResourceMap(String resourceMapId)
- throws UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, SAXException, MalformedURLException, IOException, XPathExpressionException, EncoderException {
+ throws UnsupportedType, NotFound, SolrServerException, ParserConfigurationException,
+ SAXException, MalformedURLException, IOException, XPathExpressionException,
+ EncoderException {
List updatedSolrDocs = null;
if (resourceMapId != null && !resourceMapId.trim().equals("")) {
- /*List docsContainResourceMap = httpService.getDocumentsByResourceMap(
- solrQueryUri, resourceMapId);*/
- List docsContainResourceMap = httpService.getDocumentsByResourceMap(solrQueryUri, resourceMapId);
+ List docsContainResourceMap = httpService
+ .getDocumentsByResourceMap(solrQueryUri, resourceMapId);
updatedSolrDocs = removeResourceMapRelationship(docsContainResourceMap,
resourceMapId);
}
@@ -740,15 +698,6 @@ private List removeAggregatedItems(String targetResourceMapId, SolrDoc
doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
targetResourceMapId);
updatedSolrDocs.add(doc);
- /*if (aggregatedItemsInDoc.size() > 1) {
-
-
- } else {
- //multiple resource map aggregate same metadata and data. Just remove the resource map
- doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
- targetResourceMapId);
- updatedSolrDocs.add(doc);
- }*/
}
}
return updatedSolrDocs;
@@ -835,32 +784,43 @@ private List mergeUpdatedSolrDocs(ListremovedDocumentBy, List<
SolrDoc docInRemovedDocs = removedDocuments.get(j);
if(docInRemovedDocBy.getIdentifier().equals(docInRemovedDocs.getIdentifier())) {
//find the same doc in both list. let's merge them.
- //first get all the documents element from the docWithDocs(it has the correct information about the documents element)
- List idsInDocuments = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
- docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_DOCUMENTS);//clear out any documents element in docInRemovedDocBy
+ //first get all the documents element from the docWithDocs
+ //(it has the correct information about the documents element)
+ List idsInDocuments = docInRemovedDocs
+ .getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
+ //clear out any documents element in docInRemovedDocBy
+ docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_DOCUMENTS);
//add the Documents element from the docInRemovedDocs if it has any.
- // The docInRemovedDocs has the correct information about the documentBy. Now it copied the correct information of the documents element.
- // So docInRemovedDocs has both correct information about the documentBy and documents elements.
+ // The docInRemovedDocs has the correct information about the documentBy.
+ // Now the correct information about the documents element has been copied.
+ // So docInRemovedDocs has the correct information about both the documentBy
+ // and documents elements.
if(idsInDocuments != null) {
for(String id : idsInDocuments) {
if(id != null && !id.trim().equals("")) {
- docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_DOCUMENTS, id));
+ docInRemovedDocBy.addField(
+ new SolrElementField(SolrElementField.FIELD_DOCUMENTS, id));
}
}
}
//intersect the resource map ids.
- List resourceMapIdsInWithDocs = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
- List resourceMapIdsInWithDocBy = docInRemovedDocBy.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
+ List resourceMapIdsInWithDocs = docInRemovedDocs
+ .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
+ List resourceMapIdsInWithDocBy = docInRemovedDocBy
+ .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_RESOURCEMAP);
- Collection resourceMapIds = CollectionUtils.union(resourceMapIdsInWithDocs, resourceMapIdsInWithDocBy);
+ Collection resourceMapIds = CollectionUtils.union(resourceMapIdsInWithDocs,
+ resourceMapIdsInWithDocBy);
if(resourceMapIds != null) {
for(Object idObj : resourceMapIds) {
String id = (String)idObj;
- docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_RESOURCEMAP, id));
+ docInRemovedDocBy.addField(new SolrElementField(
+ SolrElementField.FIELD_RESOURCEMAP, id));
}
}
- //we don't need do anything about the documentBy elements since the docInRemovedDocBy has the correct information.
+ //we don't need to do anything about the documentBy elements since the
+ //docInRemovedDocBy has the correct information.
mergedDocuments.add(docInRemovedDocBy);
//delete the two documents from the list
removedDocumentBy.remove(i);
@@ -870,8 +830,8 @@ private List mergeUpdatedSolrDocs(ListremovedDocumentBy, List<
}
}
- // when we get there, if the two lists are empty, this will be a perfect merge. However, if something are left. we
- //just put them in.
+ // When we get here, if the two lists are empty, this will be a perfect merge.
+ // However, if some items are left, we just put them in.
for(SolrDoc doc: removedDocumentBy) {
mergedDocuments.add(doc);
}
@@ -886,7 +846,8 @@ private List mergeUpdatedSolrDocs(ListremovedDocumentBy, List<
/*
* Remove a pid which is part of resource map.
*/
- private void removeFromDataPackage(String pid) throws XPathExpressionException, IOException, EncoderException, SolrServerException {
+ private void removeFromDataPackage(String pid) throws XPathExpressionException, IOException,
+ EncoderException, SolrServerException {
SolrDoc indexedDoc = httpService.getSolrDocumentById(solrQueryUri, pid);
deleteDocFromIndex(pid);
List documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
@@ -903,8 +864,10 @@ private void removeFromDataPackage(String pid) throws XPathExpressionException,
break;
} catch (SolrServerException e) {
if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) {
- log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object " +
- documentsValue + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
+ log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older "
+ + "verion (version conflict) of the solr doc for object "
+ + documentsValue + ". It will try "
+ + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
} else {
throw e;
}
@@ -927,8 +890,10 @@ private void removeFromDataPackage(String pid) throws XPathExpressionException,
break;
} catch (SolrServerException e) {
if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) {
- log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object " +
- documentedByValue + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
+ log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older "
+ + "verion (version conflict) of the solr doc for object "
+ + documentedByValue + ". It will try "
+ + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
} else {
throw e;
}
@@ -938,68 +903,18 @@ private void removeFromDataPackage(String pid) throws XPathExpressionException,
}
}
- /*
- * Remove a pid from the solr index
- */
- /*private synchronized void removeFromIndex(String identifier) throws Exception {
-
-
- Map docs = new HashMap();
- for (IDocumentDeleteSubprocessor deleteSubprocessor : deleteSubprocessors) {
- docs.putAll(deleteSubprocessor.processDocForDelete(identifier, docs));
- }
- List docsToUpdate = new ArrayList();
- List idsToIndex = new ArrayList();
- for (String idToUpdate : docs.keySet()) {
- if (docs.get(idToUpdate) != null) {
- docsToUpdate.add(docs.get(idToUpdate));
- } else {
- idsToIndex.add(idToUpdate);
- }
- }
-
- // update the docs we have
- for (SolrDoc docToUpdate : docsToUpdate) {
- insertToIndex(docToUpdate);
- }
-
- // delete this one
- deleteDocFromIndex(identifier);
-
- // index the rest
- //TODO: we need to figure out how to get the file path
- for (String idToIndex : idsToIndex) {
- Identifier pid = new Identifier();
- pid.setValue(idToIndex);
- SystemMetadata sysMeta = DistributedMapsFactory.getSystemMetadata(idToIndex);
- if (SolrDoc.visibleInIndex(sysMeta)) {
- String objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
- boolean isSysmetaChangeOnlyEvent = false;
- insert(pid, sysMeta, objectPath, isSysmetaChangeOnlyEvent);
- }
- }
-
- }*/
-
private void deleteDocFromIndex(String pid) throws IOException {
if (pid != null && !pid.trim().equals("")) {
try {
- //solrServer.deleteById(pid);
- //solrServer.commit();
httpService.sendSolrDelete(pid, solrIndexUri);
- //} catch (SolrServerException e) {
- //throw e;
-
} catch (IOException e) {
throw e;
}
}
-
}
-
/**
* Set the http service
* @param service
@@ -1015,5 +930,5 @@ public void setHttpService(HTTPService service) {
public HTTPService getHttpService() {
return httpService;
}
-
+
}
diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java
index f897903f..df67f0f7 100644
--- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java
+++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java
@@ -1,42 +1,25 @@
-/**
- * This work was created by participants in the DataONE project, and is
- * jointly copyrighted by participating institutions in DataONE. For
- * more information on DataONE, see our web site at http://dataone.org.
- *
- * Copyright 2022
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
package org.dataone.cn.indexer.object;
+import java.io.ByteArrayInputStream;
import java.io.File;
+import java.io.FileNotFoundException;
import java.io.IOException;
-import java.nio.file.FileSystems;
-import java.nio.file.Files;
+import java.io.InputStream;
+import java.security.NoSuchAlgorithmException;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.log4j.Logger;
import org.dataone.client.auth.AuthTokenSession;
import org.dataone.client.exception.ClientSideException;
import org.dataone.client.rest.HttpMultipartRestClient;
import org.dataone.client.rest.MultipartRestClient;
-import org.dataone.client.v2.formats.ObjectFormatCache;
import org.dataone.client.v2.impl.MultipartCNode;
import org.dataone.client.v2.impl.MultipartD1Node;
import org.dataone.client.v2.impl.MultipartMNode;
import org.dataone.configuration.Settings;
import org.dataone.exceptions.MarshallingException;
+import org.dataone.indexer.storage.Storage;
import org.dataone.service.exceptions.InvalidToken;
import org.dataone.service.exceptions.NotAuthorized;
import org.dataone.service.exceptions.NotFound;
@@ -44,7 +27,6 @@
import org.dataone.service.exceptions.ServiceFailure;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.Session;
-import org.dataone.service.types.v2.ObjectFormat;
import org.dataone.service.types.v2.SystemMetadata;
import org.dataone.service.util.TypeMarshaller;
@@ -56,164 +38,143 @@
*/
public class ObjectManager {
private static ObjectManager manager = null;
- private static String dataRootDir = Settings.getConfiguration().getString("index.data.root.directory");
- private static String documentRootDir = Settings.getConfiguration().getString("index.document.root.directory");
private static String nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL");
private static String DataONEauthToken = null;
private static Logger logger = Logger.getLogger(ObjectManager.class);
+ private static Storage storage = null;
private static final String TOKEN_VARIABLE_NAME = "DATAONE_AUTH_TOKEN";
private static final String TOKEN_FILE_PATH_PROP_NAME = "dataone.nodeToken.file";
- private static final String SYSTEMMETA_FILE_NAME = "systemmetadata.xml";
private static MultipartD1Node d1Node = null;
private static Session session = null;
- private static boolean ifDataAndDocRootSame = false;
-
+
+ static {
+ try {
+ refreshD1Node();
+ } catch (ServiceFailure e) {
+ logger.warn("Metacat cannot initialize the d1Node since " + e.getMessage());
+ }
+ storage = Storage.getInstance();
+ manager = new ObjectManager();
+ }
+
+
/**
* Private constructor
- * @throws ServiceFailure
*/
- private ObjectManager() throws ServiceFailure {
- if (dataRootDir == null || dataRootDir.trim().equals("")) {
- throw new ServiceFailure("0000", "The data root directory specified by the property index.data.root.directory is blank in the properties file");
- }
- if (documentRootDir == null || documentRootDir.trim().equals("")) {
- throw new ServiceFailure("0000", "The metadata root directory specified by the property index.document.root.directory is blank in the properties file");
- }
- if (!Files.exists(FileSystems.getDefault().getPath(dataRootDir))) {
- throw new ServiceFailure("0000", "The data root directory " + dataRootDir +
- " specified in the properties file doesn't exist");
- }
- if (!Files.exists(FileSystems.getDefault().getPath(documentRootDir))) {
- throw new ServiceFailure("0000", "The document root directory " + documentRootDir +
- " specified in the properties file doesn't exist");
- }
- if (!dataRootDir.endsWith("/")) {
- dataRootDir = dataRootDir + "/";
- }
- if (!documentRootDir.endsWith("/")) {
- documentRootDir = documentRootDir + "/";
- }
-
- if (documentRootDir.equals(dataRootDir)) {
- ifDataAndDocRootSame = true;
- }
- logger.info("ObjectManager.constructor - the root document directory is " +
- documentRootDir + " and the root data directory is " + dataRootDir +
- " Are they same?" + ifDataAndDocRootSame);
- if (d1Node == null) {
- refreshD1Node();
- } else {
- logger.info("ObjectManager ---NOT going to create the d1node with the url " + nodeBaseURL +
- " since the ObjectManager already was assigned a d1node with the url " + d1Node.getNodeBaseServiceUrl());
- }
+ private ObjectManager() {
}
-
+
/**
* Get an ObjectManager instance through the singleton pattern.
* @return the instance of ObjectManager
- * @throws ServiceFailure
*/
- public static ObjectManager getInstance() throws ServiceFailure {
- if (manager == null) {
- synchronized (ObjectManager.class) {
- if (manager == null) {
- manager = new ObjectManager();
- }
- }
- }
+ public static ObjectManager getInstance() {
return manager;
}
-
+
/**
- * Get the absolute file path for a given relative path. If the relativePath is null or blank,
- * null will be returned
- * @param relativePath
- * @param objectFormat
- * @return the absolute file path
+ * Get the system metadata for the given id
+ * @param id the id to identify the system metadata
+ * @return the input stream of the system metadata associated with the id. It may be null.
+ * @throws InvalidToken
+ * @throws NotAuthorized
+ * @throws NotImplemented
+ * @throws ServiceFailure
* @throws NotFound
+ * @throws MarshallingException
+ * @throws IOException
+ * @throws NoSuchAlgorithmException
*/
- public String getFilePath(String relativePath, String objectFormat) throws NotFound {
- String absolutePath = null;
- if (relativePath != null && !relativePath.trim().equals("")) {
- if (ifDataAndDocRootSame) {
- absolutePath = documentRootDir + relativePath;
- } else if (objectFormat != null && !objectFormat.trim().equals("")) {
- ObjectFormat format =ObjectFormatCache.getInstance().getFormat(objectFormat);
- if (format.getFormatType().equals("METADATA")) {
- absolutePath = documentRootDir + relativePath;
- } else {
- absolutePath = dataRootDir + relativePath;
+ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized,
+ NotImplemented, ServiceFailure, NotFound,
+ NoSuchAlgorithmException, IOException, MarshallingException {
+ long start = System.currentTimeMillis();
+ //try to get the system metadata from the storage system first
+ InputStream sysmetaInputStream = null;
+ try {
+ sysmetaInputStream = storage.retrieveSystemMetadata(id);
+ long end = System.currentTimeMillis();
+ logger.info("Finish getting the system metadata via the file system for the pid " + id
+ + " and it took " + (end - start) + "milliseconds");
+ } catch (FileNotFoundException exception ) {
+ if (d1Node != null) {
+ // Metacat can't find the system metadata from the storage system.
+ // So try to get it from the dataone api
+ SystemMetadata sysmeta = null;
+ Identifier identifier = new Identifier();
+ identifier.setValue(id);
+ sysmeta = d1Node.getSystemMetadata(session, identifier);
+ logger.debug("Finish getting the system metadata via the DataONE API call for the pid "
+ + id);
+ if (sysmeta != null) {
+ ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
+ TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream);
+ sysmetaInputStream =
+ new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
}
+ long end = System.currentTimeMillis();
+ logger.info("Finish getting the system metadata via DataONE API for the pid " + id
+ + " and it took " + (end - start) + "milliseconds");
}
}
- logger.debug("ObjectManager.getFilePath - the absolute file path for the relative file path " +
- relativePath + " is " + absolutePath);
- return absolutePath;
+ return sysmetaInputStream;
}
-
+
/**
- * Get the system metadata for the given id
+ * Get the system metadata object for the given identifier
* @param id the id to identify the system metadata
- * @param objectRelativePath the object path for this id. It can help to determine
- * the system metadata file if the system metadata file exists.
- * @return the system metadata associated with the id
+ * @return the system metadata object associated with the id. It may be null.
* @throws InvalidToken
* @throws NotAuthorized
* @throws NotImplemented
* @throws ServiceFailure
* @throws NotFound
- * @throws MarshallingException
- * @throws IOException
- * @throws IllegalAccessException
- * @throws InstantiationException
+ * @throws InstantiationException
+ * @throws IllegalAccessException
+ * @throws IOException
+ * @throws MarshallingException
+ * @throws NoSuchAlgorithmException
*/
- public SystemMetadata getSystemMetadata(String id, String relativeObjPath) throws InvalidToken, NotAuthorized, NotImplemented,
- ServiceFailure, NotFound, InstantiationException, IllegalAccessException, IOException, MarshallingException {
- SystemMetadata sysmeta = null;
- long start = System.currentTimeMillis();
- //try to get the system metadata from the file system first
- File sysmetaFile = getSysmetaFile(relativeObjPath);
- if (sysmetaFile != null) {
- sysmeta = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, sysmetaFile);
- long end = System.currentTimeMillis();
- logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via the file system for the pid " + id +
- " and it took " + (end - start) + "milliseconds");
- } else {
- //if we can't get it from the file system, get it from dataone API
- Identifier identifier = new Identifier();
- identifier.setValue(id);
- try {
- for (int i=0; i<5; i++) {
- try {
- sysmeta = d1Node.getSystemMetadata(session, identifier);
- break;
- } catch (ServiceFailure ee) {
- logger.warn("The DataONE api call doesn't get the system metadata since "
- + ee.getMessage() + ". This is " + i
- + " try and Indexer will try again.");
- try {
- Thread.sleep(300);
- } catch (InterruptedException ie) {
- logger.info("The sleep of the thread was interrupted.");
+ public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id)
+ throws InvalidToken, NotAuthorized, NoSuchAlgorithmException,
+ NotImplemented, ServiceFailure, NotFound,
+ InstantiationException, IllegalAccessException,
+ IOException, MarshallingException {
+ org.dataone.service.types.v1.SystemMetadata sysmeta = null;
+ try (InputStream input = getSystemMetadataStream(id)) {
+ if (input != null) {
+ try {
+ SystemMetadata sysmeta2 = TypeMarshaller
+ .unmarshalTypeFromStream(SystemMetadata.class, input);
+ sysmeta = sysmeta2;
+ } catch (Exception e) {
+ try (InputStream input2 = getSystemMetadataStream(id)) {
+ if (input2 != null) {
+ sysmeta = TypeMarshaller.unmarshalTypeFromStream(
+ org.dataone.service.types.v1.SystemMetadata.class, input2);
}
- continue;
}
}
- logger.debug("ObjectManager.getSystemMetadata - finish getting the system metadata via the DataONE API call for the pid " + id);
- } catch (NotAuthorized e) {
- logger.info("ObjectManager.getSystemMetadata - failed to get the system metadata via the DataONE API call for the pid " + id +
- " since it is not authorized. We will refresh the token and try again");
- refreshD1Node();
- sysmeta = d1Node.getSystemMetadata(session, identifier);
}
- long end = System.currentTimeMillis();
- logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via DataONE API for the pid " + id +
- " and it took " + (end - start) + "milliseconds");
}
return sysmeta;
}
-
+
+ /**
+ * Get the input stream of the content of the given pid
+ * @param pid the identifier of the content
+ * @return the input stream of the content
+ * @throws IllegalArgumentException
+ * @throws FileNotFoundException
+ * @throws NoSuchAlgorithmException
+ * @throws IOException
+ */
+ public InputStream getObject(String pid) throws IllegalArgumentException, FileNotFoundException,
+ NoSuchAlgorithmException, IOException {
+ return storage.retrieveObject(pid);
+ }
+
/**
* Set the d1 node for this object manager.
* We only use it for testing
@@ -222,63 +183,12 @@ public SystemMetadata getSystemMetadata(String id, String relativeObjPath) throw
public static void setD1Node(MultipartD1Node node) {
d1Node = node;
}
-
- /**
- * Get the system metadata file path from the objectPath.
- * We assume the object and system metadata file are in the same directory.
- * The system metadata file has a fixed name - systemmetadata.xml
- * @param relativeObjPath the relative path of the object
- * @return the file of system metadata. If it is null, this means the system metadata file does not exist.
- */
- protected static File getSysmetaFile(String relativeObjPath) {
- File sysmetaFile = null;
- String sysmetaPath = null;
- String relativeSysmetaPath = null;
- if (relativeObjPath != null) {
- if (relativeObjPath.contains(File.separator)) {
- logger.debug("ObjectManager.getSysmetaFile - the object file path " + relativeObjPath + " has at least one path separator " + File.pathSeparator);
- relativeSysmetaPath = relativeObjPath.substring(0, relativeObjPath.lastIndexOf(File.separator) + 1) + SYSTEMMETA_FILE_NAME;
- } else {
- logger.debug("ObjectManager.getSysmetaFile - the object file path " + relativeObjPath + " doesnot have any path separator " + File.pathSeparator);
- //There is not path information in the object path ( it only has the file name). So we just simply return systemmetadata.xml
- relativeSysmetaPath = SYSTEMMETA_FILE_NAME;
- }
- logger.debug("ObjectManager.getSysmetaFile - the relative system metadata file path for the object path " +
- relativeObjPath + " is " + relativeSysmetaPath);
- if (ifDataAndDocRootSame) {
- sysmetaPath = documentRootDir + relativeSysmetaPath;
- sysmetaFile = new File(sysmetaPath);
- if (!sysmetaFile.exists()) {
- //the system metadata file doesn't exist and we set it to null
- sysmetaPath = null;
- sysmetaFile = null;
- }
- } else {
- //try if this object is a document first since we have no idea if the object is metadata or data.
- sysmetaPath = documentRootDir + relativeSysmetaPath;
- sysmetaFile = new File(sysmetaPath);
- if (!sysmetaFile.exists()) {
- // try data
- sysmetaPath = dataRootDir + relativeSysmetaPath;
- sysmetaFile = new File(sysmetaPath);
- if (!sysmetaFile.exists()) {
- //the system metadata file doesn't exist and we set it to null
- sysmetaPath = null;
- sysmetaFile = null;
- }
- }
- }
- }
- logger.debug("ObjectManager.getSysmetaFile - the final system metadata file path for the object path " +
- relativeObjPath + " is " + sysmetaPath + ". Null means that not system metadata file exists.");
- return sysmetaFile;
- }
-
+
/**
* In case the token expired, the method will retrieve the token and create a new d1 node
* @throws ServiceFailure
*/
- private void refreshD1Node() throws ServiceFailure {
+ private static void refreshD1Node() throws ServiceFailure {
//get the token
DataONEauthToken = System.getenv(TOKEN_VARIABLE_NAME);
if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) {
@@ -321,7 +231,7 @@ private void refreshD1Node() throws ServiceFailure {
* @param authToken the authentication token
* @return the DataONE session
*/
- private Session createSession(String authToken) {
+ private static Session createSession(String authToken) {
Session session = null;
if (authToken == null || authToken.trim().equals("")) {
logger.info("ObjectManager.createSession - Creating the public session");
@@ -340,10 +250,9 @@ private Session createSession(String authToken) {
* @param serviceUrl the service URL for the node we are connecting to
* @return a DataONE MultipartCNode object
* @throws ClientSideException
- * @throws IOException
- * @throws MetadigException
+ * @throws IOException
*/
- private MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws IOException, ClientSideException {
+ private static MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws IOException, ClientSideException {
MultipartRestClient mrc = null;
MultipartD1Node d1Node = null;
// First create a default HTTP client
@@ -366,7 +275,7 @@ private MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) t
* @param nodeStr either a DataONE node serviceURL (e.g. https://knb.ecoinformatics.org/knb/d1/mn)
* or a DataONE node identifier (e.g. urn:node:CN)
*/
- private Boolean isCN(String nodeStr) {
+ private static Boolean isCN(String nodeStr) {
Boolean isCN = false;
// match node urn, e.g. "https://cn.dataone.org/cn"
if (nodeStr.matches("^\\s*urn:node:.*")) {
diff --git a/src/main/java/org/dataone/cn/indexer/parser/BaseXPathDocumentSubprocessor.java b/src/main/java/org/dataone/cn/indexer/parser/BaseXPathDocumentSubprocessor.java
index 1be93575..e15cc90f 100644
--- a/src/main/java/org/dataone/cn/indexer/parser/BaseXPathDocumentSubprocessor.java
+++ b/src/main/java/org/dataone/cn/indexer/parser/BaseXPathDocumentSubprocessor.java
@@ -166,7 +166,7 @@ public void setMatchDocuments(List matchDocuments) {
this.matchDocuments = matchDocuments;
}
- public static void setXmlNamespaceConfig(XMLNamespaceConfig xmlNamespaceConfig) {
+ public void setXmlNamespaceConfig(XMLNamespaceConfig xmlNamespaceConfig) {
xpath.setNamespaceContext(xmlNamespaceConfig);
}
diff --git a/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java b/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java
index e921a055..ca181ed6 100644
--- a/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java
+++ b/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java
@@ -80,42 +80,6 @@ public static Identifier getPid(Identifier identifier)
return pid;
}
- /**
- * Check if the given identifier is a PID or a SID
- *
- * @param identifier
- * @return true if the identifier is a SID, false if a PID
- * @throws NotFound
- * @throws ServiceFailure
- * @throws NotImplemented
- * @throws NotAuthorized
- * @throws InvalidToken
- * @throws MarshallingException
- * @throws IOException
- * @throws IllegalAccessException
- * @throws InstantiationException
- */
- public static boolean isSeriesId(Identifier identifier)
- throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound,
- InstantiationException, IllegalAccessException, IOException, MarshallingException {
-
- // if we have system metadata available via HZ map, then it's a PID
- String relativeObjPath = null;//we don't know the path
- SystemMetadata systemMetadata =
- ObjectManager.getInstance().getSystemMetadata(identifier.getValue(), relativeObjPath);
- if (systemMetadata != null) {
- return false;
- }
- //TODO: check that it's not just bogus value by looking up the pid?
-// Identifier pid = getPid(identifier);
-// if (pid.equals(identifier)) {
-// return false;
-// }
-
- // okay, it's a SID
- return true;
-
- }
}
diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java
index 55aa8a4f..c37c8f1d 100644
--- a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java
+++ b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java
@@ -1,25 +1,3 @@
-/**
- * This work was created by participants in the DataONE project, and is
- * jointly copyrighted by participating institutions in DataONE. For
- * more information on DataONE, see our web site at http://dataone.org.
- *
- * Copyright ${year}
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * $Id$
- */
-
package org.dataone.cn.indexer.resourcemap;
import java.io.ByteArrayOutputStream;
@@ -29,6 +7,7 @@
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
+import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -218,60 +197,65 @@ private void _init(InputStream is) throws OREException, URISyntaxException,
public static boolean representsResourceMap(String formatId) {
return RESOURCE_MAP_FORMAT.equals(formatId);
}
-
+
private boolean isHeadVersion(Identifier pid, Identifier sid) {
boolean isHead = true;
if(pid != null && sid != null) {
- /*Identifier newId = new Identifier();
- newId.setValue("peggym.130.5");
- if(pid.getValue().equals("peggym.130.4") && HazelcastClientFactory.getSystemMetadataMap().get(newId) != null) {
- isHead =false;
- } else if (pid.getValue().equals("peggym.130.4") && HazelcastClientFactory.getSystemMetadataMap().get(newId) == null) {
- isHead = true;
- }*/
Identifier head = null;
try {
- head = SeriesIdResolver.getPid(sid);//if the passed sid actually is a pid, the method will return the pid.
+ //if the passed sid actually is a pid, the method will return the pid.
+ head = SeriesIdResolver.getPid(sid);
} catch (Exception e) {
- System.out.println(""+e.getStackTrace());
isHead = true;
}
if(head != null ) {
- //System.out.println("||||||||||||||||||| the head version is "+ head.getValue()+" for sid "+sid.getValue());
- logger.info("||||||||||||||||||| the head version is "+ head.getValue()+" for sid "+sid.getValue());
+
+ logger.info("||||||||||||||||||| the head version is " + head.getValue()
+ + " for sid " + sid.getValue());
if(head.equals(pid)) {
- logger.info("||||||||||||||||||| the pid "+ pid.getValue()+" is the head version for sid "+sid.getValue());
+ logger.info("||||||||||||||||||| the pid " + pid.getValue()
+ + " is the head version for sid " + sid.getValue());
isHead=true;
} else {
- logger.info("||||||||||||||||||| the pid "+ pid.getValue()+" is NOT the head version for sid "+sid.getValue());
+ logger.info("||||||||||||||||||| the pid " + pid.getValue()
+ + " is NOT the head version for sid " + sid.getValue());
isHead=false;
}
} else {
- //System.out.println("||||||||||||||||||| can't find the head version for sid "+sid.getValue());
- logger.info("||||||||||||||||||| can't find the head version for sid "+sid.getValue() + " and we think the given pid "+pid.getValue()+" is the head version.");
+ logger.info("||||||||||||||||||| can't find the head version for sid "
+ + sid.getValue() + " and we think the given pid " + pid.getValue()
+ + " is the head version.");
}
}
return isHead;
}
- private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc mergeDocument) throws InvalidToken, NotAuthorized, NotImplemented,
- ServiceFailure, NotFound, InstantiationException, IllegalAccessException, IOException, MarshallingException {
-
- Identifier identifier = new Identifier();
- identifier.setValue(mergeDocument.getIdentifier());
- //SystemMetadata sysMeta = HazelcastClientFactory.getSystemMetadataMap().get(identifier);
- String relativeObjPath = null; //we don't know the path
- SystemMetadata sysMeta = ObjectManager.getInstance().getSystemMetadata(identifier.getValue(), relativeObjPath);
- if (sysMeta.getSeriesId() != null && sysMeta.getSeriesId().getValue() != null && !sysMeta.getSeriesId().getValue().trim().equals("")) {
- // skip this one
- if(!isHeadVersion(identifier, sysMeta.getSeriesId())) {
- //System.out.println("The id "+identifier+" is not the head of the serial id "+sysMeta.getSeriesId().getValue()+" So, skip merge this one!!!!!!!!!!!!!!!!!!!!!!"+mergeDocument.getIdentifier());
- logger.info("The id "+identifier+" is not the head of the serial id "+sysMeta.getSeriesId().getValue()+" So, skip merge this one!!!!!!!!!!!!!!!!!!!!!!"+mergeDocument.getIdentifier());
- return mergeDocument;
- }
-
- }
-
+ private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc mergeDocument)
+ throws InvalidToken, NotAuthorized, NotImplemented,
+ NoSuchAlgorithmException, ServiceFailure, NotFound, InstantiationException,
+ IllegalAccessException, IOException, MarshallingException {
+
+ Identifier identifier = new Identifier();
+ identifier.setValue(mergeDocument.getIdentifier());
+ try {
+ SystemMetadata sysMeta = (SystemMetadata) ObjectManager.getInstance()
+ .getSystemMetadata(identifier.getValue());
+ if (sysMeta.getSeriesId() != null && sysMeta.getSeriesId().getValue() != null
+ && !sysMeta.getSeriesId().getValue().trim().equals("")) {
+ // skip this one
+ if(!isHeadVersion(identifier, sysMeta.getSeriesId())) {
+ logger.info("The id " + identifier + " is not the head of the serial id "
+ + sysMeta.getSeriesId().getValue()
+ + " So, skip merge this one!!!!!!!!!!!!!!!!!!!!!!"
+ + mergeDocument.getIdentifier());
+ return mergeDocument;
+ }
+ }
+ } catch (ClassCastException e) {
+ logger.warn("The systemmetadata is a v1 object and we need to do nothing");
+ }
+
+
if (mergeDocument.hasField(SolrElementField.FIELD_ID) == false) {
mergeDocument.addField(new SolrElementField(SolrElementField.FIELD_ID, resourceEntry
.getIdentifier()));
@@ -362,19 +346,22 @@ public List mergeIndexedDocuments(List docs) {
List mergedDocuments = new ArrayList();
for (ResourceEntry resourceEntry : this.resourceMap.values()) {
for (SolrDoc doc : docs) {
- //System.out.println(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc id is "+doc.getIdentifier() +" in the thread "+Thread.currentThread().getId());
- //System.out.println(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc series id is "+doc.getSeriesId()+" in the thread "+Thread.currentThread().getId());
- //System.out.println(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the resource entry id is "+resourceEntry.getIdentifier()+" in the thread "+Thread.currentThread().getId());
- logger.debug(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc id is "+doc.getIdentifier() +" in the thread "+Thread.currentThread().getId());
- logger.debug(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc series id is "+doc.getSeriesId()+" in the thread "+Thread.currentThread().getId());
- logger.debug(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the resource entry id is "+resourceEntry.getIdentifier()+" in the thread "+Thread.currentThread().getId());
+
+ logger.debug("in mergeIndexedDocuments of ForesiteResourceMap, the doc id is "
+ + doc.getIdentifier() + " in the thread "+Thread.currentThread().getId());
+ logger.debug("in mergeIndexedDocuments of ForesiteResourceMap, the doc series id is "
+ + doc.getSeriesId() + " in the thread "+Thread.currentThread().getId());
+ logger.debug("in mergeIndexedDocuments of ForesiteResourceMap, the resource entry id is "
+ + resourceEntry.getIdentifier() + " in the thread "
+ + Thread.currentThread().getId());
if (doc.getIdentifier().equals(resourceEntry.getIdentifier())
|| resourceEntry.getIdentifier().equals(doc.getSeriesId())) {
try {
mergedDocuments.add(_mergeMappedReference(resourceEntry, doc));
} catch (Exception e) {
- logger.error("ForestieResourceMap.mergeIndexedDocuments - cannot merge the document since " + e.getMessage());
+ logger.error("ForestieResourceMap.mergeIndexedDocuments - cannot merge the document since "
+ + e.getMessage());
}
}
diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java
index 1f375ba6..044550b8 100644
--- a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java
+++ b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java
@@ -1,6 +1,7 @@
package org.dataone.cn.indexer.resourcemap;
import java.io.IOException;
+import java.security.NoSuchAlgorithmException;
import org.apache.log4j.Logger;
import org.dataone.cn.indexer.object.ObjectManager;
@@ -12,7 +13,7 @@
import org.dataone.service.exceptions.NotImplemented;
import org.dataone.service.exceptions.ServiceFailure;
import org.dataone.service.types.v1.Identifier;
-import org.dataone.service.types.v2.SystemMetadata;
+import org.dataone.service.types.v1.SystemMetadata;
public class IndexVisibilityDelegateImpl implements IndexVisibilityDelegate {
@@ -25,10 +26,8 @@ public IndexVisibilityDelegateImpl() {
public boolean isDocumentVisible(Identifier pid) {
boolean visible = false;
try {
-
- //SystemMetadata systemMetadata = HazelcastClientFactory.getSystemMetadataMap().get(pid);
- String relativeObjPath = null; //we don't know the path
- SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativeObjPath);
+ SystemMetadata systemMetadata = ObjectManager.getInstance()
+ .getSystemMetadata(pid.getValue());
// TODO: Is pid Identifier a SID?
if (systemMetadata == null) {
return true;
@@ -56,6 +55,8 @@ public boolean isDocumentVisible(Identifier pid) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
} catch (MarshallingException e) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
+ } catch (NoSuchAlgorithmException e) {
+ logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
}
return visible;
}
@@ -63,9 +64,7 @@ public boolean isDocumentVisible(Identifier pid) {
public boolean documentExists(Identifier pid) {
boolean exists = false;
try {
- //SystemMetadata systemMetadata = HazelcastClientFactory.getSystemMetadataMap().get(pid);
- String relativeObjPath = null; //we don't know the path
- SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativeObjPath);
+ SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue());
if (systemMetadata != null) {
exists = true;
} else {
@@ -92,6 +91,8 @@ public boolean documentExists(Identifier pid) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
} catch (MarshallingException e) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
+ } catch (NoSuchAlgorithmException e) {
+ logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
}
return exists;
}
diff --git a/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java b/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java
index 01271732..20d3ba06 100644
--- a/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java
+++ b/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java
@@ -28,7 +28,7 @@
import java.util.List;
import org.apache.commons.io.IOUtils;
-import org.dataone.service.types.v2.SystemMetadata;
+import org.dataone.service.types.v1.SystemMetadata;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
diff --git a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java
index 81a725da..a3a18831 100644
--- a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java
+++ b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java
@@ -1,23 +1,3 @@
-/**
- * This work was created by participants in the DataONE project, and is
- * jointly copyrighted by participating institutions in DataONE. For
- * more information on DataONE, see our web site at http://dataone.org.
- *
- * Copyright 2022
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
package org.dataone.indexer.queue;
import java.util.Map;
@@ -36,12 +16,12 @@
*
*/
public class IndexQueueMessageParser {
- private final static String HEADER_ID = "id"; //The header name in the message to store the identifier
- private final static String HEADER_PATH = "path"; //The header name in the message to store the path of the object
- private final static String HEADER_INDEX_TYPE = "index_type"; //The header name in the message to store the index type
-
+ //The header name in the message to store the identifier
+ private final static String HEADER_ID = "id";
+ //The header name in the message to store the index type
+ private final static String HEADER_INDEX_TYPE = "index_type";
+
private Identifier identifier = null;
- private String objectPath = null;
private String indexType = null;
private int priority = 1;
@@ -55,11 +35,13 @@ public class IndexQueueMessageParser {
*/
public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRequest {
if(properties == null) {
- throw new InvalidRequest("0000", "The properties, which contains the index task info, cannot be null in the index queue message.");
+ throw new InvalidRequest("0000", "The properties, which contains the index task info, "
+ + "cannot be null in the index queue message.");
}
Map headers = properties.getHeaders();
if(headers == null) {
- throw new InvalidRequest("0000", "The header of the properties, which contains the index task info, cannot be null in the index queue message.");
+ throw new InvalidRequest("0000", "The header of the properties, which contains the "
+ + "index task info, cannot be null in the index queue message.");
}
Object pidObj = headers.get(HEADER_ID);
if (pidObj == null) {
@@ -72,7 +54,7 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe
logger.debug("IndexQueueMessageParser.parse - the identifier in the message is " + pid);
identifier = new Identifier();
identifier.setValue(pid);
-
+
Object typeObj = headers.get(HEADER_INDEX_TYPE);
if (typeObj == null) {
throw new InvalidRequest("0000", "The index type cannot be null in the index queue message.");
@@ -82,12 +64,7 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe
throw new InvalidRequest("0000", "The index type cannot be null or blank in the index queue message.");
}
logger.debug("IndexQueueMessageParser.parse - the index type in the message is " + indexType);
-
- Object pathObject = headers.get(HEADER_PATH);
- if (pathObject != null) {
- objectPath = ((LongString)pathObject).toString();
- }
- logger.debug("IndexQueueMessageParser.parse - the file path of the object which be indexed in the message is " + objectPath);
+
try {
priority = properties.getPriority();
} catch (NullPointerException e) {
@@ -105,16 +82,6 @@ public Identifier getIdentifier() {
return identifier;
}
- /**
- * Get the file path of the object, which will be indexed,
- * after calling the parse method to parse the index queue message.
- * @return the file path of the object. It can be null or blank, which
- * means we don't have the object in the system.
- */
- public String getObjectPath() {
- return objectPath;
- }
-
/**
* Get the type of the index task after calling the parse method to parse the index queue message.
* @return the type of the index task. It can be create, delete or sysmeta.
diff --git a/src/main/java/org/dataone/indexer/storage/Storage.java b/src/main/java/org/dataone/indexer/storage/Storage.java
new file mode 100644
index 00000000..a0ea2176
--- /dev/null
+++ b/src/main/java/org/dataone/indexer/storage/Storage.java
@@ -0,0 +1,136 @@
+package org.dataone.indexer.storage;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.NoSuchAlgorithmException;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.dataone.configuration.Settings;
+import org.dataone.hashstore.HashStore;
+import org.dataone.hashstore.HashStoreFactory;
+import org.dataone.hashstore.exceptions.HashStoreFactoryException;
+
+/**
+ * Singleton storage class backed by a DataONE HashStore instance
+ */
+public class Storage {
+
+ private static Log log = LogFactory.getLog(Storage.class);
+ private static Storage instance;
+ private static HashStore hashStore;
+ static {
+ try {
+ instance = new Storage();
+ } catch (IOException e) {
+ log.error(
+ "Dataone-indexer cannot initialize the Storage class since " + e.getMessage(), e);
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Private constructor
+ * @throws IOException
+ * @throws HashStoreFactoryException
+ */
+ private Storage() throws HashStoreFactoryException, IOException {
+ String className = Settings.getConfiguration().getString("storage.className");
+ String rootPath = Settings.getConfiguration().getString("storage.hashstore.rootDirectory");
+ if (rootPath == null) {
+ throw new HashStoreFactoryException("HashStorage.constructor - The HashStore root path "
+ + " is null or blank from the property of storage.hashstore.rootDirectory");
+ }
+ String directoryDepth = Settings.getConfiguration()
+ .getString("storage.hashstore.directory.depth", "3");
+ String directoryNameWidth = Settings.getConfiguration()
+ .getString("storage.hashstore.directory.width", "2");
+ String fileNameAlgorithm = Settings.getConfiguration()
+ .getString("storage.hashstore.fileNameAlgorithm", "SHA-256");
+ String defaultNamespace = Settings.getConfiguration()
+ .getString("storage.hashstore.defaultNamespace",
+ "https://ns.dataone.org/service/types/v2.0#SystemMetadata");
+ Properties storeProperties = new Properties();
+ storeProperties.setProperty("storePath", rootPath);
+ storeProperties.setProperty("storeDepth", directoryDepth);
+ storeProperties.setProperty("storeWidth", directoryNameWidth);
+ storeProperties.setProperty("storeAlgorithm", fileNameAlgorithm);
+ storeProperties.setProperty("storeMetadataNamespace", defaultNamespace);
+ hashStore = HashStoreFactory.getHashStore(className, storeProperties);
+ }
+
+ /**
+ * Get the instance of the class through the singleton pattern
+ * @return the instance of the class
+ */
+ public static Storage getInstance() {
+ return instance;
+ }
+
+ /**
+ * Returns an InputStream to an object from HashStore using a given persistent identifier.
+ *
+ * @param pid Authority-based identifier
+ * @return Object InputStream
+ * @throws IllegalArgumentException When pid is null or empty
+ * @throws FileNotFoundException When requested pid has no associated object
+ * @throws IOException I/O error when creating InputStream to object
+ * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not
+ * supported
+ */
+ public InputStream retrieveObject(String pid)
+ throws IllegalArgumentException, FileNotFoundException, IOException,
+ NoSuchAlgorithmException {
+ return hashStore.retrieveObject(pid);
+ }
+
+ /**
+ * Returns an InputStream to the system metadata content of a given pid
+ *
+ * @param pid Authority-based identifier
+ * @return Metadata InputStream
+ * @throws IllegalArgumentException When pid/formatId is null or empty
+ * @throws FileNotFoundException When requested pid+formatId has no associated object
+ * @throws IOException I/O error when creating InputStream to metadata
+ * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not
+ * supported
+ */
+ public InputStream retrieveSystemMetadata(String pid)
+ throws IllegalArgumentException, FileNotFoundException, IOException,
+ NoSuchAlgorithmException {
+ return hashStore.retrieveMetadata(pid);
+ }
+
+ /**
+ * Store the input stream object into hash store. This method is only for the test classes.
+ * @param object the input stream of the object
+ * @param pid the identifier of the object which will be stored
+ * @throws NoSuchAlgorithmException
+ * @throws IOException
+ * @throws RuntimeException
+ * @throws InterruptedException
+ */
+ public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmException,
+ IOException,RuntimeException, InterruptedException {
+ hashStore.storeObject(object, pid, null, null, null, -1);
+ }
+
+ /**
+ * Store the system metadata into hash store. This method is only for the test classes.
+ * @param metadata the input stream of the system metadata
+ * @param pid the identifier of the system metadata
+ * @throws IOException
+ * @throws IllegalArgumentException
+ * @throws FileNotFoundException
+ * @throws InterruptedException
+ * @throws NoSuchAlgorithmException
+ */
+ public void storeMetadata(InputStream metadata, String pid) throws IOException,
+ IllegalArgumentException, FileNotFoundException,
+ InterruptedException, NoSuchAlgorithmException {
+ hashStore.storeMetadata(metadata, pid);
+ }
+
+}
diff --git a/src/main/resources/log4j2.properties b/src/main/resources/log4j2.properties
index 75a89bc2..67b1596f 100644
--- a/src/main/resources/log4j2.properties
+++ b/src/main/resources/log4j2.properties
@@ -34,7 +34,7 @@ appender.consoleAppender.layout.pattern=dataone-indexer %d{yyyyMMdd-HH:mm:ss}: [
##################################
# the root logger configuration #
##################################
-rootLogger.level=INFO
+rootLogger.level={{ ternary "DEBUG" "ERROR" .Values.image.debug }}
rootLogger.appenderRef.console.ref=consoleAppender
################################################################################
diff --git a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java
index 57ad01ae..e0addfb3 100644
--- a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java
+++ b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java
@@ -1,29 +1,10 @@
-/**
- * This work was created by participants in the DataONE project, and is
- * jointly copyrighted by participating institutions in DataONE. For
- * more information on DataONE, see our web site at http://dataone.org.
- *
- * Copyright ${year}
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * $Id$
- */
-
package org.dataone.cn.index;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.InputStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -38,7 +19,6 @@
import org.apache.commons.codec.EncoderException;
import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.lang3.StringUtils;
import org.apache.solr.SolrJettyTestBase;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrServerException;
@@ -55,14 +35,13 @@
import org.dataone.cn.indexer.parser.ISolrField;
import org.dataone.cn.indexer.solrhttp.SolrElementField;
import org.dataone.configuration.Settings;
+import org.dataone.indexer.storage.Storage;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.exceptions.NotImplemented;
import org.dataone.service.exceptions.ServiceFailure;
import org.dataone.service.exceptions.UnsupportedType;
import org.dataone.service.types.v1.Identifier;
-import org.dataone.service.types.v2.SystemMetadata;
import org.dataone.service.util.DateTimeMarshaller;
-import org.dataone.service.util.TypeMarshaller;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
@@ -88,7 +67,8 @@ public abstract class DataONESolrJettyTestBase extends SolrJettyTestBase {
private SolrIndex solrIndexService;
private int solrPort = Settings.getConfiguration().getInt("test.solr.port", 8985);
private static final String DEFAULT_SOL_RHOME = "solr8home";
-
+ private static final String SYSTEMMETA_FILE_NAME = "systemmetadata.xml";
+
/**
* Index the given object into solr
* @param identifier the identifier of the object which needs to be indexed
@@ -98,11 +78,52 @@ public abstract class DataONESolrJettyTestBase extends SolrJettyTestBase {
protected void indexObjectToSolr(String identifier, Resource objectFile) throws Exception {
boolean isSysmetaChangeOnly = false;
String relativePath = objectFile.getFile().getPath();
+ try {
+ Storage.getInstance().retrieveObject(identifier);
+ } catch (FileNotFoundException e) {
+ // The pid is not in the hash store and we need to save the object into hashstore
+ try (InputStream object = objectFile.getInputStream()) {
+ Storage.getInstance().storeObject(object, identifier);
+ }
+ File sysmetaFile = getSysmetaFile(relativePath);
+ if (sysmetaFile != null) {
+ try (InputStream sysmeta = new FileInputStream(sysmetaFile)) {
+ Storage.getInstance().storeMetadata(sysmeta, identifier);
+ }
+ }
+ }
Identifier pid = new Identifier();
pid.setValue(identifier);
- solrIndexService.update(pid, relativePath, isSysmetaChangeOnly);
+ solrIndexService.update(pid, isSysmetaChangeOnly);
}
-
+
+ /**
+     * A convention-based helper to get the system metadata file path from the object path.
+ * We assume the object and system metadata file are in the same directory.
+ * The system metadata file has a fixed name - systemmetadata.xml
+ * @param relativeObjPath the relative path of the object
+ * @return the file of system metadata. If it is null, this means the system metadata file does not exist.
+ */
+ private static File getSysmetaFile(String relativeObjPath) {
+ File sysmetaFile = null;
+ String sysmetaPath = null;
+ String relativeSysmetaPath = null;
+ if (relativeObjPath != null) {
+ if (relativeObjPath.contains(File.separator)) {
+ relativeSysmetaPath = relativeObjPath.substring(0,
+ relativeObjPath.lastIndexOf(File.separator) + 1) + SYSTEMMETA_FILE_NAME;
+ } else {
+                // There is no path information in the object path (it only has the file name).
+                // So we simply return systemmetadata.xml
+ relativeSysmetaPath = SYSTEMMETA_FILE_NAME;
+ }
+ }
+ if (relativeSysmetaPath != null) {
+ sysmetaFile = new File(relativeSysmetaPath);
+ }
+ return sysmetaFile;
+ }
+
/**
* Delete the given identifier from the solr server
* @param identifier
@@ -125,25 +146,6 @@ protected void deleteSolrDoc(String identifier) throws XPathExpressionException,
solrIndexService.remove(pid);
}
- protected void addEmlToSolrIndex(Resource sysMetaFile) throws Exception {
- SolrIndex indexService = solrIndexService;
- SystemMetadata smd = TypeMarshaller.unmarshalTypeFromStream(SystemMetadata.class,
- sysMetaFile.getInputStream());
- // path to actual science metadata document
- String path = StringUtils.remove(sysMetaFile.getFile().getPath(), File.separator + "SystemMetadata");
- boolean isSysmetaChangeOnly = false;
- indexService.update(smd.getIdentifier(), path, isSysmetaChangeOnly);
-
- }
-
- protected void addSysAndSciMetaToSolrIndex(Resource sysMeta, Resource sciMeta) throws Exception {
- SolrIndex indexService = solrIndexService;
- SystemMetadata smd = TypeMarshaller.unmarshalTypeFromStream(SystemMetadata.class,
- sysMeta.getInputStream());
- String path = sciMeta.getFile().getAbsolutePath();
- boolean isSysmetaChangeOnly = false;
- indexService.update(smd.getIdentifier(), path, isSysmetaChangeOnly);
- }
protected SolrDocument assertPresentInSolrIndex(String pid) throws SolrServerException,
IOException {
diff --git a/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java b/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java
index 4e317dd1..3ab0d6fc 100644
--- a/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java
+++ b/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java
@@ -296,8 +296,8 @@ public static void setUp() throws Exception {
fgdcNasaExpected.put("mediaTypeProperty", "");
fgdcNasaExpected.put("formatId", "FGDC-STD-001.1-1999");
fgdcNasaExpected.put("formatType", "METADATA");
- fgdcNasaExpected.put("size", "14880");
- fgdcNasaExpected.put("checksum", "c72ff66bbe7fa99e5fb399bab8cb6f85");
+ fgdcNasaExpected.put("size", "14828");
+ fgdcNasaExpected.put("checksum", "1755a557c13be7af44d676bb09274b0e");
fgdcNasaExpected.put("checksumAlgorithm", "MD5");
fgdcNasaExpected.put("submitter", "CN=Dave Vieglais T799,O=Google,C=US,DC=cilogon,DC=org");
fgdcNasaExpected.put("rightsHolder",
diff --git a/src/test/java/org/dataone/cn/indexer/IndexWorkerTest.java b/src/test/java/org/dataone/cn/indexer/IndexWorkerTest.java
index e6d10f85..67f92e1f 100644
--- a/src/test/java/org/dataone/cn/indexer/IndexWorkerTest.java
+++ b/src/test/java/org/dataone/cn/indexer/IndexWorkerTest.java
@@ -69,7 +69,7 @@ public void testInitExecutorService() throws Exception {
}
String propertyName = "index.thread.number";
String numberStr = "5";
- int number = (new Integer(numberStr)).intValue();
+ int number = Integer.parseInt(numberStr);
// only test setting multiple threads if enough processors are available
if (finalThreads > number) {
Settings.getConfiguration().setProperty(propertyName, numberStr);
@@ -79,7 +79,7 @@ public void testInitExecutorService() throws Exception {
assertTrue(worker.multipleThread);
}
numberStr = "1";
- number = (new Integer(numberStr)).intValue();
+ number = Integer.parseInt(numberStr);
Settings.getConfiguration().setProperty(propertyName, numberStr);
worker.initExecutorService();
System.out.println("worker.nThreads(1): " + worker.nThreads);
diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java
index b50c6e5c..120286b9 100644
--- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java
+++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java
@@ -1,33 +1,25 @@
-/**
- * This work was created by participants in the DataONE project, and is
- * jointly copyrighted by participating institutions in DataONE. For
- * more information on DataONE, see our web site at http://dataone.org.
- *
- * Copyright 2022
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
package org.dataone.cn.indexer.object;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.security.MessageDigest;
-import java.nio.file.Paths;
+import javax.xml.bind.DatatypeConverter;
-import org.dataone.cn.index.DataONESolrJettyTestBase;
-import org.dataone.service.exceptions.NotFound;
+
+import org.dataone.indexer.storage.Storage;
+import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v2.SystemMetadata;
+import org.dataone.service.util.TypeMarshaller;
+import org.junit.Before;
import org.junit.Test;
/**
@@ -36,73 +28,61 @@
*
*/
public class ObjectManagerTest {
-
- /**
- * Test the getFilePath method
- * @throws Exception
- */
- @Test
- public void testgetFilePath() throws Exception {
- ObjectManager manager = ObjectManager.getInstance();
- String path = null;
- String format = "eml://ecoinformatics.org/eml-2.0.1";
- String resultPath = manager.getFilePath(path, format);
- assertTrue(resultPath == null);
- format = "image/bmp";
- resultPath = manager.getFilePath(path, format);
- assertTrue(resultPath == null);
-
- path = "";
- format = "eml://ecoinformatics.org/eml-2.0.1";
- resultPath = manager.getFilePath(path, format);
- assertTrue(resultPath == null);
- format = "image/bmp";
- resultPath = manager.getFilePath(path, format);
- assertTrue(resultPath == null);
-
- path = "/var/metacat/documents/foo.1.1";
- format = "eml://ecoinformatics.org/eml-2.0.1";
- resultPath = manager.getFilePath(path, format);
- assertTrue(resultPath.equals("//var/metacat/documents/foo.1.1"));
-
- path = "/var/metacat/documents/foo.2.1";
- format = "image/bmp";;
- resultPath = manager.getFilePath(path, format);
- assertTrue(resultPath.equals("//var/metacat/documents/foo.2.1"));
+
+ private String identifier;
+
+ @Before
+ public void setUp() throws Exception {
+ identifier = "ObjectManagerTest-" + System.currentTimeMillis();
+ File objectFile = new File("src/test/resources/org/dataone/cn/index/resources/d1_testdocs/"
+ + "fgdc/nasa_d_FEDGPS1293.xml");
+ try (InputStream object = new FileInputStream(objectFile)) {
+ Storage.getInstance().storeObject(object, identifier);
+ }
+ File sysmetaFile = new File("src/test/resources/org/dataone/cn/index/resources/"
+ + "d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml");
+ try (InputStream sysmetaStream = new FileInputStream(sysmetaFile)) {
+ SystemMetadata sysmeta = TypeMarshaller
+ .unmarshalTypeFromStream(SystemMetadata.class, sysmetaStream);
+ Identifier pid = new Identifier();
+ pid.setValue(identifier);
+ sysmeta.setIdentifier(pid);
+ try (ByteArrayOutputStream output = new ByteArrayOutputStream()) {
+ TypeMarshaller.marshalTypeToOutputStream(sysmeta, output);
+ try (ByteArrayInputStream input = new ByteArrayInputStream(output.toByteArray())) {
+ Storage.getInstance().storeMetadata(input, identifier);
+ }
+ }
+ }
}
-
+
/**
- * Test the getSystemMetadata method
+ * Test the getObject and getSystemMetadata methods
* @throws Exception
*/
- @Test
- public void testGetSystemMetadata() throws Exception {
- //Test to get system metadata from a file
- String currentDir = Paths.get(".").toAbsolutePath().normalize().toString();
- System.out.println("current dir " + currentDir);
- String path = currentDir + "/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/json-ld/hakai-deep-schema/hakai-deep-schema.jsonld";
- String id = "hakai-deep-schema.jsonld";
- SystemMetadata sysmeta = ObjectManager.getInstance().getSystemMetadata(id, path);
- assertTrue(sysmeta.getIdentifier().getValue().equals(id));
-
- //Test to get system metadata from the Mock dataone cn server.
- id = "ala-wai-canal-ns02-matlab-processing.eml.1.xml";
- path = null;
- MockMNode mockMNode = new MockMNode("http://mnode.foo");
- mockMNode.setContext(DataONESolrJettyTestBase.getContext());
- ObjectManager.setD1Node(mockMNode);
- sysmeta = ObjectManager.getInstance().getSystemMetadata(id, path);
- assertTrue(sysmeta.getIdentifier().getValue().equals(id));
-
- //Test the system metadata not found
- id = "foo.1.1";
- path = "foo1";
- try {
- sysmeta = ObjectManager.getInstance().getSystemMetadata(id, path);
- fail("We should reach here");
- } catch (NotFound e) {
- assert(true);
+ @Test
+ public void testGetObjectAndSystemMetadata() throws Exception {
+ try (InputStream input = ObjectManager.getInstance().getObject(identifier)) {
+ assertNotNull(input);
+ try (OutputStream os = new ByteArrayOutputStream()) {
+ MessageDigest md5 = MessageDigest.getInstance("MD5");
+ // Calculate hex digests
+ byte[] buffer = new byte[8192];
+ int bytesRead;
+ while ((bytesRead = input.read(buffer)) != -1) {
+ os.write(buffer, 0, bytesRead);
+ md5.update(buffer, 0, bytesRead);
+ }
+ String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase();
+ assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest);
+ }
}
+ org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManager.getInstance()
+ .getSystemMetadata(identifier);
+ assertEquals(identifier, sysmeta.getIdentifier().getValue());
+ assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue());
+ assertEquals(14828, sysmeta.getSize().intValue());
}
+
}
diff --git a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java
index b6a5c1cc..201a55f7 100644
--- a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java
+++ b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java
@@ -1,5 +1,6 @@
package org.dataone.indexer.queue;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -19,10 +20,11 @@
*
*/
public class IndexQueueMessageParserTest {
- private final static String HEADER_ID = "id"; //The header name in the message to store the identifier
- private final static String HEADER_PATH = "path"; //The header name in the message to store the path of the object
- private final static String HEADER_INDEX_TYPE = "index_type"; //The header name in the message to store the index type
-
+ //The header name in the message to store the identifier
+ private final static String HEADER_ID = "id";
+ //The header name in the message to store the index type
+ private final static String HEADER_INDEX_TYPE = "index_type";
+
/**
* Test the invalid messages
* @throws Exception
@@ -32,54 +34,50 @@ public void testInvalidRequest() throws Exception {
LongString id = null;
LongString index_type = LongStringHelper.asLongString("create");
int priority = 1;
- LongString filePath = LongStringHelper.asLongString("foo");
- AMQP.BasicProperties properties = generateProperties(id, index_type, priority, filePath);
+ AMQP.BasicProperties properties = generateProperties(id, index_type, priority);
byte[] body = null;
IndexQueueMessageParser parser = new IndexQueueMessageParser();
try {
parser.parse(properties, body);
fail("Since the idenitifer is null, we shoulder get here");
} catch (InvalidRequest e) {
-
+
}
-
+
id = LongStringHelper.asLongString(" ");
index_type = LongStringHelper.asLongString("create");
priority = 1;
- filePath = LongStringHelper.asLongString("foo");
- properties = generateProperties(id, index_type, priority, filePath);
+ properties = generateProperties(id, index_type, priority);
try {
parser.parse(properties, body);
fail("Since the idenitifer is null, we shouldn't get here");
} catch (InvalidRequest e) {
-
+
}
-
+
id = LongStringHelper.asLongString("foo");
index_type = null;
priority = 1;
- filePath = LongStringHelper.asLongString("foo");
- properties = generateProperties(id, index_type, priority, filePath);
+ properties = generateProperties(id, index_type, priority);
try {
parser.parse(properties, body);
fail("Since the index type is null, we shouldn't get here");
} catch (InvalidRequest e) {
-
+
}
-
+
id = LongStringHelper.asLongString("foo");
index_type = LongStringHelper.asLongString("");
priority = 1;
- filePath = LongStringHelper.asLongString("foo");
- properties = generateProperties(id, index_type, priority, filePath);
+ properties = generateProperties(id, index_type, priority);
try {
parser.parse(properties, body);
fail("Since the index type is null, we shouldn't get here");
} catch (InvalidRequest e) {
-
+
}
}
-
+
/**
* Test valid messages
* @throws Exception
@@ -89,62 +87,51 @@ public void testParse() throws Exception {
String id = "doi:10.5063/F1HX1B4Q";
String indexType = "create";
int priority = 1;
- String filePath = "/var/metacat/12dfad";
LongString longId = LongStringHelper.asLongString(id);
LongString longIndexType = LongStringHelper.asLongString(indexType);
- LongString longFilePath = LongStringHelper.asLongString(filePath);
- AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority, longFilePath);
+ AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority);
byte[] body = null;
IndexQueueMessageParser parser = new IndexQueueMessageParser();
parser.parse(properties, body);
- assertTrue(parser.getIdentifier().getValue().equals(id));
- assertTrue(parser.getIndexType().equals(indexType));
- assertTrue(parser.getPriority() == priority);
- assertTrue(parser.getObjectPath().equals(filePath));
-
+ assertEquals(id, parser.getIdentifier().getValue());
+ assertEquals(indexType, parser.getIndexType());
+ assertEquals(priority, parser.getPriority());
+
id = "urn:uuid:45298965-f867-440c-841f-91d3abd729b7";
indexType = "delete";
priority = 2;
- filePath = "";
longId = LongStringHelper.asLongString(id);
longIndexType = LongStringHelper.asLongString(indexType);
- longFilePath = LongStringHelper.asLongString(filePath);
- properties = generateProperties(longId, longIndexType, priority, longFilePath);
+ properties = generateProperties(longId, longIndexType, priority);
parser = new IndexQueueMessageParser();
parser.parse(properties, body);
- assertTrue(parser.getIdentifier().getValue().equals(id));
- assertTrue(parser.getIndexType().equals(indexType));
- assertTrue(parser.getPriority() == priority);
- assertTrue(parser.getObjectPath().equals(filePath));
+ assertEquals(id, parser.getIdentifier().getValue());
+ assertEquals(indexType, parser.getIndexType());
+ assertEquals(priority, parser.getPriority());
id = "test-foo";
indexType = "sysmeta";
priority = 10;
- filePath = "c:\\foo\\abc";
longId = LongStringHelper.asLongString(id);
longIndexType = LongStringHelper.asLongString(indexType);
- longFilePath = LongStringHelper.asLongString(filePath);
- properties = generateProperties(longId, longIndexType, priority, longFilePath);
+ properties = generateProperties(longId, longIndexType, priority);
parser = new IndexQueueMessageParser();
parser.parse(properties, body);
- assertTrue(parser.getIdentifier().getValue().equals(id));
- assertTrue(parser.getIndexType().equals(indexType));
- assertTrue(parser.getPriority() == priority);
- assertTrue(parser.getObjectPath().equals(filePath));
-
+ assertEquals(id, parser.getIdentifier().getValue());
+ assertEquals(indexType, parser.getIndexType());
+ assertEquals(priority, parser.getPriority());
+
id = "test-foo2";
indexType = "sysmeta2";
priority = 10;
longId = LongStringHelper.asLongString(id);
longIndexType = LongStringHelper.asLongString(indexType);
- longFilePath = null;
- properties = generateProperties(longId, longIndexType, priority, longFilePath);
+ properties = generateProperties(longId, longIndexType, priority);
parser = new IndexQueueMessageParser();
parser.parse(properties, body);
- assertTrue(parser.getIdentifier().getValue().equals(id));
- assertTrue(parser.getIndexType().equals(indexType));
- assertTrue(parser.getPriority() == priority);
- assertTrue(parser.getObjectPath() == null);
+ assertEquals(id, parser.getIdentifier().getValue());
+ assertEquals(indexType, parser.getIndexType());
+ assertEquals(priority, parser.getPriority());
}
/**
@@ -155,13 +142,10 @@ public void testParse() throws Exception {
* @param filePath
* @return
*/
- private AMQP.BasicProperties generateProperties(LongString id, LongString index_type, int priority, LongString filePath) {
+ private AMQP.BasicProperties generateProperties(LongString id, LongString index_type, int priority) {
Map headers = new HashMap();
headers.put(HEADER_ID, id);
headers.put(HEADER_INDEX_TYPE, index_type);
- if (filePath != null) {
- headers.put(HEADER_PATH, filePath);
- }
AMQP.BasicProperties basicProperties = new AMQP.BasicProperties.Builder()
.contentType("text/plain")
.deliveryMode(2) // set this message to persistent
diff --git a/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml b/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml
index 42998531..9b0dfbd6 100644
--- a/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml
+++ b/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml
@@ -9,8 +9,8 @@
22
www.nbii.gov_metadata_mdata_NASA_nasa_d_FEDGPS1293
FGDC-STD-001.1-1999
- 14880
- c72ff66bbe7fa99e5fb399bab8cb6f85
+ 14828
+ 1755a557c13be7af44d676bb09274b0e
CN=Dave Vieglais T799,O=Google,C=US,DC=cilogon,DC=org
CN=Dave Vieglais T799,O=Google,C=US,DC=cilogon,DC=org
diff --git a/src/test/resources/org/dataone/configuration/index-processor.properties b/src/test/resources/org/dataone/configuration/index-processor.properties
index 05cb1b1e..256dd8d8 100644
--- a/src/test/resources/org/dataone/configuration/index-processor.properties
+++ b/src/test/resources/org/dataone/configuration/index-processor.properties
@@ -42,3 +42,12 @@ index.resourcemap.namespace=http://www.w3.org/TR/rdf-syntax-grammar;http://www.o
dataone.mn.registration.serviceType.url=https://cn-sandbox-ucsb-1.test.dataone.org/mnServiceTypes.xml
cn.router.hostname=cn.dataone.org
+
+# Storage properties
+storage.className=org.dataone.hashstore.filehashstore.FileHashStore
+storage.hashstore.rootDirectory=./target/hashstore
+storage.hashstore.defaultNamespace=https://ns.dataone.org/service/types/v2.0#SystemMetadata
+# The following three properties must NOT be modified after the hash store is initialized
+storage.hashstore.fileNameAlgorithm=SHA-256
+storage.hashstore.directory.width=2
+storage.hashstore.directory.depth=3