From a918530397e3e70d5171d879b2238b3005e5c66f Mon Sep 17 00:00:00 2001 From: Bharathwaj G Date: Wed, 7 Aug 2024 12:27:21 +0530 Subject: [PATCH] Add changes to build star tree in off heap (#14817) --------- Signed-off-by: Bharathwaj G --- .../index/mapper/StarTreeMapperIT.java | 4 +- .../common/util/ByteArrayBackedBitset.java | 86 +++ .../composite/Composite99DocValuesWriter.java | 8 +- .../aggregators/CountValueAggregator.java | 5 + .../aggregators/SumValueAggregator.java | 5 + .../startree/aggregators/ValueAggregator.java | 5 + .../builder/AbstractDocumentsFileManager.java | 231 ++++++++ .../startree/builder/BaseStarTreeBuilder.java | 48 +- .../builder/OffHeapStarTreeBuilder.java | 334 ++++++++++++ .../builder/OnHeapStarTreeBuilder.java | 24 +- .../builder/SegmentDocsFileManager.java | 103 ++++ .../builder/StarTreeDocsFileManager.java | 294 ++++++++++ .../startree/builder/StarTreesBuilder.java | 13 +- .../utils/StarTreeDocumentBitSetUtil.java | 57 ++ .../utils/StarTreeDocumentsSorter.java | 66 +++ .../datacube/startree/utils/TreeNode.java | 4 + .../index/mapper/StarTreeMapper.java | 3 +- .../builder/AbstractStarTreeBuilderTests.java | 513 ++++++++++++++++-- .../builder/OffHeapStarTreeBuilderTests.java | 26 + .../builder/StarTreesBuilderTests.java | 10 +- .../SequentialDocValuesIteratorTests.java | 2 - .../StarTreeDocumentBitSetUtilTests.java | 72 +++ .../utils/StarTreeDocumentsSorterTests.java | 201 +++++++ .../index/mapper/StarTreeMapperTests.java | 4 +- 24 files changed, 2028 insertions(+), 90 deletions(-) create mode 100644 server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java create mode 100644 
server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java index 8e5193b650868..1cabb8b617ce3 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java @@ -275,7 +275,7 @@ public void testValidCompositeIndex() { assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals( - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, + StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode() ); assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); @@ -359,7 +359,7 @@ public void testUpdateIndexWhenMappingIsSame() { assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); 
assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals( - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, + StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode() ); assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); diff --git a/server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java b/server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java new file mode 100644 index 0000000000000..2d7948d414937 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.util; + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; + +import java.io.IOException; + +/** + * A bitset backed by a byte array. This will initialize and set bits in the byte array based on the index. + */ +public class ByteArrayBackedBitset { + private final byte[] byteArray; + + /** + * Constructor which allocates an on-heap byte array of the given capacity. This should be used during construction of the bitset. + */ + public ByteArrayBackedBitset(int capacity) { + byteArray = new byte[capacity]; + } + + /** + * Constructor which reads {@code length} bytes of the bitset from Lucene's RandomAccessInput, starting at {@code offset}, into the backing byte array. + */ + public ByteArrayBackedBitset(RandomAccessInput in, long offset, int length) throws IOException { + byteArray = new byte[length]; + int i = 0; + while (i < length) { + byteArray[i] = in.readByte(offset + i); + i++; + } + } + + /** + * Constructor which reads {@code length} bytes of the bitset from Lucene's IndexInput into the backing byte array. 
+ */ + public ByteArrayBackedBitset(IndexInput in, int length) throws IOException { + byteArray = new byte[length]; + int i = 0; + while (i < length) { + byteArray[i] = in.readByte(); + i++; + } + } + + /** + * Sets the bit at the given index to 1. + * Each byte can indicate 8 bits, so the index is divided by 8 to get the byte array index. + * @param index the index to set the bit + */ + public void set(int index) { + int byteArrIndex = index >> 3; + byteArray[byteArrIndex] |= (byte) (1 << (index & 7)); + } + + public int write(IndexOutput output) throws IOException { + int numBytes = 0; + for (Byte bitSet : byteArray) { + output.writeByte(bitSet); + numBytes += Byte.BYTES; + } + return numBytes; + } + + /** + * Retrieves whether the bit is set or not at the given index. + * @param index the index to look up for the bit + * @return true if bit is set, false otherwise + */ + public boolean get(int index) throws IOException { + int byteArrIndex = index >> 3; + return (byteArray[byteArrIndex] & (1 << (index & 7))) != 0; + } + + public int getCurrBytesRead() { + return byteArray.length; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java index 3859d3c998573..6ed1a8c42e380 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java @@ -8,8 +8,6 @@ package org.opensearch.index.codec.composite; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; @@ -50,9 +48,9 @@ public class Composite99DocValuesWriter extends DocValuesConsumer { private final Set compositeMappedFieldTypes; private final Set compositeFieldSet; 
private final Set segmentFieldSet; + private final boolean segmentHasCompositeFields; private final Map fieldProducerMap = new HashMap<>(); - private static final Logger logger = LogManager.getLogger(Composite99DocValuesWriter.class); public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) { @@ -70,6 +68,8 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState for (CompositeMappedFieldType type : compositeMappedFieldTypes) { compositeFieldSet.addAll(type.fields()); } + // check if there are any composite fields which are part of the segment + segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false; } @Override @@ -91,7 +91,7 @@ public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) th public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { delegate.addSortedNumericField(field, valuesProducer); // Perform this only during flush flow - if (mergeState.get() == null) { + if (mergeState.get() == null && segmentHasCompositeFields) { createCompositeIndicesIfPossible(valuesProducer, field); } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java index 5390b6728b9b6..ed159ee2efb7b 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java @@ -68,4 +68,9 @@ public Long toLongValue(Long value) { public Long toStarTreeNumericTypeValue(Long value) { return value; } + + @Override + public Long getIdentityMetricValue() { + return 0L; + } } diff --git 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java index 385549216e4d6..a471f0e2bd960 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -103,4 +103,9 @@ public Double toStarTreeNumericTypeValue(Long value) { throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e); } } + + @Override + public Double getIdentityMetricValue() { + return 0D; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java index 93230ed012b13..048582cc530e5 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -61,4 +61,9 @@ public interface ValueAggregator { * Converts an aggregated value from a Long type. 
*/ A toStarTreeNumericTypeValue(Long rawValue); + + /** + * Fetches a value that does not alter the result of aggregations + */ + A getIdentityMetricValue(); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java new file mode 100644 index 0000000000000..78c49dbada6b2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java @@ -0,0 +1,231 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.store.TrackingDirectoryWrapper; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericTypeConverters; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDocumentBitSetUtil; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +/** + * Abstract class for managing star tree file operations. 
+ * + * @opensearch.experimental + */ +@ExperimentalApi +public abstract class AbstractDocumentsFileManager implements Closeable { + private static final Logger logger = LogManager.getLogger(AbstractDocumentsFileManager.class); + protected final StarTreeField starTreeField; + protected final List metricAggregatorInfos; + protected final int numMetrics; + protected final TrackingDirectoryWrapper tmpDirectory; + protected final SegmentWriteState state; + protected int docSizeInBytes = -1; + + public AbstractDocumentsFileManager( + SegmentWriteState state, + StarTreeField starTreeField, + List metricAggregatorInfos + ) { + this.starTreeField = starTreeField; + this.tmpDirectory = new TrackingDirectoryWrapper(state.directory); + this.metricAggregatorInfos = metricAggregatorInfos; + this.state = state; + numMetrics = metricAggregatorInfos.size(); + } + + private void setDocSizeInBytes(int numBytes) { + if (docSizeInBytes == -1) { + docSizeInBytes = numBytes; + } + assert docSizeInBytes == numBytes; + } + + /** + * Write the star tree document to file associated with dimensions and metrics + */ + protected int writeStarTreeDocument(StarTreeDocument starTreeDocument, IndexOutput output, boolean isAggregatedDoc) throws IOException { + int numBytes = writeDimensions(starTreeDocument, output); + numBytes += writeMetrics(starTreeDocument, output, isAggregatedDoc); + setDocSizeInBytes(numBytes); + return numBytes; + } + + /** + * Write dimensions to file + */ + protected int writeDimensions(StarTreeDocument starTreeDocument, IndexOutput output) throws IOException { + int numBytes = 0; + for (int i = 0; i < starTreeDocument.dimensions.length; i++) { + output.writeLong(starTreeDocument.dimensions[i] == null ? 
0L : starTreeDocument.dimensions[i]); + numBytes += Long.BYTES; + } + numBytes += StarTreeDocumentBitSetUtil.writeBitSet(starTreeDocument.dimensions, output); + return numBytes; + } + + /** + * Write star tree document metrics to file + */ + protected int writeMetrics(StarTreeDocument starTreeDocument, IndexOutput output, boolean isAggregatedDoc) throws IOException { + int numBytes = 0; + for (int i = 0; i < starTreeDocument.metrics.length; i++) { + switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { + case LONG: + output.writeLong(starTreeDocument.metrics[i] == null ? 0L : (Long) starTreeDocument.metrics[i]); + numBytes += Long.BYTES; + break; + case DOUBLE: + if (isAggregatedDoc) { + long val = NumericUtils.doubleToSortableLong( + starTreeDocument.metrics[i] == null ? 0.0 : (Double) starTreeDocument.metrics[i] + ); + output.writeLong(val); + } else { + output.writeLong(starTreeDocument.metrics[i] == null ? 0L : (Long) starTreeDocument.metrics[i]); + } + numBytes += Long.BYTES; + break; + default: + throw new IllegalStateException("Unsupported metric type"); + } + } + numBytes += StarTreeDocumentBitSetUtil.writeBitSet(starTreeDocument.metrics, output); + return numBytes; + } + + /** + * Reads the star tree document from file with given offset + * + * @param input RandomAccessInput + * @param offset Offset in the file + * @param isAggregatedDoc boolean to indicate if aggregated star tree docs should be read + * @return StarTreeDocument + * @throws IOException IOException in case of I/O errors + */ + protected StarTreeDocument readStarTreeDocument(RandomAccessInput input, long offset, boolean isAggregatedDoc) throws IOException { + int dimSize = starTreeField.getDimensionsOrder().size(); + Long[] dimensions = new Long[dimSize]; + long initialOffset = offset; + offset = readDimensions(dimensions, input, offset); + + Object[] metrics = new Object[numMetrics]; + offset = readMetrics(input, offset, numMetrics, metrics, 
isAggregatedDoc); + assert (offset - initialOffset) == docSizeInBytes; + return new StarTreeDocument(dimensions, metrics); + } + + /** + * Read dimensions from file + */ + protected long readDimensions(Long[] dimensions, RandomAccessInput input, long offset) throws IOException { + for (int i = 0; i < dimensions.length; i++) { + try { + dimensions[i] = input.readLong(offset); + } catch (Exception e) { + logger.error("Error reading dimension value at offset {} for dimension {}", offset, i); + throw e; + } + offset += Long.BYTES; + } + offset += StarTreeDocumentBitSetUtil.readBitSet(input, offset, dimensions, index -> null); + return offset; + } + + /** + * Read star tree metrics from file + */ + protected long readMetrics(RandomAccessInput input, long offset, int numMetrics, Object[] metrics, boolean isAggregatedDoc) + throws IOException { + for (int i = 0; i < numMetrics; i++) { + switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { + case LONG: + metrics[i] = input.readLong(offset); + offset += Long.BYTES; + break; + case DOUBLE: + long val = input.readLong(offset); + if (isAggregatedDoc) { + metrics[i] = StarTreeNumericTypeConverters.sortableLongtoDouble(val); + } else { + metrics[i] = val; + } + offset += Long.BYTES; + break; + default: + throw new IllegalStateException("Unsupported metric type"); + } + } + offset += StarTreeDocumentBitSetUtil.readBitSet( + input, + offset, + metrics, + index -> metricAggregatorInfos.get(index).getValueAggregators().getIdentityMetricValue() + ); + return offset; + } + + /** + * Write star tree document to file + */ + public abstract void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isAggregatedDoc) throws IOException; + + /** + * Read star tree document from file based on doc id + */ + public abstract StarTreeDocument readStarTreeDocument(int docId, boolean isAggregatedDoc) throws IOException; + + /** + * Read star document dimensions from file based on doc id + */ + public 
abstract Long[] readDimensions(int docId) throws IOException; + + /** + * Read dimension value for given doc id and dimension id + */ + public abstract Long getDimensionValue(int docId, int dimensionId) throws IOException; + + /** + * Delete the temporary files created. On success, a deletion failure propagates as an IOException; otherwise deletion failures are ignored. + */ + public void deleteFiles(boolean success) throws IOException { + if (success) { + for (String file : tmpDirectory.getCreatedFiles()) { + tmpDirectory.deleteFile(file); + } + } else { + deleteFilesIgnoringException(); + } + + } + + /** + * Delete the temporary files created, ignoring any IOException raised while deleting an individual file + */ + private void deleteFilesIgnoringException() throws IOException { + for (String file : tmpDirectory.getCreatedFiles()) { + try { + tmpDirectory.deleteFile(file); + } catch (final IOException ignored) {} // similar to IOUtils.deleteFilesWhileIgnoringExceptions + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 7187fade882ea..56bb46e83a9da 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -72,8 +72,7 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { protected final TreeNode rootNode = getNewNode(); - private final StarTreeField starTreeField; - private final MapperService mapperService; + protected final StarTreeField starTreeField; private final SegmentWriteState state; static String NUM_SEGMENT_DOCS = "numSegmentDocs"; @@ -95,7 +94,6 @@ protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState sta this.skipStarNodeCreationForDimensions = new HashSet<>(); this.totalSegmentDocs = state.segmentInfo.maxDoc(); - this.mapperService = mapperService; this.state = state; Set skipStarNodeCreationForDimensions 
= starTreeFieldSpec.getSkipStarNodeCreationInDims(); @@ -141,6 +139,37 @@ public List generateMetricAggregatorInfos(MapperService ma return metricAggregatorInfos; } + /** + * Get star tree document from the segment for the current docId with the dimensionReaders and metricReaders + */ + protected StarTreeDocument getStarTreeDocument( + int currentDocId, + SequentialDocValuesIterator[] dimensionReaders, + List metricReaders + ) throws IOException { + Long[] dims = new Long[numDimensions]; + int i = 0; + for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) { + dimensionDocValueIterator.nextDoc(currentDocId); + Long val = dimensionDocValueIterator.value(currentDocId); + dims[i] = val; + i++; + } + i = 0; + Object[] metrics = new Object[metricReaders.size()]; + for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) { + metricDocValuesIterator.nextDoc(currentDocId); + // As part of merge, we traverse the star tree doc values + // The type of data stored in metric fields is different from the + // actual indexing field they're based on + metrics[i] = metricAggregatorInfos.get(i) + .getValueAggregators() + .toStarTreeNumericTypeValue(metricDocValuesIterator.value(currentDocId)); + i++; + } + return new StarTreeDocument(dims, metrics); + } + /** * Adds a document to the star-tree. * @@ -163,7 +192,7 @@ public List generateMetricAggregatorInfos(MapperService ma * * @return Star tree documents */ - public abstract List getStarTreeDocuments(); + public abstract List getStarTreeDocuments() throws IOException; /** * Returns the value of the dimension for the given dimension id and document in the star-tree. 
@@ -330,8 +359,13 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( * @return converted metric value to long */ private static long getLong(Object metric) { - Long metricValue = null; + // TODO : remove this after we merge identity changes + if (metric instanceof Double) { + if (0D == (double) metric) { + return 0L; + } + } try { if (metric instanceof Long) { metricValue = (long) metric; @@ -709,4 +743,8 @@ public void close() throws IOException { } abstract Iterator mergeStarTrees(List starTreeValues) throws IOException; + + public TreeNode getRootNode() { + return rootNode; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java new file mode 100644 index 0000000000000..f63b0cb0cc77d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java @@ -0,0 +1,334 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.search.DocIdSetIterator; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDocumentsSorter; +import org.opensearch.index.mapper.MapperService; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Off-heap implementation of the star tree builder. + * @opensearch.experimental + */ +@ExperimentalApi +public class OffHeapStarTreeBuilder extends BaseStarTreeBuilder { + private static final Logger logger = LogManager.getLogger(OffHeapStarTreeBuilder.class); + private final StarTreeDocsFileManager starTreeDocumentFileManager; + private final SegmentDocsFileManager segmentDocumentFileManager; + + /** + * Builds star tree based on star tree field configuration consisting of dimensions, metrics and star tree index + * specific configuration. 
+ * + * @param starTreeField holds the configuration for the star tree + * @param state stores the segment write state + * @param mapperService helps to find the original type of the field + */ + protected OffHeapStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) throws IOException { + super(starTreeField, state, mapperService); + segmentDocumentFileManager = new SegmentDocsFileManager(state, starTreeField, metricAggregatorInfos); + try { + starTreeDocumentFileManager = new StarTreeDocsFileManager(state, starTreeField, metricAggregatorInfos); + } catch (IOException e) { + IOUtils.closeWhileHandlingException(segmentDocumentFileManager); + throw e; + } + + } + + @Override + public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException { + starTreeDocumentFileManager.writeStarTreeDocument(starTreeDocument, true); + } + + /** + * Builds star tree based on the star tree values from multiple segments + * + * @param starTreeValuesSubs contains the star tree values from multiple segments + */ + @Override + public void build(List starTreeValuesSubs) throws IOException { + boolean success = false; + try { + build(mergeStarTrees(starTreeValuesSubs)); + success = true; + } finally { + starTreeDocumentFileManager.deleteFiles(success); + segmentDocumentFileManager.deleteFiles(success); + } + } + + /** + * Sorts and aggregates all the documents of the segment based on dimension and metrics configuration + * + * @param dimensionReaders List of docValues readers to read dimensions from the segment + * @param metricReaders List of docValues readers to read metrics from the segment + * @return Iterator of star-tree documents + */ + @Override + public Iterator sortAndAggregateSegmentDocuments( + SequentialDocValuesIterator[] dimensionReaders, + List metricReaders + ) throws IOException { + // Write all dimensions for segment documents into the buffer, + // and sort all documents using an int array + int[] 
sortedDocIds = new int[totalSegmentDocs]; + for (int i = 0; i < totalSegmentDocs; i++) { + sortedDocIds[i] = i; + } + try { + for (int i = 0; i < totalSegmentDocs; i++) { + StarTreeDocument document = getSegmentStarTreeDocument(i, dimensionReaders, metricReaders); + segmentDocumentFileManager.writeStarTreeDocument(document, false); + } + } catch (IOException ex) { + segmentDocumentFileManager.close(); + throw ex; + } + // Create an iterator for aggregated documents + return sortAndReduceDocuments(sortedDocIds, totalSegmentDocs, false); + } + + /** + * Sorts and aggregates the star-tree documents from multiple segments and builds star tree based on the newly + * aggregated star-tree documents + * + * @param starTreeValuesSubs StarTreeValues from multiple segments + * @return iterator of star tree documents + */ + Iterator mergeStarTrees(List starTreeValuesSubs) throws IOException { + int numDocs = 0; + int[] docIds; + try { + for (StarTreeValues starTreeValues : starTreeValuesSubs) { + List dimensionsSplitOrder = starTreeValues.getStarTreeField().getDimensionsOrder(); + SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[starTreeValues.getStarTreeField() + .getDimensionsOrder() + .size()]; + for (int i = 0; i < dimensionsSplitOrder.size(); i++) { + String dimension = dimensionsSplitOrder.get(i).getField(); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocValuesIteratorMap().get(dimension)); + } + List metricReaders = new ArrayList<>(); + for (Map.Entry metricDocValuesEntry : starTreeValues.getMetricDocValuesIteratorMap().entrySet()) { + metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); + } + int currentDocId = 0; + int numSegmentDocs = Integer.parseInt( + starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) + ); + while (currentDocId < numSegmentDocs) { + StarTreeDocument starTreeDocument = 
getStarTreeDocument(currentDocId, dimensionReaders, metricReaders); + segmentDocumentFileManager.writeStarTreeDocument(starTreeDocument, true); + numDocs++; + currentDocId++; + } + } + docIds = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + docIds[i] = i; + } + } catch (IOException ex) { + segmentDocumentFileManager.close(); + throw ex; + } + + if (numDocs == 0) { + return Collections.emptyIterator(); + } + + return sortAndReduceDocuments(docIds, numDocs, true); + } + + /** + * Sorts and reduces the star tree documents based on the dimensions + */ + private Iterator sortAndReduceDocuments(int[] sortedDocIds, int numDocs, boolean isMerge) throws IOException { + try { + if (sortedDocIds == null || sortedDocIds.length == 0) { + logger.debug("Sorted doc ids array is null"); + return Collections.emptyIterator(); + } + try { + StarTreeDocumentsSorter.sort(sortedDocIds, -1, numDocs, index -> { + try { + return segmentDocumentFileManager.readDimensions(sortedDocIds[index]); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } catch (UncheckedIOException ex) { + // Unwrap UncheckedIOException and throw as IOException + if (ex.getCause() != null) { + throw ex.getCause(); + } + throw ex; + } + final StarTreeDocument currentDocument = segmentDocumentFileManager.readStarTreeDocument(sortedDocIds[0], isMerge); + // Create an iterator for aggregated documents + return new Iterator() { + StarTreeDocument tempCurrentDocument = currentDocument; + boolean hasNext = true; + int docId = 1; + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public StarTreeDocument next() { + StarTreeDocument next = reduceSegmentStarTreeDocuments(null, tempCurrentDocument, isMerge); + while (docId < numDocs) { + StarTreeDocument doc; + try { + doc = segmentDocumentFileManager.readStarTreeDocument(sortedDocIds[docId++], isMerge); + } catch (IOException e) { + throw new RuntimeException("Reducing documents failed ", e); + } + if 
(!Arrays.equals(doc.dimensions, next.dimensions)) { + tempCurrentDocument = doc; + return next; + } else { + next = reduceSegmentStarTreeDocuments(next, doc, isMerge); + } + } + hasNext = false; + try { + segmentDocumentFileManager.close(); + } catch (IOException ex) { + logger.error("Closing segment documents file failed", ex); + } + return next; + } + }; + } catch (IOException ex) { + IOUtils.closeWhileHandlingException(segmentDocumentFileManager); + throw ex; + } + } + + /** + * Get star tree document for the given docId from the star-tree.documents file + */ + @Override + public StarTreeDocument getStarTreeDocument(int docId) throws IOException { + return starTreeDocumentFileManager.readStarTreeDocument(docId, true); + } + + // This should be only used for testing + @Override + public List getStarTreeDocuments() throws IOException { + List starTreeDocuments = new ArrayList<>(); + for (int i = 0; i < numStarTreeDocs; i++) { + starTreeDocuments.add(getStarTreeDocument(i)); + } + return starTreeDocuments; + } + + @Override + public Long getDimensionValue(int docId, int dimensionId) throws IOException { + return starTreeDocumentFileManager.getDimensionValue(docId, dimensionId); + } + + /** + * Generates a star-tree for a given star-node + * + * @param startDocId Start document id in the star-tree + * @param endDocId End document id (exclusive) in the star-tree + * @param dimensionId Dimension id of the star-node + * @return iterator for star-tree documents of star-node + * @throws IOException throws when unable to generate star-tree for star-node + */ + @Override + public Iterator generateStarTreeDocumentsForStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException { + // Sort all documents using an int array + int numDocs = endDocId - startDocId; + int[] sortedDocIds = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = startDocId + i; + } + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, index -> { + try 
{ + return starTreeDocumentFileManager.readDimensions(sortedDocIds[index]); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + // Create an iterator for aggregated documents + return new Iterator() { + boolean hasNext = true; + StarTreeDocument currentDocument = getStarTreeDocument(sortedDocIds[0]); + int docId = 1; + + private boolean hasSameDimensions(StarTreeDocument document1, StarTreeDocument document2) { + for (int i = dimensionId + 1; i < starTreeField.getDimensionsOrder().size(); i++) { + if (!Objects.equals(document1.dimensions[i], document2.dimensions[i])) { + return false; + } + } + return true; + } + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public StarTreeDocument next() { + StarTreeDocument next = reduceStarTreeDocuments(null, currentDocument); + next.dimensions[dimensionId] = STAR_IN_DOC_VALUES_INDEX; + while (docId < numDocs) { + StarTreeDocument document; + try { + document = getStarTreeDocument(sortedDocIds[docId++]); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (!hasSameDimensions(document, currentDocument)) { + currentDocument = document; + return next; + } else { + next = reduceStarTreeDocuments(next, document); + } + } + hasNext = false; + return next; + } + }; + } + + @Override + public void close() throws IOException { + IOUtils.closeWhileHandlingException(starTreeDocumentFileManager, segmentDocumentFileManager); + super.close(); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index 1599be2e76a56..8ff111d3b41d9 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -127,34 +127,12 @@ 
StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); } - boolean endOfDoc = false; int currentDocId = 0; int numSegmentDocs = Integer.parseInt( starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) ); while (currentDocId < numSegmentDocs) { - Long[] dims = new Long[dimensionsSplitOrder.size()]; - int i = 0; - for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) { - dimensionDocValueIterator.nextDoc(currentDocId); - Long val = dimensionDocValueIterator.value(currentDocId); - dims[i] = val; - i++; - } - i = 0; - Object[] metrics = new Object[metricReaders.size()]; - for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) { - metricDocValuesIterator.nextDoc(currentDocId); - // As part of merge, we traverse the star tree doc values - // The type of data stored in metric fields is different from the - // actual indexing field they're based on - metrics[i] = metricAggregatorInfos.get(i) - .getValueAggregators() - .toStarTreeNumericTypeValue(metricDocValuesIterator.value(currentDocId)); - i++; - } - StarTreeDocument starTreeDocument = new StarTreeDocument(dims, metrics); - starTreeDocuments.add(starTreeDocument); + starTreeDocuments.add(getStarTreeDocument(currentDocId, dimensionReaders, metricReaders)); currentDocId++; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java new file mode 100644 index 0000000000000..fe94df57d9535 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file 
be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +/** + * Class for managing segment documents file. + * Segment documents are stored in a single file named 'segment.documents' for sorting and aggregation. A document ID array is created, + * and the document IDs in the array are swapped during sorting based on the actual segment document values in the file. 
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class SegmentDocsFileManager extends AbstractDocumentsFileManager implements Closeable {
+
+    private static final Logger logger = LogManager.getLogger(SegmentDocsFileManager.class);
+    private static final String SEGMENT_DOC_FILE_NAME = "segment.documents";
+    private IndexInput segmentDocsFileInput;
+    private RandomAccessInput segmentRandomInput;
+    final IndexOutput segmentDocsFileOutput;
+
+    /**
+     * Creates the manager and opens the temporary 'segment.documents' output via {@code tmpDirectory}.
+     *
+     * @throws IOException if the temporary output cannot be created; this manager is closed before rethrowing
+     */
+    public SegmentDocsFileManager(SegmentWriteState state, StarTreeField starTreeField, List<MetricAggregatorInfo> metricAggregatorInfos)
+        throws IOException {
+        super(state, starTreeField, metricAggregatorInfos);
+        try {
+            segmentDocsFileOutput = tmpDirectory.createTempOutput(SEGMENT_DOC_FILE_NAME, state.segmentSuffix, state.context);
+        } catch (IOException e) {
+            IOUtils.closeWhileHandlingException(this);
+            throw e;
+        }
+    }
+
+    /**
+     * Writes the given document to the segment documents output.
+     */
+    @Override
+    public void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isAggregatedDoc) throws IOException {
+        writeStarTreeDocument(starTreeDocument, segmentDocsFileOutput, isAggregatedDoc);
+    }
+
+    /**
+     * Switches from writing to reading on first use: closes the output and opens the same file for random access.
+     * On any failure this manager is closed and the exception is rethrown.
+     */
+    private void maybeInitializeSegmentInput() throws IOException {
+        try {
+            if (segmentDocsFileInput == null) {
+                // A failed close must surface here: the file is about to be read back,
+                // so a swallowed error could silently yield truncated documents.
+                IOUtils.close(segmentDocsFileOutput);
+                segmentDocsFileInput = tmpDirectory.openInput(segmentDocsFileOutput.getName(), state.context);
+                segmentRandomInput = segmentDocsFileInput.randomAccessSlice(0, segmentDocsFileInput.length());
+            }
+        } catch (IOException e) {
+            IOUtils.closeWhileHandlingException(this);
+            throw e;
+        }
+    }
+
+    /**
+     * Reads the document stored at byte position {@code docId * docSizeInBytes}.
+     */
+    @Override
+    public StarTreeDocument readStarTreeDocument(int docId, boolean isAggregatedDoc) throws IOException {
+        maybeInitializeSegmentInput();
+        return readStarTreeDocument(segmentRandomInput, (long) docId * docSizeInBytes, isAggregatedDoc);
+    }
+
+    /**
+     * Reads only the dimension values of the document stored at byte position {@code docId * docSizeInBytes}.
+     */
+    @Override
+    public Long[] readDimensions(int docId) throws IOException {
+        maybeInitializeSegmentInput();
+        Long[] dims = new Long[starTreeField.getDimensionsOrder().size()];
+        readDimensions(dims, segmentRandomInput, (long) docId * docSizeInBytes);
+        return dims;
+    }
+
+    /**
+     * Returns a single dimension value; reads all dimensions of the document and picks {@code dimensionId}.
+     */
+    @Override
+    public Long getDimensionValue(int docId, int dimensionId) throws IOException {
+        Long[] dims = readDimensions(docId);
+        return dims[dimensionId];
+    }
+
+    /**
+     * Closes the input and output and deletes the temporary file.
+     *
+     * @throws IOException if the temporary file cannot be deleted
+     */
+    @Override
+    public void close() throws IOException {
+        // Release both handles first so the file is no longer open when it is deleted.
+        IOUtils.closeWhileHandlingException(segmentDocsFileInput, segmentDocsFileOutput);
+        if (segmentDocsFileOutput != null) {
+            tmpDirectory.deleteFile(segmentDocsFileOutput.getName());
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java
new file mode 100644
index 0000000000000..779ed77b0540a
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java
@@ -0,0 +1,294 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Star tree document file manager. + * This class manages all the temporary files associated with off heap star tree builder. + *

<p>
+ * Star tree documents are stored in multiple 'star-tree.documents' files. The algorithm works as follows:
+ * <ol>
+ * <li>Initially, aggregated documents are created based on the segment documents.</li>
+ * <li>Further, star tree documents are generated (e.g., in the {@code generateStarTreeDocumentsForStarNode} method) by reading the current
+ * aggregated documents and creating new aggregated star tree documents, which are appended to the 'star-tree.documents' files.</li>
+ * <li>This process is repeated until all combinations of star tree documents are generated.</li>
+ * </ol>
+ * <p>
+ * In cases where previously written star tree documents need to be read from the 'star-tree.documents' files, the current
+ * 'star-tree.documents' file is closed, and the values are read. Then, the derived values get appended to a new 'star-tree.documents' file.
+ * This is necessary because Lucene maintains immutability of data, and an {@code IndexOutput} cannot be kept open while creating an
+ * {@code IndexInput} on the same file, as all file contents may not be visible in the reader. Therefore, the {@code IndexOutput} must be
+ * closed to ensure all data can be read before creating an {@code IndexInput}. Additionally, an {@code IndexOutput} cannot be reopened,
+ * so a new file is created for the new star tree documents.
+ * <p>
The set of 'star-tree.documents' files is maintained, and an insertion-ordered map records the exclusive end document ID of each file.
+ * Once the number of files reaches a set threshold, the files are merged.
+ *
+ */
+public class StarTreeDocsFileManager extends AbstractDocumentsFileManager implements Closeable {
+    private static final Logger logger = LogManager.getLogger(StarTreeDocsFileManager.class);
+    private static final String STAR_TREE_DOC_FILE_NAME = "star-tree.documents";
+    public static final int DEFAULT_FILE_COUNT_MERGE_THRESHOLD = 5;
+    private IndexInput starTreeDocsFileInput;
+    private RandomAccessInput starTreeDocsFileRandomInput;
+    private IndexOutput starTreeDocsFileOutput;
+    // file name -> exclusive end doc id of that file; LinkedHashMap keeps the files in creation order
+    private final Map<String, Integer> fileToEndDocIdMap;
+    // byte offset of every written document; long so that offsets into a merged file cannot overflow int
+    private final List<Long> starTreeDocumentOffsets = new ArrayList<>();
+    private int currentFileStartDocId;
+    private int numReadableStarTreeDocuments;
+    private int starTreeFileCount = -1;
+    // bytes written so far to the current output file
+    private long currBytes = 0;
+    private final int fileCountMergeThreshold;
+    private int numStarTreeDocs = 0;
+
+    public StarTreeDocsFileManager(SegmentWriteState state, StarTreeField starTreeField, List<MetricAggregatorInfo> metricAggregatorInfos)
+        throws IOException {
+        this(state, starTreeField, metricAggregatorInfos, DEFAULT_FILE_COUNT_MERGE_THRESHOLD);
+    }
+
+    /**
+     * @param fileCountThreshold number of temporary files at which they are merged into one
+     * @throws IOException if the first temporary output cannot be created; this manager is closed before rethrowing
+     */
+    public StarTreeDocsFileManager(
+        SegmentWriteState state,
+        StarTreeField starTreeField,
+        List<MetricAggregatorInfo> metricAggregatorInfos,
+        int fileCountThreshold
+    ) throws IOException {
+        super(state, starTreeField, metricAggregatorInfos);
+        fileToEndDocIdMap = new LinkedHashMap<>();
+        try {
+            starTreeDocsFileOutput = createStarTreeDocumentsFileOutput();
+        } catch (IOException e) {
+            // The output field is still unassigned here, so closing this manager is all the cleanup needed.
+            IOUtils.closeWhileHandlingException(this);
+            throw e;
+        }
+        fileCountMergeThreshold = fileCountThreshold;
+    }
+
+    /**
+     * Creates a new star tree document temporary file to store star tree documents.
+     */
+    IndexOutput createStarTreeDocumentsFileOutput() throws IOException {
+        starTreeFileCount++;
+        return tmpDirectory.createTempOutput(STAR_TREE_DOC_FILE_NAME + starTreeFileCount, state.segmentSuffix, state.context);
+    }
+
+    @Override
+    public void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isAggregatedDoc) throws IOException {
+        assert isAggregatedDoc == true;
+        int numBytes = writeStarTreeDocument(starTreeDocument, starTreeDocsFileOutput, true);
+        addStarTreeDocumentOffset(numBytes);
+        numStarTreeDocs++;
+    }
+
+    @Override
+    public StarTreeDocument readStarTreeDocument(int docId, boolean isAggregatedDoc) throws IOException {
+        assert isAggregatedDoc == true;
+        ensureDocumentReadable(docId);
+        return readStarTreeDocument(starTreeDocsFileRandomInput, starTreeDocumentOffsets.get(docId), true);
+    }
+
+    @Override
+    public Long getDimensionValue(int docId, int dimensionId) throws IOException {
+        Long[] dims = readDimensions(docId);
+        return dims[dimensionId];
+    }
+
+    @Override
+    public Long[] readDimensions(int docId) throws IOException {
+        ensureDocumentReadable(docId);
+        Long[] dims = new Long[starTreeField.getDimensionsOrder().size()];
+        readDimensions(dims, starTreeDocsFileRandomInput, starTreeDocumentOffsets.get(docId));
+        return dims;
+    }
+
+    /**
+     * Records the offset of the document just written and advances the byte counter of the current file.
+     * The first call fixes "docSizeInBytes"; every later document must have the same size.
+     */
+    private void addStarTreeDocumentOffset(int bytes) {
+        starTreeDocumentOffsets.add(currBytes);
+        currBytes += bytes;
+        if (docSizeInBytes == -1) {
+            docSizeInBytes = bytes;
+        }
+        assert docSizeInBytes == bytes;
+    }
+
+    /**
+     * Load the correct StarTreeDocuments file based on the docId
+     */
+    private void ensureDocumentReadable(int docId) throws IOException {
+        ensureDocumentReadable(docId, true);
+    }
+
+    /**
+     * Load the correct StarTreeDocuments file based on the docId
+     * "currentFileStartDocId" and "numReadableStarTreeDocuments" tracks the "start doc id" and "end doc id + 1"
+     * of the range in the current open 'star-tree.documents' file
+     */
+    private void ensureDocumentReadable(int docId, boolean shouldCreateFileOutput) throws IOException {
+        try {
+            if (docId >= currentFileStartDocId && docId < numReadableStarTreeDocuments) {
+                return;
+            }
+            IOUtils.closeWhileHandlingException(starTreeDocsFileInput);
+            starTreeDocsFileInput = null;
+            if (docId < numStarTreeDocs) {
+                loadStarTreeDocumentFile(docId);
+            }
+            if (starTreeDocsFileInput != null) {
+                return;
+            }
+            closeAndMaybeCreateNewFile(shouldCreateFileOutput, numStarTreeDocs);
+            loadStarTreeDocumentFile(docId);
+        } catch (IOException ex) {
+            IOUtils.closeWhileHandlingException(this);
+            throw ex;
+        }
+    }
+
+    /**
+     * The fileToEndDocIdMap is in the following format
+     * file1 == 521 [ contains docs from 0 to 520 ]
+     * file2 == 780 [ contains docs from 521 to 779 ]
+     * <p>
+     * This method loads the correct 'star-tree.documents' file based on the docId
+     * and updates the "currentFileStartDocId" and "numReadableStarTreeDocuments"
+     */
+    private void loadStarTreeDocumentFile(int docId) throws IOException {
+        int startDocId = 0;
+        for (Map.Entry<String, Integer> entry : fileToEndDocIdMap.entrySet()) {
+            if (docId < entry.getValue()) {
+                starTreeDocsFileInput = tmpDirectory.openInput(entry.getKey(), state.context);
+                starTreeDocsFileRandomInput = starTreeDocsFileInput.randomAccessSlice(
+                    starTreeDocsFileInput.getFilePointer(),
+                    starTreeDocsFileInput.length() - starTreeDocsFileInput.getFilePointer()
+                );
+                numReadableStarTreeDocuments = entry.getValue();
+                break;
+            }
+            startDocId = entry.getValue();
+        }
+        this.currentFileStartDocId = startDocId;
+    }
+
+    /**
+     * This case handles when the requested document ID is beyond the range of the currently open 'star-tree.documents' file.
+     * In this scenario, the following steps are taken:
+     * <p>
+     * 1. Close the current 'star-tree.documents' file.
+     * 2. Create a new 'star-tree.documents' file if the operation involves appending new documents.
+     * If the operation is only for reading existing documents, a new file is not created.
+     */
+    private void closeAndMaybeCreateNewFile(boolean shouldCreateFileForAppend, int numStarTreeDocs) throws IOException {
+        currBytes = 0;
+        if (starTreeDocsFileOutput != null) {
+            fileToEndDocIdMap.put(starTreeDocsFileOutput.getName(), numStarTreeDocs);
+            IOUtils.close(starTreeDocsFileOutput);
+        }
+        if (shouldCreateFileForAppend) {
+            starTreeDocsFileOutput = createStarTreeDocumentsFileOutput();
+            if (fileToEndDocIdMap.size() >= fileCountMergeThreshold) {
+                mergeFiles(numStarTreeDocs);
+            }
+        }
+        starTreeDocsFileRandomInput = null;
+    }
+
+    /**
+     * Merge temporary star tree files once the number of files reach threshold
+     */
+    private void mergeFiles(int numStarTreeDocs) throws IOException {
+        long st = System.currentTimeMillis();
+        try (IndexOutput mergedOutput = createStarTreeDocumentsFileOutput()) {
+            long mergeBytes = mergeFilesToOutput(mergedOutput);
+            logger.debug(
+                "Created merge file : {} in : {} ms with size of : {} KB",
+                mergedOutput.getName(), // the merged file, not the current append target
+                System.currentTimeMillis() - st,
+                mergeBytes / 1024
+            );
+
+            deleteOldFiles();
+            fileToEndDocIdMap.clear();
+            fileToEndDocIdMap.put(mergedOutput.getName(), numStarTreeDocs);
+            resetStarTreeDocumentOffsets();
+        }
+    }
+
+    /**
+     * Merge all files to single IndexOutput
+     *
+     * @return total number of bytes copied into {@code mergedOutput}
+     */
+    private long mergeFilesToOutput(IndexOutput mergedOutput) throws IOException {
+        long mergeBytes = 0L;
+        for (Map.Entry<String, Integer> entry : fileToEndDocIdMap.entrySet()) {
+            // try-with-resources so the input is released even when the copy fails
+            try (IndexInput input = tmpDirectory.openInput(entry.getKey(), state.context)) {
+                mergedOutput.copyBytes(input, input.length());
+                mergeBytes += input.length();
+            }
+        }
+        return mergeBytes;
+    }
+
+    /**
+     * Delete the old star-tree.documents files
+     */
+    private void deleteOldFiles() throws IOException {
+        for (String fileName : fileToEndDocIdMap.keySet()) {
+            tmpDirectory.deleteFile(fileName);
+        }
+    }
+
+    /**
+     * Reset the star tree document offsets based on the merged file
+     */
+    private void resetStarTreeDocumentOffsets() {
+        // long accumulator: the merged file holds every document, so the running offset can exceed Integer.MAX_VALUE
+        long curr = 0;
+        for (int i = 0; i < starTreeDocumentOffsets.size(); i++) {
+            starTreeDocumentOffsets.set(i, curr);
+            curr += docSizeInBytes;
+        }
+    }
+
+    @Override
+    public void close() {
+        try {
+            if (starTreeDocsFileOutput != null) {
+                IOUtils.closeWhileHandlingException(starTreeDocsFileOutput);
+                try {
+                    tmpDirectory.deleteFile(starTreeDocsFileOutput.getName());
+                } catch (IOException ignored) {} // similar to IOUtils.deleteFilesIgnoringExceptions
+            }
+        } finally {
+            IOUtils.closeWhileHandlingException(starTreeDocsFileInput, starTreeDocsFileOutput);
+        }
+        // Delete all temporary star tree document files
+        for (String file : fileToEndDocIdMap.keySet()) {
+            try {
+                tmpDirectory.deleteFile(file);
+            } catch (IOException ignored) {} // similar to IOUtils.deleteFilesIgnoringExceptions
+        }
+        starTreeDocumentOffsets.clear();
+        fileToEndDocIdMap.clear();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java
index 6c3d476aa3a55..3b376d7c34351 100644
--- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java
+++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java
@@ -75,7 +75,7 @@ public void build(Map fieldProducerMap) throws IOExce
         // Build all star-trees
         for (StarTreeField starTreeField : starTreeFields) {
-            try (StarTreeBuilder starTreeBuilder = getSingleTreeBuilder(starTreeField, state, mapperService)) {
+            try (StarTreeBuilder starTreeBuilder = getStarTreeBuilder(starTreeField, state, mapperService)) {
                 starTreeBuilder.build(fieldProducerMap);
             }
} @@ -102,9 +102,9 @@ public void buildDuringMerge(final Map> starTreeVal continue; } StarTreeField starTreeField = starTreeValuesList.get(0).getStarTreeField(); - StarTreeBuilder builder = getSingleTreeBuilder(starTreeField, state, mapperService); - builder.build(starTreeValuesList); - builder.close(); + try (StarTreeBuilder builder = getStarTreeBuilder(starTreeField, state, mapperService)) { + builder.build(starTreeValuesList); + } } logger.debug( "Took {} ms to merge {} star-trees with star-tree fields", @@ -116,14 +116,13 @@ public void buildDuringMerge(final Map> starTreeVal /** * Get star-tree builder based on build mode. */ - StarTreeBuilder getSingleTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) + StarTreeBuilder getStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) throws IOException { switch (starTreeField.getStarTreeConfig().getBuildMode()) { case ON_HEAP: return new OnHeapStarTreeBuilder(starTreeField, state, mapperService); case OFF_HEAP: - // TODO - // return new OffHeapStarTreeBuilder(starTreeField, state, mapperService); + return new OffHeapStarTreeBuilder(starTreeField, state, mapperService); default: throw new IllegalArgumentException( String.format( diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java new file mode 100644 index 0000000000000..a508e497adcdf --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.index.compositeindex.datacube.startree.utils;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RandomAccessInput;
+import org.opensearch.common.util.ByteArrayBackedBitset;
+
+import java.io.IOException;
+import java.util.function.Function;
+
+/**
+ * Helper class to read/write bitset for null values and identity values.
+ */
+public class StarTreeDocumentBitSetUtil {
+    /**
+     * Write bitset for null values.
+     *
+     * @param array  array of objects
+     * @param output output stream
+     * @return number of bytes written
+     * @throws IOException if an I/O error occurs while writing to the output stream
+     */
+    public static int writeBitSet(Object[] array, IndexOutput output) throws IOException {
+        ByteArrayBackedBitset bitset = new ByteArrayBackedBitset(getLength(array));
+        for (int i = 0; i < array.length; i++) {
+            // bit i is set exactly when element i is null
+            if (array[i] == null) {
+                bitset.set(i);
+            }
+        }
+        return bitset.write(output);
+    }
+
+    /**
+     * Set identity values based on bitset.
+     * For every index whose bit is set, the array element is overwritten with the supplier's value for that index.
+     *
+     * @param input                 input to read the bitset from
+     * @param offset                position in {@code input} at which the bitset starts
+     * @param array                 array whose flagged elements are replaced in place
+     * @param identityValueSupplier maps an array index to the value stored at that index when its bit is set
+     * @return the value of {@code getCurrBytesRead()} on the bitset after it was loaded
+     * @throws IOException if an I/O error occurs while reading from the input
+     */
+    public static int readBitSet(RandomAccessInput input, long offset, Object[] array, Function<Integer, Object> identityValueSupplier)
+        throws IOException {
+        ByteArrayBackedBitset bitset = new ByteArrayBackedBitset(input, offset, getLength(array));
+        for (int i = 0; i < array.length; i++) {
+            if (bitset.get(i)) {
+                array[i] = identityValueSupplier.apply(i);
+            }
+        }
+        return bitset.getCurrBytesRead();
+    }
+
+    /**
+     * Number of bytes needed to hold one bit per array element (array length divided by 8, rounded up).
+     */
+    private static int getLength(Object[] array) {
+        return (array.length / 8) + (array.length % 8 == 0 ? 0 : 1);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java
new file mode 100644
index 0000000000000..7b1c63bc611ee
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java
@@ -0,0 +1,66 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.compositeindex.datacube.startree.utils;
+
+import org.apache.lucene.util.IntroSorter;
+
+import java.util.Objects;
+import java.util.function.IntFunction;
+
+/**
+ * Utility class for building star tree
+ */
+public class StarTreeDocumentsSorter {
+    /**
+     * Sort documents based on the dimension values off heap using intro sorter.
+     * Only dimensions with an index greater than {@code dimensionId} take part in the ordering; a null
+     * dimension value sorts after every non-null value.
+     *
+     * @param sortedDocIds     array that is permuted in place as the sorter swaps positions
+     * @param dimensionId      dimensions up to and including this index are ignored by the comparison
+     * @param numDocs          the positions {@code [0, numDocs)} are sorted
+     * @param dimensionsReader maps a position in the sorted range to the dimension values at that position
+     */
+    public static void sort(
+        final int[] sortedDocIds,
+        final int dimensionId,
+        final int numDocs,
+        final IntFunction<Long[]> dimensionsReader
+    ) {
+        new IntroSorter() {
+            // dimension values of the current pivot position
+            private Long[] dimensions;
+
+            @Override
+            protected void swap(int i, int j) {
+                int temp = sortedDocIds[i];
+                sortedDocIds[i] = sortedDocIds[j];
+                sortedDocIds[j] = temp;
+            }
+
+            @Override
+            protected void setPivot(int i) {
+                dimensions = dimensionsReader.apply(i);
+            }
+
+            @Override
+            protected int comparePivot(int j) {
+                Long[] currentDimensions = dimensionsReader.apply(j);
+                for (int i = dimensionId + 1; i < dimensions.length; i++) {
+                    Long dimension = currentDimensions[i];
+                    if (!Objects.equals(dimensions[i], dimension)) {
+                        // The values differ, so they cannot both be null; at most one of the two checks below fires.
+                        if (dimension == null) {
+                            return -1;
+                        }
+                        if (dimensions[i] == null) {
+                            return 1;
+                        }
+                        return Long.compare(dimensions[i], dimension);
+                    }
+                }
+                return 0;
+            }
+        }.sort(0, numDocs);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java
index 5cf737c61ab2d..a5d59a2602633 100644
--- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java
+++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java
@@ -62,4 +62,8 @@ public class TreeNode {
      * A map containing the child nodes of this star-tree node, keyed by their dimension id.
*/ public Map children; + + public long getDimensionValue() { + return dimensionValue; + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index d2debe762e9be..d9539f9dc0c82 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -84,8 +84,7 @@ public static class Builder extends ParametrizedFieldMapper.Builder { List.of(XContentMapValues.nodeStringArrayValue(paramMap.getOrDefault(SKIP_STAR_NODE_IN_DIMS, new ArrayList()))) ); paramMap.remove(SKIP_STAR_NODE_IN_DIMS); - // TODO : change this to off heap once off heap gets implemented - StarTreeFieldConfiguration.StarTreeBuildMode buildMode = StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP; + StarTreeFieldConfiguration.StarTreeBuildMode buildMode = StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP; List dimensions = buildDimensions(name, paramMap, context); paramMap.remove(ORDERED_DIMENSIONS); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index 76a7875919a8b..131d7444ff91c 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -55,6 +55,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Objects; @@ -355,19 +356,21 @@ public void test_sortAndAggregateStarTreeDocuments_nullMetricField() throws IOEx } } - public void test_sortAndAggregateStarTreeDocuments_nullDimensionField() throws 
IOException { + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/14813") + public void test_sortAndAggregateStarTreeDocuments_nullAndMinusOneInDimensionField() throws IOException { int noOfStarTreeDocuments = 5; StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; // Setting second metric iterator as empty sorted numeric , indicating a metric field is null starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Double[] { 12.0, null, randomDouble() }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, null, randomDouble() }); - starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, null, randomDouble() }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Double[] { 10.0, null, randomDouble() }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Double[] { 14.0, null, randomDouble() }); starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Double[] { 9.0, null, randomDouble() }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, null, randomDouble() }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { -1L, 4L, 2L, 1L }, new Double[] { 11.0, null, randomDouble() }); List inorderStarTreeDocuments = List.of( new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Object[] { 21.0, 0.0, 2L }), - new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, 0.0, 3L }) + new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Object[] { 24.0, 0.0, 2L }), + new StarTreeDocument(new Long[] { -1L, 4L, 2L, 1L }, new Object[] { 11.0, 0.0, 1L }) ); Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); @@ -388,8 +391,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionField() throws I metricsIterators 
); - while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { - StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + for (StarTreeDocument resultStarTreeDocument : builder.getStarTreeDocuments()) { StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); @@ -399,6 +401,8 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionField() throws I assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); assertEquals(expectedStarTreeDocument.metrics[2], resultStarTreeDocument.metrics[2]); } + builder.build(segmentStarTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics() throws IOException { @@ -411,7 +415,9 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics( starTreeDocuments[3] = new StarTreeDocument(new Long[] { null, null, null, null }, new Double[] { null, null, null }); starTreeDocuments[4] = new StarTreeDocument(new Long[] { null, null, null, null }, new Double[] { null, null, null }); - List inorderStarTreeDocuments = List.of(); + List inorderStarTreeDocuments = List.of( + new StarTreeDocument(new Long[] { null, null, null, null }, new Object[] { 0.0, 0.0, 5L }) + ); Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; @@ -446,6 +452,8 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics( assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); assertEquals(expectedStarTreeDocument.metrics[2], 
resultStarTreeDocument.metrics[2]); } + builder.build(segmentStarTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } public void test_sortAndAggregateStarTreeDocuments_emptyDimensions() throws IOException { @@ -595,6 +603,8 @@ public void test_sortAndAggregateStarTreeDocument_DoubleMaxAndDoubleMinMetrics() } assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); + builder.build(segmentStarTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 3, 1, builder.getStarTreeDocuments()); } @@ -671,6 +681,7 @@ public void test_build_halfFloatMetrics() throws IOException { Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + builder.build(expectedStarTreeDocumentIterator); } public void test_build_floatMetrics() throws IOException { @@ -975,6 +986,7 @@ public void test_build_starTreeDataset() throws IOException { assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); } + validateStarTree(builder.getRootNode(), 3, 1, builder.getStarTreeDocuments()); } private static Map> getExpectedDimToValueMap() { @@ -1055,7 +1067,7 @@ public void testFlushFlow() throws IOException { SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimDvs, @@ -1081,6 +1093,62 @@ public void testFlushFlow() 
throws IOException { assertEquals(1L, starTreeDocument.metrics[1]); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testFlushFlowDimsReverse() throws IOException { + List dimList = List.of(5L, 4L, 3L, 2L, 1L); + List docsWithField = List.of(0, 1, 2, 3, 4); + List dimList2 = List.of(5L, 4L, 3L, 2L, 1L, 0L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); + + List metricsList = List.of( + getLongFromDouble(50.0), + getLongFromDouble(40.0), + getLongFromDouble(30.0), + getLongFromDouble(20.0), + getLongFromDouble(10.0), + getLongFromDouble(0.0) + ); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5); + + StarTreeField sf = getStarTreeFieldWithMultipleMetrics(); + SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); + SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); + SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); + + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); + SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; + Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( + dimDvs, + List.of(new SequentialDocValuesIterator(m1sndv), new SequentialDocValuesIterator(m2sndv)) + ); + /** + * Asserting following dim / metrics [ dim1, dim2 / Sum [metric], count [metric] ] + [1, 1] | [10.0, 1] + [2, 2] | [20.0, 1] + [3, 3] | [30.0, 1] + [4, 4] | [40.0, 1] + [5, 5] | [50.0, 1] + [null, 0] | [0.0, 1] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + if (starTreeDocument.dimensions[0] != null) { + assertEquals(count, (long) starTreeDocument.dimensions[0]); + } 
else { + assertEquals(6, count); + } + assertEquals(starTreeDocument.dimensions[1] * 10.0, starTreeDocument.metrics[0]); + assertEquals(1L, starTreeDocument.metrics[1]); + } + assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testFlushFlowBuild() throws IOException { @@ -1120,7 +1188,7 @@ public void testFlushFlowBuild() throws IOException { SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - BaseStarTreeBuilder builder = getStarTreeBuilder(sf, getWriteState(100), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(100), mapperService); DocValuesProducer d1vp = getDocValuesProducer(d1sndv); DocValuesProducer d2vp = getDocValuesProducer(d2sndv); @@ -1147,7 +1215,7 @@ public void testFlushFlowBuild() throws IOException { starTreeDocument.metrics[0] ); } - builder.close(); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } private static DocValuesProducer getDocValuesProducer(SortedNumericDocValues sndv) { @@ -1209,7 +1277,7 @@ public void testMergeFlowWithSum() throws IOException { sf, "6" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] @@ -1232,6 +1300,8 @@ public void testMergeFlowWithSum() throws IOException { ); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithCount() throws IOException { @@ -1259,7 +1329,7 @@ public void testMergeFlowWithCount() throws IOException { sf, "6" ); 
- OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1279,6 +1349,9 @@ public void testMergeFlowWithCount() throws IOException { assertEquals(starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 : 4, starTreeDocument.metrics[0]); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } private StarTreeValues getStarTreeValues( @@ -1336,7 +1409,7 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1361,6 +1434,68 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { } } assertEquals(9, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testMergeFlowNumSegmentsDocs() throws IOException { + List dimList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, -1L, -1L, -1L); + List docsWithField = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, -1L, -1L, -1L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, -1L, -1L, -1L); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + List dimList3 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField3 = 
List.of(0, 1, 2, 3, 4); + List dimList4 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField4 = List.of(0, 1, 2, 3, 4); + + List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); + List metricsWithField2 = List.of(0, 1, 2, 3, 4); + + StarTreeField sf = getStarTreeField(MetricStat.COUNT); + StarTreeValues starTreeValues = getStarTreeValues( + getSortedNumericMock(dimList, docsWithField), + getSortedNumericMock(dimList2, docsWithField2), + getSortedNumericMock(metricsList, metricsWithField), + sf, + "6" + ); + + StarTreeValues starTreeValues2 = getStarTreeValues( + getSortedNumericMock(dimList3, docsWithField3), + getSortedNumericMock(dimList4, docsWithField4), + getSortedNumericMock(metricsList2, metricsWithField2), + sf, + "4" + ); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); + /** + * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] + [0, 0] | [0] + [1, 1] | [1] + [2, 2] | [2] + [3, 3] | [3] + [4, 4] | [4] + [5, 5] | [10] + [6, 6] | [6] + [7, 7] | [7] + [8, 8] | [8] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + if (Objects.equals(starTreeDocument.dimensions[0], 5L)) { + assertEquals(starTreeDocument.dimensions[0] * 2, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } + } + assertEquals(9, count); } public void testMergeFlowWithMissingDocs() throws IOException { @@ -1396,7 +1531,7 @@ public void testMergeFlowWithMissingDocs() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / 
metrics [ dim1, dim2 / Count [ metric] ] @@ -1421,6 +1556,138 @@ public void testMergeFlowWithMissingDocs() throws IOException { assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } assertEquals(10, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testMergeFlowWithMissingDocsWithZero() throws IOException { + List dimList = List.of(0L, 0L, 0L, 0L); + List docsWithField = List.of(0, 1, 2, 6); + List dimList2 = List.of(0L, 0L, 0L, 0L); + List docsWithField2 = List.of(0, 1, 2, 6); + + List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6); + + List dimList3 = List.of(5L, 6L, 8L, -1L); + List docsWithField3 = List.of(0, 1, 3, 4); + List dimList4 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField4 = List.of(0, 1, 2, 3, 4); + + List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); + List metricsWithField2 = List.of(0, 1, 2, 3, 4); + + StarTreeField sf = getStarTreeField(MetricStat.COUNT); + StarTreeValues starTreeValues = getStarTreeValues( + getSortedNumericMock(dimList, docsWithField), + getSortedNumericMock(dimList2, docsWithField2), + getSortedNumericMock(metricsList, metricsWithField), + sf, + "7" + ); + + StarTreeValues starTreeValues2 = getStarTreeValues( + getSortedNumericMock(dimList3, docsWithField3), + getSortedNumericMock(dimList4, docsWithField4), + getSortedNumericMock(metricsList2, metricsWithField2), + sf, + "4" + ); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); + /** + * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] + [0, 0] | [9] + [5, 5] | [5] + [6, 6] | [6] + [8, 8] | [8] + [null, 7] | [7] + [null, null] | [12] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = 
starTreeDocumentIterator.next(); + if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { + assertEquals(12L, (long) starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == null) { + assertEquals(7L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == 0) { + assertEquals(9L, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } + } + assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testMergeFlowWithMissingDocsWithZeroComplexCase() throws IOException { + List dimList = List.of(0L, 0L, 0L, 0L, 0L); + List docsWithField = List.of(0, 1, 2, 6, 8); + List dimList2 = List.of(0L, 0L, 0L, 0L); + List docsWithField2 = List.of(0, 1, 2, 6); + + List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + List dimList3 = List.of(5L, 6L, 8L, -1L); + List docsWithField3 = List.of(0, 1, 3, 4); + List dimList4 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField4 = List.of(0, 1, 2, 3, 4); + + List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); + List metricsWithField2 = List.of(0, 1, 2, 3, 4); + + StarTreeField sf = getStarTreeField(MetricStat.COUNT); + StarTreeValues starTreeValues = getStarTreeValues( + getSortedNumericMock(dimList, docsWithField), + getSortedNumericMock(dimList2, docsWithField2), + getSortedNumericMock(metricsList, metricsWithField), + sf, + "9" + ); + + StarTreeValues starTreeValues2 = getStarTreeValues( + getSortedNumericMock(dimList3, docsWithField3), + getSortedNumericMock(dimList4, docsWithField4), + getSortedNumericMock(metricsList2, metricsWithField2), + sf, + "4" + ); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, 
starTreeValues2)); + /** + * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] + [0, 0] | [9] + [0, null] | [8] + [5, 5] | [5] + [6, 6] | [6] + [8, 8] | [8] + [null, 7] | [7] + [null, null] | [19] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { + assertEquals(19L, (long) starTreeDocument.metrics[0]); + assertEquals(7, count); + } else if (starTreeDocument.dimensions[0] == null) { + assertEquals(7L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[1] == null) { + assertEquals(8L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == 0) { + assertEquals(9L, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } + } + assertEquals(7, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { @@ -1456,7 +1723,7 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1482,6 +1749,8 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { } } assertEquals(10, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { @@ -1517,7 +1786,7 @@ public void 
testMergeFlowWithDocsMissingAtTheEnd() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); + builder = getStarTreeBuilder(sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1542,6 +1811,8 @@ public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } assertEquals(10, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { @@ -1569,7 +1840,7 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { sf, "0" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(0), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(0), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1590,6 +1861,8 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithDuplicateDimensionValues() throws IOException { @@ -1664,8 +1937,8 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + 
builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); int count = 0; @@ -1693,7 +1966,7 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { count++; } assertEquals(401, count); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); } public void testMergeFlowWithMaxLeafDocs() throws IOException { @@ -1774,8 +2047,8 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); /** 635 docs get generated @@ -1790,7 +2063,7 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { [null, null, null, null] | [2495000.0] */ assertEquals(635, starTreeDocuments.size()); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); } private StarTreeValues getStarTreeValues( @@ -1892,11 +2165,11 @@ public void testMergeFlowWithDuplicateDimensionValueWithMaxLeafDocs() throws IOE metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), 
builder.getStarTreeDocuments()); } public static long getLongFromDouble(double value) { @@ -1991,8 +2264,8 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); Map> dimValueToDocIdMap = new HashMap<>(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); @@ -2007,7 +2280,7 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc } } assertEquals(1041, starTreeDocuments.size()); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); } private static StarTreeField getStarTreeField(int maxLeafDocs) { @@ -2151,7 +2424,7 @@ public void testMergeFlow() throws IOException { getAttributes(1000) ); - BaseStarTreeBuilder builder = getStarTreeBuilder(sf, writeState, mapperService); + builder = getStarTreeBuilder(sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** [0, 0, 0, 0] | [0.0] @@ -2163,11 +2436,183 @@ public void testMergeFlow() throws IOException { ... 
[999, 999, 999, 999] | [19980.0] */ - while (starTreeDocumentIterator.hasNext()) { - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { assertEquals(starTreeDocument.dimensions[0] * 20.0, starTreeDocument.metrics[0]); } - builder.close(); + builder.build(starTreeDocumentIterator); + + // Validate the star tree structure + validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); + } + + private void validateStarTree(TreeNode root, int totalDimensions, int maxLeafDocuments, List starTreeDocuments) { + Queue queue = new LinkedList<>(); + queue.offer(new Object[] { root, false }); + while (!queue.isEmpty()) { + Object[] current = queue.poll(); + TreeNode node = (TreeNode) current[0]; + boolean currentIsStarNode = (boolean) current[1]; + + assertNotNull(node); + + // assert dimensions + if (node.dimensionId != TreeNode.ALL) { + assertTrue(node.dimensionId >= 0 && node.dimensionId < totalDimensions); + } + if (node.children != null && !node.children.isEmpty()) { + assertEquals(node.dimensionId + 1, node.childDimensionId); + assertTrue(node.childDimensionId < totalDimensions); + TreeNode starNode = null; + Object[] nonStarNodeCumulativeMetrics = getMetrics(starTreeDocuments); + for (Map.Entry entry : node.children.entrySet()) { + Long childDimensionValue = entry.getKey(); + TreeNode child = entry.getValue(); + Object[] currMetrics = getMetrics(starTreeDocuments); + if (!child.isStarNode) { + // Validate dimension values in documents + for (int i = child.startDocId; i < child.endDocId; i++) { + StarTreeDocument doc = starTreeDocuments.get(i); + int j = 0; + addMetrics(doc, currMetrics, j); + if (!child.isStarNode) { + Long dimension = doc.dimensions[child.dimensionId]; + assertEquals(childDimensionValue, dimension); + if (dimension != null) { + assertEquals(child.dimensionValue, (long) dimension); + } else { + // TODO : fix this ? 
+ assertEquals(child.dimensionValue, TreeNode.ALL); + } + } + } + Object[] aggregatedMetrics = starTreeDocuments.get(child.aggregatedDocId).metrics; + int j = 0; + for (Object metric : currMetrics) { + /* + * TODO : refactor this to handle any data type + */ + if (metric instanceof Double) { + nonStarNodeCumulativeMetrics[j] = (double) nonStarNodeCumulativeMetrics[j] + (double) metric; + assertEquals((Double) metric, (Double) aggregatedMetrics[j], 0); + } else if (metric instanceof Long) { + nonStarNodeCumulativeMetrics[j] = (long) nonStarNodeCumulativeMetrics[j] + (long) metric; + assertEquals((long) metric, (long) aggregatedMetrics[j]); + } else if (metric instanceof Float) { + nonStarNodeCumulativeMetrics[j] = (float) nonStarNodeCumulativeMetrics[j] + (float) metric; + assertEquals((float) metric, (float) aggregatedMetrics[j], 0); + } + j++; + } + queue.offer(new Object[] { child, false }); + } else { + starNode = child; + } + } + // Add star node to queue + if (starNode != null) { + Object[] starNodeMetrics = getMetrics(starTreeDocuments); + for (int i = starNode.startDocId; i < starNode.endDocId; i++) { + StarTreeDocument doc = starTreeDocuments.get(i); + int j = 0; + addMetrics(doc, starNodeMetrics, j); + } + int j = 0; + Object[] aggregatedMetrics = starTreeDocuments.get(starNode.aggregatedDocId).metrics; + for (Object nonStarNodeCumulativeMetric : nonStarNodeCumulativeMetrics) { + assertEquals(nonStarNodeCumulativeMetric, starNodeMetrics[j]); + assertEquals(starNodeMetrics[j], aggregatedMetrics[j]); + /* + * TODO : refactor this to handle any data type + */ + if (nonStarNodeCumulativeMetric instanceof Double) { + assertEquals((double) nonStarNodeCumulativeMetric, (double) starNodeMetrics[j], 0); + assertEquals((double) nonStarNodeCumulativeMetric, (double) aggregatedMetrics[j], 0); + } else if (nonStarNodeCumulativeMetric instanceof Long) { + assertEquals((long) nonStarNodeCumulativeMetric, (long) starNodeMetrics[j]); + assertEquals((long) 
nonStarNodeCumulativeMetric, (long) aggregatedMetrics[j]); + } else if (nonStarNodeCumulativeMetric instanceof Float) { + assertEquals((float) nonStarNodeCumulativeMetric, (float) starNodeMetrics[j], 0); + assertEquals((float) nonStarNodeCumulativeMetric, (float) aggregatedMetrics[j], 0); + } + + j++; + } + assertEquals(-1L, starNode.dimensionValue); + queue.offer(new Object[] { starNode, true }); + } + } else { + assertTrue(node.endDocId - node.startDocId <= maxLeafDocuments); + } + + if (currentIsStarNode) { + StarTreeDocument prevDoc = null; + int docCount = 0; + int docId = node.startDocId; + int dimensionId = node.dimensionId; + + while (docId < node.endDocId) { + StarTreeDocument currentDoc = starTreeDocuments.get(docId); + docCount++; + + // Verify that the dimension at 'dimensionId' is set to STAR_IN_DOC_VALUES_INDEX + assertNull(currentDoc.dimensions[dimensionId]); + + // Verify sorting of documents + if (prevDoc != null) { + assertTrue(compareDocuments(prevDoc, currentDoc, dimensionId + 1, totalDimensions) <= 0); + } + prevDoc = currentDoc; + docId++; + } + + // Verify that the number of generated star documents matches the range in the star node + assertEquals(node.endDocId - node.startDocId, docCount); + } + } + } + + /** + * TODO : refactor this to handle any data type + */ + private static void addMetrics(StarTreeDocument doc, Object[] currMetrics, int j) { + for (Object metric : doc.metrics) { + if (metric instanceof Double) { + currMetrics[j] = (double) currMetrics[j] + (double) metric; + } else if (metric instanceof Long) { + currMetrics[j] = (long) currMetrics[j] + (long) metric; + } else if (metric instanceof Float) { + currMetrics[j] = (float) currMetrics[j] + (float) metric; + } + j++; + } + } + + private static Object[] getMetrics(List starTreeDocuments) { + Object[] nonStarNodeCumulativeMetrics = new Object[starTreeDocuments.get(0).metrics.length]; + for (int i = 0; i < nonStarNodeCumulativeMetrics.length; i++) { + if 
(starTreeDocuments.get(0).metrics[i] instanceof Long) { + nonStarNodeCumulativeMetrics[i] = 0L; + } else if (starTreeDocuments.get(0).metrics[i] instanceof Double) { + nonStarNodeCumulativeMetrics[i] = 0.0; + } else if (starTreeDocuments.get(0).metrics[i] instanceof Float) { + nonStarNodeCumulativeMetrics[i] = 0.0f; + } + } + return nonStarNodeCumulativeMetrics; + } + + private int compareDocuments(StarTreeDocument doc1, StarTreeDocument doc2, int startDim, int endDim) { + for (int i = startDim; i < endDim; i++) { + Long val1 = doc1.dimensions[i]; + Long val2 = doc2.dimensions[i]; + + if (!Objects.equals(val1, val2)) { + if (val1 == null) return 1; + if (val2 == null) return -1; + return Long.compare(val1, val2); + } + } + return 0; } Map getAttributes(int numSegmentDocs) { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java new file mode 100644 index 0000000000000..92382b78f60c6 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.index.SegmentWriteState; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.mapper.MapperService; + +import java.io.IOException; + +public class OffHeapStarTreeBuilderTests extends AbstractStarTreeBuilderTests { + @Override + public BaseStarTreeBuilder getStarTreeBuilder( + StarTreeField starTreeField, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) throws IOException { + return new OffHeapStarTreeBuilder(starTreeField, segmentWriteState, mapperService); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java index 564ab110fa7a5..828bddfb8aa6e 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java @@ -97,18 +97,10 @@ public void test_buildWithNoStarTreeFields() throws IOException { public void test_getStarTreeBuilder() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - StarTreeBuilder starTreeBuilder = starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService); + StarTreeBuilder starTreeBuilder = starTreesBuilder.getStarTreeBuilder(starTreeField, segmentWriteState, mapperService); assertTrue(starTreeBuilder instanceof OnHeapStarTreeBuilder); } - public void test_getStarTreeBuilder_illegalArgument() { - when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); - StarTreeFieldConfiguration starTreeFieldConfiguration = new 
StarTreeFieldConfiguration(1, new HashSet<>(), StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP); - StarTreeField starTreeField = new StarTreeField("star_tree", new ArrayList<>(), new ArrayList<>(), starTreeFieldConfiguration); - StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - assertThrows(IllegalArgumentException.class, () -> starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService)); - } - public void test_closeWithNoStarTreeFields() throws IOException { StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( 1, diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java index dfc83125b2806..f56f7d9906ae1 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java @@ -127,7 +127,5 @@ public void test_multipleCoordinatedDocumentReader() throws IOException { assertNotEquals(0, sequentialDocValuesIterator2.getDocId()); assertEquals(1, sequentialDocValuesIterator2.getDocId()); assertEquals(9L, (long) sequentialDocValuesIterator2.value(1)); - } - } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java new file mode 100644 index 0000000000000..7d1bd37246fae --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch 
Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.function.Function; + +/** + * Unit tests for {@link StarTreeDocumentBitSetUtil} + */ +public class StarTreeDocumentBitSetUtilTests extends OpenSearchTestCase { + + public void testWriteAndReadNullBitSets() throws IOException { + for (int k = 0; k < 10; k++) { + int randomArraySize = randomIntBetween(2, 256); + Long[] dims = new Long[randomArraySize]; + for (int i = 0; i < randomArraySize; i++) { + dims[i] = randomLong(); + } + testNullBasedOnBitset(dims); + } + } + + void testNullBasedOnBitset(Long[] dims) throws IOException { + Long[] dims1 = Arrays.copyOf(dims, dims.length); + int randomNullIndex1 = randomIntBetween(0, dims.length - 1); + int randomNullIndex2 = randomIntBetween(0, dims.length - 1); + dims[randomNullIndex1] = null; + dims[randomNullIndex2] = null; + Path basePath = createTempDir("OffHeapTests"); + FSDirectory fsDirectory = FSDirectory.open(basePath); + String TEST_FILE = "test_file"; + IndexOutput indexOutput = fsDirectory.createOutput(TEST_FILE, IOContext.DEFAULT); + StarTreeDocumentBitSetUtil.writeBitSet(dims, indexOutput); + indexOutput.close(); + + // test null value on read + IndexInput in = fsDirectory.openInput(TEST_FILE, IOContext.DEFAULT); + RandomAccessInput randomAccessInput = in.randomAccessSlice(0, in.length()); + Function identityValueSupplier = i -> null; + StarTreeDocumentBitSetUtil.readBitSet(randomAccessInput, 0, dims1, identityValueSupplier); 
+ assertNull(dims1[randomNullIndex1]); + assertNull(dims1[randomNullIndex2]); + in.close(); + + // test identity value on read + long randomLong = randomLong(); + identityValueSupplier = i -> randomLong; + in = fsDirectory.openInput(TEST_FILE, IOContext.DEFAULT); + + randomAccessInput = in.randomAccessSlice(0, in.length()); + StarTreeDocumentBitSetUtil.readBitSet(randomAccessInput, 0, dims1, identityValueSupplier); + assertEquals(randomLong, (long) dims1[randomNullIndex1]); + assertEquals(randomLong, (long) dims1[randomNullIndex2]); + in.close(); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java new file mode 100644 index 0000000000000..b485ea1a4fe3e --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java @@ -0,0 +1,201 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Random; + +/** + * Tests for {@link StarTreeDocumentsSorter}. 
+ */ +public class StarTreeDocumentsSorterTests extends OpenSearchTestCase { + private Map testData; + + @Before + public void setUp() throws Exception { + super.setUp(); + testData = new HashMap<>(); + testData.put(0, new Long[] { -1L, 2L, 3L }); + testData.put(1, new Long[] { 1L, 2L, 2L }); + testData.put(2, new Long[] { -1L, -1L, 3L }); + testData.put(3, new Long[] { 1L, 2L, null }); + testData.put(4, new Long[] { 1L, null, 3L }); + } + + public void testSortDocumentsOffHeap_FirstDimension() { + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int dimensionId = -1; + int numDocs = 5; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 2, 0, 1, 3, 4 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_ThirdDimension() { + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int dimensionId = 1; + int numDocs = 5; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 1, 0, 2, 4, 3 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_SingleElement() { + int[] sortedDocIds = { 0 }; + int dimensionId = -1; + int numDocs = 1; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 0 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_EmptyArray() { + int[] sortedDocIds = {}; + int dimensionId = -1; + int numDocs = 0; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] {}, sortedDocIds); + } + + public void testSortDocumentsOffHeap_SecondDimensionId() { + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int dimensionId = 0; + int numDocs = 5; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 2, 1, 0, 3, 4 }, sortedDocIds); + } + + public 
void testSortDocumentsOffHeap_AllNulls() { + Map testData = new HashMap<>(); + testData.put(0, new Long[] { null, null, null }); + testData.put(1, new Long[] { null, null, null }); + testData.put(2, new Long[] { null, null, null }); + + int[] sortedDocIds = { 0, 1, 2 }; + int dimensionId = -1; + int numDocs = 3; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + // The order should remain unchanged as all elements are equal (null) + assertArrayEquals(new int[] { 0, 1, 2 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_Negatives() { + Map testData = new HashMap<>(); + testData.put(0, new Long[] { -10L, 0L }); + testData.put(1, new Long[] { -9L, 0L }); + testData.put(2, new Long[] { -8L, 0L }); + testData.put(3, new Long[] { -7L, -0L }); + testData.put(4, new Long[] { -15L, -0L }); + + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int dimensionId = -1; + int numDocs = 5; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + // Docs are ordered by ascending first-dimension value, so doc 4 (-15) moves to the front + assertArrayEquals(new int[] { 4, 0, 1, 2, 3 }, sortedDocIds); + } + + public void testRandomSort() { + int i = 0; + while (i < 10) { + testRandomizedSort(); + i++; + } + } + + private void testRandomizedSort() { + + int numDocs = randomIntBetween(0, 1000); + Random random = Randomness.get(); + // skew more towards realistic number of dimensions + int numDimensions = random.nextBoolean() ? randomIntBetween(2, 10) : randomIntBetween(2, 100); + List testData = new ArrayList<>(); + // Generate random test data + for (int i = 0; i < numDocs; i++) { + Long[] dimensions = new Long[numDimensions]; + for (int j = 0; j < numDimensions; j++) { + if (random.nextFloat() < 0.5) { + dimensions[j] = random.nextBoolean() ? Long.valueOf(0L) : random.nextBoolean() ?
-1L : null; + } else { + dimensions[j] = random.nextLong(); + } + } + testData.add(dimensions); + } + + int[] sortedDocIds = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = i; + } + // the sorter orders documents on dimensions (dimensionId + 1) through (numDimensions - 1) + // for example, to sort starting from the dimension at index 0, pass -1 as dimensionId + int dimensionId = random.nextInt(numDimensions) - 1; + + // Sort using StarTreeDocumentsSorter + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + // Verify the sorting + for (int i = 1; i < numDocs; i++) { + Long[] prev = testData.get(sortedDocIds[i - 1]); + Long[] curr = testData.get(sortedDocIds[i]); + boolean isCorrectOrder = true; + for (int j = dimensionId + 1; j < numDimensions; j++) { + int comparison = compareLongs(prev[j], curr[j]); + if (comparison < 0) { + break; + } else if (comparison > 0) { + isCorrectOrder = false; + break; + } + } + assertTrue( + "Sorting error when sorting from dimension index " + + dimensionId + + " Prev : " + + Arrays.toString(prev) + + " :: Curr : " + + Arrays.toString(curr), + isCorrectOrder + ); + } + } + + private int compareLongs(Long a, Long b) { + if (!Objects.equals(a, b)) { + if (a == null) { + return 1; + } else if (b == null) { + return -1; + } else { + return a.compareTo(b); + } + } + return 0; + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java index 3144b1b007924..132d2ff5a566a 100644 --- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java @@ -69,7 +69,7 @@ public void testValidStarTree() throws IOException { List expectedMetrics = Arrays.asList(MetricStat.SUM, MetricStat.AVG); assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(100,
starTreeFieldType.getStarTreeConfig().maxLeafDocs()); - assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); + assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); assertEquals( new HashSet<>(Arrays.asList("@timestamp", "status")), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims() @@ -101,7 +101,7 @@ public void testValidStarTreeDefaults() throws IOException { ); assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); - assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); + assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); } }