From 62508dcda8822c93fd9559ab23c9d420bada5e98 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 22 Sep 2023 13:32:47 -0700 Subject: [PATCH 01/17] Pasted all changes into new branch off of main --- server/build.gradle | 4 + server/licenses/RoaringBitmap-LICENSE.txt | 191 ++++++ server/licenses/RoaringBitmap-NOTICE.txt | 0 server/licenses/shims-LICENSE.txt | 191 ++++++ server/licenses/shims-NOTICE.txt | 0 .../indices/HybridIntKeyLookupStore.java | 545 ++++++++++++++++++ .../opensearch/indices/IntKeyLookupStore.java | 164 ++++++ .../RemovableHybridIntKeyLookupStore.java | 108 ++++ .../indices/HybridIntKeyLookupStoreTests.java | 391 +++++++++++++ ...RemovableHybridIntKeyLookupStoreTests.java | 125 ++++ 10 files changed, 1719 insertions(+) create mode 100644 server/licenses/RoaringBitmap-LICENSE.txt create mode 100644 server/licenses/RoaringBitmap-NOTICE.txt create mode 100644 server/licenses/shims-LICENSE.txt create mode 100644 server/licenses/shims-NOTICE.txt create mode 100644 server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java create mode 100644 server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java create mode 100644 server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java create mode 100644 server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java create mode 100644 server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java diff --git a/server/build.gradle b/server/build.gradle index f6db3d53a0dcc..426719f53632e 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -158,6 +158,10 @@ dependencies { api "com.google.protobuf:protobuf-java:${versions.protobuf}" api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}" + // roaring bitmaps + api 'org.roaringbitmap:RoaringBitmap:0.9.49' + runtimeOnly 'org.roaringbitmap:shims:0.9.49' // RoaringBitmap's companion artifact; presumably needed at runtime for its ArraysShim class (TODO confirm)
+ testImplementation(project(":test:framework")) { // tests use the locally compiled version of server exclude group: 'org.opensearch', module: 'server' diff --git a/server/licenses/RoaringBitmap-LICENSE.txt b/server/licenses/RoaringBitmap-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/server/licenses/RoaringBitmap-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. 
+ +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. 
+ +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/RoaringBitmap-NOTICE.txt b/server/licenses/RoaringBitmap-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/licenses/shims-LICENSE.txt b/server/licenses/shims-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/server/licenses/shims-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. 
+ +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/shims-NOTICE.txt b/server/licenses/shims-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java new file mode 100644 index 0000000000000..d9fbde46f0bd7 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -0,0 +1,545 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +import org.roaringbitmap.RoaringBitmap; +import java.util.HashSet; +import java.util.Arrays; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * A store which dynamically switches its internal data structure from hash set to sorted int array + * to roaring bitmap. For reasoning behind design decisions, see + * https://quip-amazon.com/JdWGAYm2doCm/Roaring-Bitmap-Performance-Testing + */ +public class HybridIntKeyLookupStore implements IntKeyLookupStore { + public static final int HASHSET_TO_INTARR_THRESHOLD = 5000; + public static final int INTARR_SIZE = 100000; + public static final int INTARR_TO_RBM_THRESHOLD = INTARR_SIZE; + public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); // used to calculate memory usage + + public enum StructureTypes { + HASHSET, + INTARR, + RBM + }; + + protected StructureTypes currentStructure; + protected final int modulo; + protected int size; + protected double memSizeCap; // in MB + protected int numAddAttempts; + protected int numCollisions; + protected boolean guaranteesNoFalseNegatives; + + protected HashSet hashset; + protected int[] intArr; + protected RoaringBitmap rbm; + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + protected final Lock readLock = lock.readLock(); + protected final Lock writeLock = lock.writeLock(); + + // These are used to estimate RBM memory usage + protected double RBMMemSlope; + protected double RBMMemBufferMultiplier; + protected double RBMMemIntercept; + protected int maxNumEntries; + protected boolean isAtCapacity; + + public HybridIntKeyLookupStore(int modulo, double memSizeCap) { + this.modulo = modulo; // A modulo of 0 means no modulo + 
this.hashset = new HashSet(); + this.currentStructure = StructureTypes.HASHSET; + this.size = 0; + this.numAddAttempts = 0; + this.numCollisions = 0; + this.guaranteesNoFalseNegatives = true; + this.memSizeCap = memSizeCap; // A cap of 0 means no cap + memSizeInitFunction(); // Initialize values for RBM memory size estimates + this.maxNumEntries = calculateMaxNumEntries(); + } + + protected final int customAbs(int value) { + if (value < 0 && value > Integer.MIN_VALUE) { + return -value; + } else if (value >= 0) { + return value; + } + return Integer.MAX_VALUE; + } + + protected final int transform(int value) { + // We only use negative numbers to simplify sorting the int array + return modulo == 0 ? -customAbs(value) : -customAbs(value % modulo); + } + + // Helper function for intArr operations + protected void intArrChecks(int value) throws IllegalStateException { + if (currentStructure != StructureTypes.INTARR) { + throw new IllegalStateException("Cannot run isInIntArr when currentStructure is not INTARR!!"); + } + if (value > 0) { + throw new IllegalStateException("Cannot use positive value " + Integer.toString(value) + " in isInIntArr"); + } + } + + /** Checks for presence of value in intArr. If doAdd is true and the value is not already there, adds it. + * Returns true if the value was already contained (and therefore not added again), false otherwise + */ + protected final boolean isInIntArr(int value, int arrSize, boolean doAdd) throws IllegalStateException { + Lock lock = doAdd ? 
writeLock : readLock; + lock.lock(); + try { + intArrChecks(value); + int index = Arrays.binarySearch(intArr, 0, arrSize, value); // only search in initialized part of array + if (index < 0) { + if (doAdd) { + int insertionPoint = -index - 1; + System.arraycopy(intArr, insertionPoint, intArr, insertionPoint + 1, arrSize - insertionPoint); + intArr[insertionPoint] = value; + } + return false; + } + return true; + } finally { + lock.unlock(); + } + } + + protected final void switchHashsetToIntArr() throws IllegalStateException { + writeLock.lock(); + try { + if (currentStructure == StructureTypes.HASHSET) { + size = 0; + intArr = new int[INTARR_SIZE]; + currentStructure = StructureTypes.INTARR; + for (int value : hashset) { + boolean alreadyContained = isInIntArr(value, size, true); + // should never be already contained, but just to be safe + if (!alreadyContained) { + size++; + } + } + hashset = null; + } + } finally { + writeLock.unlock(); + } + } + + protected final void switchIntArrToRBM() { + writeLock.lock(); + try { + if (currentStructure == StructureTypes.INTARR) { + currentStructure = StructureTypes.RBM; + rbm = new RoaringBitmap(); + for (int i = 0; i < size; i++) { + rbm.add(intArr[i]); + } + intArr = null; + } + } finally { + writeLock.unlock(); + } + } + + /** + * Checks if adding an additional value would require us to change data structures. + * If so, start that change. + */ + protected final void handleStructureSwitch() throws IllegalStateException { // takes the write lock itself; the lock is reentrant, so this is safe when add() already holds it
+ writeLock.lock(); + try { + if (size == HASHSET_TO_INTARR_THRESHOLD - 1) { + if (maxNumEntries <= HASHSET_TO_INTARR_THRESHOLD) { + isAtCapacity = true; + return; + } + switchHashsetToIntArr(); + } else if (size == INTARR_TO_RBM_THRESHOLD - 1) { + if (maxNumEntries <= INTARR_TO_RBM_THRESHOLD) { + isAtCapacity = true; + return; + } + switchIntArrToRBM(); + } + } finally { + writeLock.unlock(); + } + } + + protected final void removeFromIntArr(int value) throws IllegalStateException { + writeLock.lock(); + try { + intArrChecks(value); + int index = Arrays.binarySearch(intArr, 0, size, value); + if (index >= 0) { + System.arraycopy(intArr, index + 1, intArr, index, size - index - 1); + intArr[size - 1] = 0; + size--; + } + } finally { + writeLock.unlock(); + } + } + + protected void handleCollisions(int transformedValue) { + numCollisions++; + } + + @Override + public boolean add(int value) throws IllegalStateException { + writeLock.lock(); + try { + if (size == maxNumEntries) { + isAtCapacity = true; + } + handleStructureSwitch(); // also might set isAtCapacity + if (!isAtCapacity) { + + numAddAttempts++; + int transformedValue = transform(value); + boolean alreadyContained; + + switch (currentStructure) { + case HASHSET: + alreadyContained = !(hashset.add(transformedValue)); + break; + case INTARR: + alreadyContained = isInIntArr(transformedValue, size, true); + break; + case RBM: + alreadyContained = containsTransformed(transformedValue); + if (!alreadyContained) { + rbm.add(transformedValue); + } + break; + default: + throw new IllegalStateException("currentStructure is none of possible values"); + } + if (alreadyContained) { + handleCollisions(transformedValue); + return false; + } + size++; + return true; + } + return false; + } finally { + writeLock.unlock(); + } + } + + protected boolean containsTransformed(int transformedValue) throws IllegalStateException { + readLock.lock(); + try { + switch (currentStructure) { + case HASHSET: + return 
hashset.contains(transformedValue); + case INTARR: + return isInIntArr(transformedValue, size, false); + case RBM: + return rbm.contains(transformedValue); + default: + throw new IllegalStateException("currentStructure is none of possible values"); + } + } finally { + readLock.unlock(); + } + } + + // Check the array is sorted with no duplicate elements (except 0) + protected boolean arrayCorrectlySorted() { + readLock.lock(); + try { + if (currentStructure == StructureTypes.INTARR) { + for (int j = 0; j < intArr.length - 1; j++) { + if (!((intArr[j] < intArr[j + 1]) || (intArr[j] == intArr[j + 1] && intArr[j + 1] == 0))) { + // left clause: check that array is sorted, right clause: check that values are unique unless they're zero + // (uninitialized) + return false; + } + } + } + return true; + } finally { + readLock.unlock(); + } + } + + @Override + public boolean contains(int value) throws IllegalStateException { + int transformedValue = transform(value); + return containsTransformed(transformedValue); + } + + @Override + public int getInternalRepresentation(int value) { + return transform(value); + } + + @Override + public boolean remove(int value) throws IllegalStateException { + return false; + } + + @Override + public boolean supportsRemoval() { + return false; + } + + protected void removeHelperFunction(int transformedValue) throws IllegalStateException { + // allows code to be reused in forceRemove() of this class and remove() of inheriting class + // shouldn't be called on its own, or on a value that's not already inside the structure + switch (currentStructure) { + case HASHSET: + hashset.remove(transformedValue); + size--; + return; + case INTARR: + removeFromIntArr(transformedValue); // size is decreased in this function already + return; + case RBM: + rbm.remove(transformedValue); + size--; + } + } + + @Override + public void forceRemove(int value) throws IllegalStateException { + writeLock.lock(); + guaranteesNoFalseNegatives = false; + try { + int 
transformedValue = transform(value); + boolean alreadyContained = contains(transformedValue); + if (alreadyContained) { + removeHelperFunction(transformedValue); + } + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean canHaveFalseNegatives() { + return !guaranteesNoFalseNegatives; + } + + @Override + public int getSize() { + readLock.lock(); // needed because size is changed during switchHashsetToIntarr() + try { + return size; + } finally { + readLock.unlock(); + } + } + + @Override + public int getNumAddAttempts() { + return numAddAttempts; + } + + @Override + public int getNumCollisions() { + return numCollisions; + } + + @Override + public String getCurrentStructure() throws IllegalStateException { + switch (currentStructure) { + case HASHSET: + return "HashSet"; + case INTARR: + return "intArr"; + case RBM: + return "RBM"; + default: + throw new IllegalStateException("currentStructure is none of possible values"); + } + } + + @Override + public int getModulo() { + return modulo; + } + + @Override + public boolean isUsingNegativeOnly() { + return true; + } + + @Override + public boolean isCollision(int value1, int value2) { + return transform(value1) == transform(value2); + } + + protected static double[] memSizeHelperFunction(int modulo) { + // Sets up values to help estimate RBM size given a modulo + // Returns an array of {bufferMultiplier, slope, intercept} + // See https://quip-amazon.com/9Vl3A3kBq2bR/IntKeyLookupStore-Size-Estimates + // for an explanation of where these numbers came from + + double modifiedModulo; + if (modulo == 0) { + modifiedModulo = 31.0; + } else { + modifiedModulo = Math.log(0.5 * modulo) / Math.log(2); + } + // Note the effective modulos are 0.5x compared to tests, since we also use only negative numbers due to the intArr + double highCutoff = 29.001; // Floating point makes 29 not work + double lowCutoff = 28.0; + double bufferMultiplier = 1.35; + if (modifiedModulo <= highCutoff) { + bufferMultiplier = 
1.6; + } + + double slope; + double intercept; + if (modifiedModulo > highCutoff) { + slope = 0.69; + intercept = -3; + } else if (modifiedModulo >= lowCutoff) { + slope = 0.75; + intercept = -3.5; + } else { + slope = 0.88; + intercept = -4.5; + } + return new double[] { bufferMultiplier, slope, intercept }; + } + + protected void memSizeInitFunction() { + double[] memSizeValues = memSizeHelperFunction(modulo); + this.RBMMemBufferMultiplier = memSizeValues[0]; + this.RBMMemSlope = memSizeValues[1]; + this.RBMMemIntercept = memSizeValues[2]; + } + + protected int calculateMaxNumEntries() { + double maxHashsetMemSize = HybridIntKeyLookupStore.getHashsetMemSize(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); + double intArrMemSize = HybridIntKeyLookupStore.getIntArrMemSize(); + double minRBMMemSize = HybridIntKeyLookupStore.getRBMMemSizeWithModulo(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, modulo); + + if (memSizeCap == 0) { + return Integer.MAX_VALUE; + } + if (memSizeCap >= minRBMMemSize) { + // max number of elements will be when we have an RBM + return (int) Math.pow(memSizeCap / (this.RBMMemBufferMultiplier * Math.pow(10, this.RBMMemIntercept)), 1 / this.RBMMemSlope); + } + if (memSizeCap < intArrMemSize) { + // max number of elements will be when we have a hash set + return Math.min((int) (memSizeCap / HASHSET_MEM_SLOPE), HASHSET_TO_INTARR_THRESHOLD - 1); + } + // max number of elements will be when we have an intArr + return HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; + } + + protected static double getHashsetMemSize(int numEntries) { + // See https://quip-amazon.com/9Vl3A3kBq2bR/IntKeyLookupStore-Size-Estimates + // for an explanation of where these numbers came from + return HASHSET_MEM_SLOPE * numEntries; + } + + protected static double getIntArrMemSize() { + return (4 * INTARR_SIZE + 24) / (Math.pow(2, 20)); + } + + protected double getRBMMemSize(int numEntries) { + // See 
https://quip-amazon.com/9Vl3A3kBq2bR/IntKeyLookupStore-Size-Estimates + // for an explanation of where these numbers came from + return Math.pow(numEntries, RBMMemSlope) * Math.pow(10, RBMMemIntercept) * RBMMemBufferMultiplier; + } + + protected static double getRBMMemSizeWithModulo(int numEntries, int modulo) { + double[] memSizeValues = memSizeHelperFunction(modulo); + return Math.pow(numEntries, memSizeValues[1]) * Math.pow(10, memSizeValues[2]) * memSizeValues[0]; + } + + @Override + public double getMemorySize() { + switch (currentStructure) { + case HASHSET: + return getHashsetMemSize(size); + case INTARR: + return getIntArrMemSize(); + case RBM: + return getRBMMemSize(size); + } + return 0; + } + + @Override + public double getMemorySizeCap() { + return memSizeCap; + } + + public double getRBMMemSlope() { + return RBMMemSlope; + } + + public double getRBMMemBufferMultiplier() { + return RBMMemBufferMultiplier; + } + + public double getRBMMemIntercept() { + return RBMMemIntercept; + } + + public int getMaxNumEntries() { + return maxNumEntries; + } + + @Override + public boolean getIsAtCapacity() { + return isAtCapacity; + } + + @Override + public void regenerateStore(int[] newValues) throws IllegalStateException { + intArr = null; + rbm = null; + size = 0; + numCollisions = 0; + numAddAttempts = 0; + guaranteesNoFalseNegatives = true; + currentStructure = StructureTypes.HASHSET; + hashset = new HashSet<>(); + + for (int value : newValues) { + add(value); + } + } +} diff --git a/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java new file mode 100644 index 0000000000000..60a65ede756ef --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java @@ -0,0 +1,164 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open 
/**
 * An interface for objects that hold an in-memory record of hashes of keys in the disk cache.
 * These objects have some internal data structure which stores some transformation of added
 * int values. The internal representations may have collisions. Example transformations include a modulo
 * or -abs(value), or some combination.
 */
public interface IntKeyLookupStore {
    /**
     * Transforms the input value into the internal representation for this keystore
     * and adds it to the internal data structure.
     * @param value The value to add.
     * @return true if the value was added, false if it wasn't added because of a
     * collision or if it was already present.
     */
    boolean add(int value) throws Exception;

    /**
     * Checks if the transformation of the value is in the keystore.
     * @param value The value to check.
     * @return true if the value was found, false otherwise. Due to collisions, false positives are
     * possible, but there should be no false negatives unless forceRemove() is called.
     */
    boolean contains(int value) throws Exception;

    /**
     * Returns the transformed version of the input value that would be used to store it in the keystore.
     * This transformation should always be the same for a given instance.
     * @param value The value to transform.
     * @return The transformed value.
     */
    int getInternalRepresentation(int value);

    /**
     * Attempts to safely remove a value from the internal structure, maintaining the property that contains(value)
     * will never return a false negative. If removing would lead to a false negative, the value won't be removed.
     * Classes may not implement safe removal.
     * @param value The value to attempt to remove.
     * @return true if the value was removed, false if it wasn't.
     */
    boolean remove(int value) throws Exception;

    /**
     * Checks if the implementing class supports safe removals. If it doesn't, remove() will always return false.
     * @return true if the class supports safe removals, false if it doesn't.
     */
    boolean supportsRemoval();

    /**
     * Removes the transformed version of this value from the store. Calling this function may cause
     * contains() to return false negatives for future values.
     * @param value The value to forcibly remove.
     */
    void forceRemove(int value) throws Exception;

    /**
     * Checks if the object currently guarantees having no false negatives when running contains().
     * @return false if there will not be false negatives, true if there could be false negatives.
     */
    boolean canHaveFalseNegatives();

    /**
     * Returns the number of distinct values stored in the internal data structure.
     * Does not count values which weren't successfully added due to collisions.
     * @return The number of values.
     */
    int getSize();

    /**
     * Returns the number of times add() has been run, including unsuccessful attempts.
     * @return The number of adding attempts.
     */
    int getNumAddAttempts();

    /**
     * Returns the number of times add() has returned false due to a collision.
     * @return The number of collisions.
     */
    int getNumCollisions();

    /**
     * Returns the current internal data structure.
     * @return A string representing the currently used internal data structure.
     */
    String getCurrentStructure() throws Exception;

    /**
     * If the structure uses a roaring bitmap (RBM) with a modulo at some point, returns that modulo.
     * If it uses an RBM without a modulo or doesn't use an RBM, returns 0.
     * @return The modulo.
     */
    int getModulo();

    /**
     * Returns true if the transformation involves taking -abs(), simplifying int[] access and sorting.
     * @return Whether transformed values are always negative.
     */
    boolean isUsingNegativeOnly();

    /**
     * Checks if two values would collide after being transformed by this store's transformation.
     * @param value1 The first value to compare.
     * @param value2 The second value to compare.
     * @return true if the transformations are equal, false otherwise.
     */
    boolean isCollision(int value1, int value2);

    /**
     * Returns an estimate of the store's memory usage.
     * @return The memory usage, in MB.
     */
    double getMemorySize();

    /**
     * Returns the cap for the store's memory usage.
     * @return The cap, in MB.
     */
    double getMemorySizeCap();

    /**
     * Returns whether the store is at memory capacity.
     * @return true if the store is at capacity, false otherwise.
     */
    boolean getIsAtCapacity();

    /**
     * Deletes the internal data structure and regenerates it from the values passed in.
     * Also resets all stats related to adding.
     * @param newValues The keys that should be in the reset structure.
     */
    void regenerateStore(int[] newValues) throws Exception;
}
+ * For reasoning behind design decisions, see + * https://quip-amazon.com/JdWGAYm2doCm/Roaring-Bitmap-Performance-Testing + */ +public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore implements IntKeyLookupStore { + private HashSet collidedInts; + private int numRemovalAttempts; + private int numSuccessfulRemovals; + + RemovableHybridIntKeyLookupStore(int modulo, double memSizeCap) { + super(modulo, memSizeCap); + collidedInts = new HashSet<>(); + numRemovalAttempts = 0; + numSuccessfulRemovals = 0; + } + + @Override + protected void handleCollisions(int transformedValue) { + numCollisions++; + collidedInts.add(transformedValue); + } + + @Override + public boolean supportsRemoval() { + return true; + } + + // Check if the value to remove has had a collision, and if not, remove it + @Override + public boolean remove(int value) throws IllegalStateException { + int transformedValue = transform(value); + readLock.lock(); + try { + if (!contains(value)) { + return false; + } + numRemovalAttempts++; + if (collidedInts.contains(transformedValue)) { + return false; + } + } finally { + readLock.unlock(); + } + writeLock.lock(); + try { + removeHelperFunction(transformedValue); + numSuccessfulRemovals++; + return true; + } finally { + writeLock.unlock(); + } + } + + @Override + public double getMemorySize() { + return super.getMemorySize() + getHashsetMemSize(collidedInts.size()); + } + + public int getNumRemovalAttempts() { + return numRemovalAttempts; + } + + public int getNumSuccessfulRemovals() { + return numSuccessfulRemovals; + } + + public boolean valueHasHadCollision(int value) { + return collidedInts.contains(transform(value)); + } + +} diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java new file mode 100644 index 0000000000000..6b45990d54fb2 --- /dev/null +++ 
b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -0,0 +1,391 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +import java.util.ArrayList; +import org.opensearch.common.Randomness; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; + +// Tests base functionality of HybridIntKeyLookupStore for both that class and the inheriting +// RemovableHybridIntKeyLookupStore. 
+ +public class HybridIntKeyLookupStoreTests extends org.apache.lucene.util.LuceneTestCase { + public void testInit() throws Exception { + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(0, kls.getSize()); + } + } + + public void testStructureTransitions() throws Exception { + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i++) { + kls.add(i); + } + assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); + for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD; i++) { + kls.add(i); + } + assertEquals("RBM", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, kls.getSize()); + } + } + + public void testArrayLogic() throws Exception { + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + Random rand = Randomness.get(); + int numToAdd = 50000; + int[] addedValues = new int[numToAdd]; + for (int i = 0; i < numToAdd; i++) { + int val = rand.nextInt(); + kls.add(val); + addedValues[i] = val; + } + assertTrue(kls.arrayCorrectlySorted()); // Not 
sure if this is really good as a public method - but idk how else to do it? + assertTrue(numToAdd - kls.getSize() < 20); // size should not be too different from numToAdd - exact number varies due to + // collisions + int numToRemove = 20000; + for (int j = 0; j < numToRemove; j++) { + kls.forceRemove(addedValues[j]); + } + assertTrue(numToAdd - numToRemove - kls.getSize() < 20); + assertTrue(kls.arrayCorrectlySorted()); + assertTrue(kls.canHaveFalseNegatives()); + } + } + + public void testTransformationLogic() throws Exception { + int modulo = (int) Math.pow(2, 29); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + int offset = 3; + for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design + int posValue = i * modulo + offset; + kls.add(posValue); + int negValue = -(i * modulo + offset); + kls.add(negValue); + } + assertEquals(1, kls.getSize()); + + // test output is always in expected range + int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; + for (int value : testVals) { + assertTrue(kls.getInternalRepresentation(value) <= 0); + assertTrue(kls.getInternalRepresentation(value) > -modulo); + } + } + } + + public void testContainsAndForceRemove() throws Exception { + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + for (int i = 0; i < 2000; i++) { + kls.add(i); + assertTrue(kls.contains(i)); + } + assertFalse(kls.canHaveFalseNegatives()); + for (int i = 1900; i < 2000; i++) { + kls.forceRemove(i); + 
assertFalse(kls.contains(i)); + } + assertEquals(1900, kls.getSize()); + int lastSize = kls.getSize(); + for (int i = kls.getSize(); i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i++) { + assertFalse(kls.contains(i)); + kls.add(i); + assertTrue(kls.contains(i)); // intArr removal logic already tested in testArrayLogic() + assertEquals(1, kls.getSize() - lastSize); + lastSize = kls.getSize(); + } + assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); + for (int i = kls.getSize(); i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { + kls.add(i); + assertTrue(kls.contains(i)); + } + assertEquals("RBM", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000, kls.getSize()); + for (int i = 5000; i < 10000; i++) { + kls.forceRemove(i); + assertFalse(kls.contains(i)); + } + assertTrue(kls.canHaveFalseNegatives()); + } + } + + public void testAddingStatsGetters() throws Exception { + int modulo = (int) Math.pow(2, 15); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + assertEquals(modulo, kls.getModulo()); + + kls.add(15); + kls.add(-15); + assertEquals(2, kls.getNumAddAttempts()); + assertEquals(1, kls.getNumCollisions()); + + int offset = 1; + for (int i = 0; i < 10; i++) { + kls.add(i * modulo + offset); + } + assertEquals(12, kls.getNumAddAttempts()); + assertEquals(10, kls.getNumCollisions()); + } + } + + public void testRegenerateStore() throws Exception { + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + for (HybridIntKeyLookupStore kls : new 
HybridIntKeyLookupStore[] { base_kls, rkls }) { + Random rand = Randomness.get(); + int[] resetNumbers = new int[] { + HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, + HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, + HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 10000 }; + // test reset starting from each of the 3 internal structure types + for (int resetNum : resetNumbers) { + for (int i = 0; i < resetNum; i++) { + kls.add(i); + } + int[] newVals = new int[(int) (resetNum * 1.1)]; // margin accounts for collisions + for (int j = 0; j < newVals.length; j++) { + newVals[j] = rand.nextInt(); + } + kls.regenerateStore(newVals); + assertTrue(kls.getSize() >= resetNum); + assertTrue(kls.getSize() <= newVals.length); + } + } + } + + public void testAddingDuplicates() throws Exception { + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(577); + } + assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1, kls.getSize()); + for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + - 1; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(12342); + } + assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, kls.getSize()); + for (int i = HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + + 5000; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(-10004); + } + assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 5000, kls.getSize()); + } + } + + public 
void testMemoryCapValueInitialization() { + double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; // these will decrement by 1 + double[] expectedMultipliers = new double[] { 1.35, 1.35, 1.6, 1.6, 1.6, 1.6 }; + double[] expectedSlopes = new double[] { 0.69, 0.69, 0.75, 0.75, 0.88, 0.88 }; + double[] expectedIntercepts = new double[] { -3, -3, -3.5, -3.5, -4.5, -4.5 }; + double memSizeCap = 100.0; + double delta = 0.01; + for (int i = 0; i < logModulos.length; i++) { + int modulo = 0; + if (logModulos[i] != 0) { + modulo = (int) Math.pow(2, logModulos[i]); + } + HybridIntKeyLookupStore rbm = new HybridIntKeyLookupStore(modulo, memSizeCap); + assertEquals(memSizeCap, rbm.getMemorySizeCap(), 1.0); + assertEquals(expectedMultipliers[i], rbm.getRBMMemBufferMultiplier(), delta); + assertEquals(expectedSlopes[i], rbm.getRBMMemSlope(), delta); + assertEquals(expectedIntercepts[i], rbm.getRBMMemIntercept(), delta); + } + } + + public void testMemoryCapBlocksTransitions() throws Exception { + double[] testModulos = new double[] { 0, Math.pow(2, 31), Math.pow(2, 29), Math.pow(2, 28), Math.pow(2, 26) }; + for (int i = 0; i < testModulos.length; i++) { + int modulo = (int) testModulos[i]; + double maxHashsetMemSize = HybridIntKeyLookupStore.getHashsetMemSize(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); + double intArrMemSize = HybridIntKeyLookupStore.getIntArrMemSize(); + double minRBMMemSize = HybridIntKeyLookupStore.getRBMMemSizeWithModulo(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, modulo); + + // test that transitions in data structure do indeed monotonically increase predicted memory size + assertTrue(maxHashsetMemSize < intArrMemSize); + assertTrue(intArrMemSize < minRBMMemSize); + + HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, intArrMemSize - 0.01); + for (int j = 0; j < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; j++) { + boolean didAdd = kls.add(j); + assertTrue(didAdd); + } + // now try to add one more, 
which would cause a transition and push us past the memory cap + assertFalse(kls.getIsAtCapacity()); + assertEquals("HashSet", kls.getCurrentStructure()); + boolean didAdd = kls.add(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); + assertFalse(didAdd); + assertTrue(kls.getIsAtCapacity()); + assertEquals("HashSet", kls.getCurrentStructure()); + + kls = new HybridIntKeyLookupStore(modulo, minRBMMemSize); + for (int j = 0; j < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; j++) { + didAdd = kls.add(j); + assertTrue(didAdd); + } + assertFalse(kls.getIsAtCapacity()); + didAdd = kls.add(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD); + assertFalse(didAdd); + assertTrue(kls.getIsAtCapacity()); + assertEquals("intArr", kls.getCurrentStructure()); + } + } + + public void testMemoryCapBlocksAdd() throws Exception { + double[] testModulos = new double[] { 0, Math.pow(2, 31), Math.pow(2, 29), Math.pow(2, 28), Math.pow(2, 26) }; + for (int i = 0; i < testModulos.length; i++) { + int modulo = (int) testModulos[i]; + + // test where max number of entries should be 3000 + double memSizeCap = HybridIntKeyLookupStore.HASHSET_MEM_SLOPE * 3000; + HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, memSizeCap); + for (int j = 0; j < 3500; j++) { + kls.add(j); + } + assertEquals(3000, kls.getSize()); + assertEquals("HashSet", kls.getCurrentStructure()); + + // test where max number of entries should be 999,999 (bounded at intArr size) + memSizeCap = HybridIntKeyLookupStore.getIntArrMemSize(); + kls = new HybridIntKeyLookupStore(modulo, memSizeCap); + for (int j = 0; j < 105000; j++) { + kls.add(j); + } + assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, kls.getSize()); + assertEquals("intArr", kls.getCurrentStructure()); + + int maxEntries = 2342000; + memSizeCap = HybridIntKeyLookupStore.getRBMMemSizeWithModulo(maxEntries, modulo); + kls = new HybridIntKeyLookupStore(modulo, memSizeCap); + for (int j = 0; j < maxEntries + 1000; j++) { + 
kls.add(j); + } + assertTrue(Math.abs(maxEntries - kls.getSize()) < 2); // exact cap varies a small amount bc of floating point + } + } + + public void testConcurrency() throws Exception { + Random rand = Randomness.get(); + for (int j = 0; j < 5; j++) { // test with different numbers of threads + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + int numThreads = rand.nextInt(50) + 1; + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + // In this test we want to add the first 200K numbers and check they're all correctly there. + // We do some duplicates too to ensure those aren't incorrectly added. + int amountToAdd = 200000; + ArrayList> wasAdded = new ArrayList<>(amountToAdd); // idk why i cant make an array??? 
+ ArrayList> duplicatesWasAdded = new ArrayList<>(); + for (int i = 0; i < amountToAdd; i++) { + wasAdded.add(null); + } + for (int i = 0; i < amountToAdd; i++) { + final int val = i; + Future fut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + wasAdded.set(val, fut); + if (val % 1000 == 0) { + // do a duplicate add + Future duplicateFut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + duplicatesWasAdded.add(duplicateFut); + } + } + int originalAdds = 0; + int duplicateAdds = 0; + for (Future fut : wasAdded) { + if (fut.get()) { + originalAdds++; + } + } + for (Future duplicateFut : duplicatesWasAdded) { + if (duplicateFut.get()) { + duplicateAdds++; + } + } + for (int i = 0; i < amountToAdd; i++) { + assertTrue(kls.contains(i)); + } + assertEquals(amountToAdd, originalAdds + duplicateAdds); + assertEquals(amountToAdd, kls.getSize()); + assertEquals(amountToAdd / 1000, kls.getNumCollisions()); + executor.shutdown(); + } + } + } +} diff --git a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java new file mode 100644 index 0000000000000..8430e387fdb33 --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +// NOTE: Only new functionality is tested here. +// Inherited functionality is tested for both this class and the superclass in HybridIntKeyLookupStoreTests.java. + +public class RemovableHybridIntKeyLookupStoreTests extends org.apache.lucene.util.LuceneTestCase { + public void testRemoveNoCollisions() throws Exception { + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(0, 100.0); + // there should be no collisions for sequential positive numbers up to modulo + assertTrue(rkls.supportsRemoval()); + for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { + rkls.add(i); + } + assertEquals("HashSet", rkls.getCurrentStructure()); + for (int i = 0; i < 1000; i++) { + assertTrue(rkls.remove(i)); + assertFalse(rkls.contains(i)); + assertFalse(rkls.valueHasHadCollision(i)); + } + assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1001, rkls.getSize()); + for (int i = 0; i < 1000; i++) { + rkls.add(i); + assertFalse(rkls.valueHasHadCollision(i)); + } + + assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1, rkls.getSize()); + for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + - 1; i++) { + 
rkls.add(i); + } + assertEquals("intArr", rkls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, rkls.getSize()); + for (int i = 0; i < 1000; i++) { + assertTrue(rkls.remove(i)); + assertFalse(rkls.contains(i)); + assertFalse(rkls.valueHasHadCollision(i)); + } + for (int i = 0; i < 1000; i++) { + rkls.add(i); + assertFalse(rkls.valueHasHadCollision(i)); + } + + for (int i = HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { + rkls.add(i); + } + assertEquals("RBM", rkls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000, rkls.getSize()); + for (int i = 0; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { + assertTrue(rkls.remove(i)); + assertFalse(rkls.contains(i)); + assertFalse(rkls.valueHasHadCollision(i)); + } + assertEquals("RBM", rkls.getCurrentStructure()); + assertEquals(0, rkls.getSize()); + } + + public void testRemoveWithCollisions() throws Exception { + int modulo = (int) Math.pow(2, 26); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 100.0); + for (int i = 0; i < 10; i++) { + rkls.add(i); + if (i % 2 == 0) { + rkls.add(-i); + assertTrue(rkls.valueHasHadCollision(i)); + } else { + assertFalse(rkls.valueHasHadCollision(i)); + } + } + assertEquals(10, rkls.getSize()); + for (int i = 0; i < 10; i++) { + boolean didRemove = rkls.remove(i); + if (i % 2 == 0) { + // we expect a collision with -i, so we can't remove + assertFalse(didRemove); + assertTrue(rkls.contains(i)); + } else { + // we expect no collision + assertTrue(didRemove); + assertFalse(rkls.contains(i)); + assertFalse(rkls.valueHasHadCollision(i)); + } + } + assertEquals(5, rkls.getSize()); + rkls.add(1); + for (int j = 1; j < 5; j++) { + rkls.add(1 + j * modulo); + } + assertEquals(6, rkls.getSize()); + assertFalse(rkls.remove(1 + modulo)); + assertTrue(rkls.valueHasHadCollision(1 + 15 * 
modulo)); + assertTrue(rkls.contains(1 + 17 * modulo)); + } +} From 5cdaf858f6d0d3ab7d34e39659c82bdb6c2548bc Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 22 Sep 2023 16:11:22 -0700 Subject: [PATCH 02/17] updated SHAs, did spotless check, etc --- server/licenses/RoaringBitmap-0.9.49.jar.sha1 | 1 + server/licenses/shims-0.9.49.jar.sha1 | 1 + .../opensearch/indices/HybridIntKeyLookupStore.java | 10 +++++++--- .../indices/HybridIntKeyLookupStoreTests.java | 6 ++++-- .../indices/RemovableHybridIntKeyLookupStoreTests.java | 4 +++- 5 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 server/licenses/RoaringBitmap-0.9.49.jar.sha1 create mode 100644 server/licenses/shims-0.9.49.jar.sha1 diff --git a/server/licenses/RoaringBitmap-0.9.49.jar.sha1 b/server/licenses/RoaringBitmap-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..919a73c074b6a --- /dev/null +++ b/server/licenses/RoaringBitmap-0.9.49.jar.sha1 @@ -0,0 +1 @@ +b45b49c1ec5c5fc48580412d0ca635e1833110ea \ No newline at end of file diff --git a/server/licenses/shims-0.9.49.jar.sha1 b/server/licenses/shims-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..9e76614ca5207 --- /dev/null +++ b/server/licenses/shims-0.9.49.jar.sha1 @@ -0,0 +1 @@ +8bd7794fbdaa9536354dd2d8d961d9503beb9460 \ No newline at end of file diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index d9fbde46f0bd7..f4adf49289a8e 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -32,12 +32,13 @@ package org.opensearch.indices; -import org.roaringbitmap.RoaringBitmap; -import java.util.HashSet; import java.util.Arrays; +import java.util.HashSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.roaringbitmap.RoaringBitmap; + /** * A 
store which dynamically switches its internal data structure from hash set to sorted int array * to roaring bitmap. For reasoning behind design decisions, see @@ -49,7 +50,10 @@ public class HybridIntKeyLookupStore implements IntKeyLookupStore { public static final int INTARR_TO_RBM_THRESHOLD = INTARR_SIZE; public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); // used to calculate memory usage - public enum StructureTypes { + /** + * Used to keep track of which structure is being used to store values. + */ + protected enum StructureTypes { HASHSET, INTARR, RBM diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index 6b45990d54fb2..44eb6f0f6b6a0 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -31,8 +31,10 @@ package org.opensearch.indices; -import java.util.ArrayList; import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; import java.util.Random; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -41,7 +43,7 @@ // Tests base functionality of HybridIntKeyLookupStore for both that class and the inheriting // RemovableHybridIntKeyLookupStore. 
-public class HybridIntKeyLookupStoreTests extends org.apache.lucene.util.LuceneTestCase { +public class HybridIntKeyLookupStoreTests extends OpenSearchTestCase { public void testInit() throws Exception { HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); diff --git a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java index 8430e387fdb33..46fec6fad3b74 100644 --- a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java @@ -32,10 +32,12 @@ package org.opensearch.indices; +import org.opensearch.test.OpenSearchTestCase; + // NOTE: Only new functionality is tested here. // Inherited functionality is tested for both this class and the superclass in HybridIntKeyLookupStoreTests.java. -public class RemovableHybridIntKeyLookupStoreTests extends org.apache.lucene.util.LuceneTestCase { +public class RemovableHybridIntKeyLookupStoreTests extends OpenSearchTestCase { public void testRemoveNoCollisions() throws Exception { RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(0, 100.0); // there should be no collisions for sequential positive numbers up to modulo From 7f5ade78dbc52999ce21e0b489177f3c7d68e141 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 22 Sep 2023 16:15:51 -0700 Subject: [PATCH 03/17] Adding implementations of IntKeyLookupStore. 
Signed-off-by: Peter Alfonsi --- .../java/org/opensearch/indices/HybridIntKeyLookupStore.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index f4adf49289a8e..6fa565506a9ef 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -73,6 +73,7 @@ protected enum StructureTypes { protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); + // These are used to estimate RBM memory usage protected double RBMMemSlope; From bafda1ee17ef70bf8704cf5b13ceff1d91f12d5c Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 25 Sep 2023 09:18:00 -0700 Subject: [PATCH 04/17] Removed references to quip --- .../opensearch/indices/HybridIntKeyLookupStore.java | 11 ++--------- .../indices/RemovableHybridIntKeyLookupStore.java | 2 -- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 6fa565506a9ef..4463fd8687938 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -41,8 +41,7 @@ /** * A store which dynamically switches its internal data structure from hash set to sorted int array - * to roaring bitmap. For reasoning behind design decisions, see - * https://quip-amazon.com/JdWGAYm2doCm/Roaring-Bitmap-Performance-Testing + * to roaring bitmap. 
*/ public class HybridIntKeyLookupStore implements IntKeyLookupStore { public static final int HASHSET_TO_INTARR_THRESHOLD = 5000; @@ -73,7 +72,7 @@ protected enum StructureTypes { protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); - + // These are used to estimate RBM memory usage protected double RBMMemSlope; @@ -413,8 +412,6 @@ public boolean isCollision(int value1, int value2) { protected static double[] memSizeHelperFunction(int modulo) { // Sets up values to help estimate RBM size given a modulo // Returns an array of {bufferMultiplier, slope, intercept} - // See https://quip-amazon.com/9Vl3A3kBq2bR/IntKeyLookupStore-Size-Estimates - // for an explanation of where these numbers came from double modifiedModulo; if (modulo == 0) { @@ -473,8 +470,6 @@ protected int calculateMaxNumEntries() { } protected static double getHashsetMemSize(int numEntries) { - // See https://quip-amazon.com/9Vl3A3kBq2bR/IntKeyLookupStore-Size-Estimates - // for an explanation of where these numbers came from return HASHSET_MEM_SLOPE * numEntries; } @@ -483,8 +478,6 @@ protected static double getIntArrMemSize() { } protected double getRBMMemSize(int numEntries) { - // See https://quip-amazon.com/9Vl3A3kBq2bR/IntKeyLookupStore-Size-Estimates - // for an explanation of where these numbers came from return Math.pow(numEntries, RBMMemSlope) * Math.pow(10, RBMMemIntercept) * RBMMemBufferMultiplier; } diff --git a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java index ce6c441af972d..16e00b5bd63f3 100644 --- a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java @@ -36,8 +36,6 @@ /** * A store which supports safe removal of keys by 
maintaining a hashset of values that have had collisions. - * For reasoning behind design decisions, see - * https://quip-amazon.com/JdWGAYm2doCm/Roaring-Bitmap-Performance-Testing */ public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore implements IntKeyLookupStore { private HashSet collidedInts; From a4cf0192e31fe5cd032c0e4de3a2d9a4c0c767ef Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 25 Sep 2023 12:44:22 -0700 Subject: [PATCH 05/17] Addressed draft PR comments besides making interface generic (removed unneeded functions, added clear(), changed MB to bytes, misc renamings) --- .../indices/HybridIntKeyLookupStore.java | 93 ++++++++++--------- .../opensearch/indices/IntKeyLookupStore.java | 26 ++---- .../RemovableHybridIntKeyLookupStore.java | 14 ++- .../indices/HybridIntKeyLookupStoreTests.java | 80 ++++++++-------- ...RemovableHybridIntKeyLookupStoreTests.java | 6 +- 5 files changed, 113 insertions(+), 106 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 4463fd8687938..fe17b52c87440 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -48,6 +48,7 @@ public class HybridIntKeyLookupStore implements IntKeyLookupStore { public static final int INTARR_SIZE = 100000; public static final int INTARR_TO_RBM_THRESHOLD = INTARR_SIZE; public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); // used to calculate memory usage + public static final int BYTES_IN_MB = 1048576; /** * Used to keep track of which structure is being used to store values. 
@@ -61,7 +62,7 @@ protected enum StructureTypes { protected StructureTypes currentStructure; protected final int modulo; protected int size; - protected double memSizeCap; // in MB + protected long memSizeCapInBytes; protected int numAddAttempts; protected int numCollisions; protected boolean guaranteesNoFalseNegatives; @@ -79,9 +80,9 @@ protected enum StructureTypes { protected double RBMMemBufferMultiplier; protected double RBMMemIntercept; protected int maxNumEntries; - protected boolean isAtCapacity; + protected boolean atCapacity; - public HybridIntKeyLookupStore(int modulo, double memSizeCap) { + public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { this.modulo = modulo; // A modulo of 0 means no modulo this.hashset = new HashSet(); this.currentStructure = StructureTypes.HASHSET; @@ -89,7 +90,7 @@ public HybridIntKeyLookupStore(int modulo, double memSizeCap) { this.numAddAttempts = 0; this.numCollisions = 0; this.guaranteesNoFalseNegatives = true; - this.memSizeCap = memSizeCap; // A cap of 0 means no cap + this.memSizeCapInBytes = memSizeCapInBytes ; // A cap of 0 means no cap memSizeInitFunction(); // Initialize values for RBM memory size estimates this.maxNumEntries = calculateMaxNumEntries(); } @@ -187,13 +188,13 @@ protected final void handleStructureSwitch() throws IllegalStateException { // w try { if (size == HASHSET_TO_INTARR_THRESHOLD - 1) { if (maxNumEntries <= HASHSET_TO_INTARR_THRESHOLD) { - isAtCapacity = true; + atCapacity = true; return; } switchHashsetToIntArr(); } else if (size == INTARR_TO_RBM_THRESHOLD - 1) { if (maxNumEntries <= INTARR_TO_RBM_THRESHOLD) { - isAtCapacity = true; + atCapacity = true; return; } switchIntArrToRBM(); @@ -227,10 +228,10 @@ public boolean add(int value) throws IllegalStateException { writeLock.lock(); try { if (size == maxNumEntries) { - isAtCapacity = true; + atCapacity = true; } - handleStructureSwitch(); // also might set isAtCapacity - if (!isAtCapacity) { + handleStructureSwitch(); // also 
might set atCapacity + if (!atCapacity) { numAddAttempts++; int transformedValue = transform(value); @@ -394,16 +395,6 @@ public String getCurrentStructure() throws IllegalStateException { } } - @Override - public int getModulo() { - return modulo; - } - - @Override - public boolean isUsingNegativeOnly() { - return true; - } - @Override public boolean isCollision(int value1, int value2) { return transform(value1) == transform(value2); @@ -450,58 +441,67 @@ protected void memSizeInitFunction() { } protected int calculateMaxNumEntries() { - double maxHashsetMemSize = HybridIntKeyLookupStore.getHashsetMemSize(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); - double intArrMemSize = HybridIntKeyLookupStore.getIntArrMemSize(); - double minRBMMemSize = HybridIntKeyLookupStore.getRBMMemSizeWithModulo(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, modulo); + double maxHashsetMemSize = getHashsetMemSizeInBytes(HASHSET_TO_INTARR_THRESHOLD - 1); + double intArrMemSize = getIntArrMemSizeInBytes(); + double minRBMMemSize = getRBMMemSizeWithModuloInBytes(INTARR_TO_RBM_THRESHOLD, modulo); - if (memSizeCap == 0) { + if (memSizeCapInBytes == 0) { return Integer.MAX_VALUE; } - if (memSizeCap >= minRBMMemSize) { + if (memSizeCapInBytes >= minRBMMemSize) { // max number of elements will be when we have an RBM - return (int) Math.pow(memSizeCap / (this.RBMMemBufferMultiplier * Math.pow(10, this.RBMMemIntercept)), 1 / this.RBMMemSlope); + // coefficients for memory calculations were done in MB, so we convert here + return (int) Math.pow(convertBytesToMB(memSizeCapInBytes) / (this.RBMMemBufferMultiplier * Math.pow(10, this.RBMMemIntercept)), 1 / this.RBMMemSlope); } - if (memSizeCap < intArrMemSize) { + if (memSizeCapInBytes < intArrMemSize) { // max number of elements will be when we have a hash set - return Math.min((int) (memSizeCap / HASHSET_MEM_SLOPE), HASHSET_TO_INTARR_THRESHOLD - 1); + return Math.min((int) (convertBytesToMB(memSizeCapInBytes) / HASHSET_MEM_SLOPE), 
HASHSET_TO_INTARR_THRESHOLD - 1); } // max number of elements will be when we have an intArr - return HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; + return INTARR_TO_RBM_THRESHOLD - 1; } - protected static double getHashsetMemSize(int numEntries) { - return HASHSET_MEM_SLOPE * numEntries; + protected static long getHashsetMemSizeInBytes(int numEntries) { + return convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); } - protected static double getIntArrMemSize() { - return (4 * INTARR_SIZE + 24) / (Math.pow(2, 20)); + protected static long getIntArrMemSizeInBytes() { + return (long) (4 * INTARR_SIZE + 24); } - protected double getRBMMemSize(int numEntries) { - return Math.pow(numEntries, RBMMemSlope) * Math.pow(10, RBMMemIntercept) * RBMMemBufferMultiplier; + protected long getRBMMemSizeInBytes(int numEntries) { + return convertMBToBytes(Math.pow(numEntries, RBMMemSlope) * Math.pow(10, RBMMemIntercept) * RBMMemBufferMultiplier); } - protected static double getRBMMemSizeWithModulo(int numEntries, int modulo) { + protected static long getRBMMemSizeWithModuloInBytes(int numEntries, int modulo) { double[] memSizeValues = memSizeHelperFunction(modulo); - return Math.pow(numEntries, memSizeValues[1]) * Math.pow(10, memSizeValues[2]) * memSizeValues[0]; + return convertMBToBytes(Math.pow(numEntries, memSizeValues[1]) * Math.pow(10, memSizeValues[2]) * memSizeValues[0]); + } + + protected static long convertMBToBytes(double valMB) { + return (long) (valMB * BYTES_IN_MB); + } + + protected static double convertBytesToMB(long valBytes) { + return (double) valBytes / BYTES_IN_MB; } @Override - public double getMemorySize() { + public long getMemorySizeInBytes() { switch (currentStructure) { case HASHSET: - return getHashsetMemSize(size); + return getHashsetMemSizeInBytes(size); case INTARR: - return getIntArrMemSize(); + return getIntArrMemSizeInBytes(); case RBM: - return getRBMMemSize(size); + return getRBMMemSizeInBytes(size); } return 0; } @Override - public double 
getMemorySizeCap() { - return memSizeCap; + public long getMemorySizeCapInBytes() { + return memSizeCapInBytes; } public double getRBMMemSlope() { @@ -521,8 +521,8 @@ public int getMaxNumEntries() { } @Override - public boolean getIsAtCapacity() { - return isAtCapacity; + public boolean isAtCapacity() { + return atCapacity; } @Override @@ -540,4 +540,9 @@ public void regenerateStore(int[] newValues) throws IllegalStateException { add(value); } } + + @Override + public void clear() { + regenerateStore(new int[]{}); + } } diff --git a/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java index 60a65ede756ef..be39c954fb0bc 100644 --- a/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java @@ -117,19 +117,6 @@ public interface IntKeyLookupStore { */ String getCurrentStructure() throws Exception; - /** - * If the structure uses a roaring bitmap with a modulo at some point, returns that modulo. - * If it uses a RBM without a modulo or doesn't use an RBM, returns 0. - * @return The modulo. - */ - int getModulo(); - - /** - * Returns true if the transformation involves taking -abs(), simplifying int[] access and sorting - * @return Whether transformed values are always negative. - */ - boolean isUsingNegativeOnly(); - /** * Checks if two values would collide after being transformed by this store's transformation. * @param value1 The first value to compare. @@ -142,18 +129,18 @@ public interface IntKeyLookupStore { * Returns an estimate of the store's memory usage. * @return The memory usage, in MB */ - double getMemorySize(); + long getMemorySizeInBytes(); /** * Returns the cap for the store's memory usage. 
- * @return The cap, in MB + * @return The cap, in bytes */ - double getMemorySizeCap(); + long getMemorySizeCapInBytes(); /** * Returns whether the store is at memory capacity */ - boolean getIsAtCapacity(); + boolean isAtCapacity(); /** * Deletes the internal data structure and regenerates it from the values passed in. @@ -161,4 +148,9 @@ public interface IntKeyLookupStore { * @param newValues The keys that should be in the reset structure. */ void regenerateStore(int[] newValues) throws Exception; + + /** + * Deletes all keys and resets all stats related to adding. + */ + void clear(); } diff --git a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java index 16e00b5bd63f3..d3456c59c3d13 100644 --- a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java @@ -42,8 +42,8 @@ public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore im private int numRemovalAttempts; private int numSuccessfulRemovals; - RemovableHybridIntKeyLookupStore(int modulo, double memSizeCap) { - super(modulo, memSizeCap); + RemovableHybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { + super(modulo, memSizeCapInBytes); collidedInts = new HashSet<>(); numRemovalAttempts = 0; numSuccessfulRemovals = 0; @@ -87,8 +87,14 @@ public boolean remove(int value) throws IllegalStateException { } @Override - public double getMemorySize() { - return super.getMemorySize() + getHashsetMemSize(collidedInts.size()); + public long getMemorySizeInBytes() { + return super.getMemorySizeInBytes() + getHashsetMemSizeInBytes(collidedInts.size()); + } + + @Override + public void regenerateStore(int[] newValues) throws IllegalStateException { + collidedInts = new HashSet<>(); + super.regenerateStore(newValues); } public int getNumRemovalAttempts() { diff --git 
a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index 44eb6f0f6b6a0..e7e83d05213b1 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -45,8 +45,8 @@ public class HybridIntKeyLookupStoreTests extends OpenSearchTestCase { public void testInit() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { assertEquals("HashSet", kls.getCurrentStructure()); assertEquals(0, kls.getSize()); @@ -54,8 +54,8 @@ public void testInit() throws Exception { } public void testStructureTransitions() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i++) { kls.add(i); @@ -71,8 +71,8 @@ public void testStructureTransitions() throws Exception { } public void testArrayLogic() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); - 
RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { Random rand = Randomness.get(); int numToAdd = 50000; @@ -97,8 +97,8 @@ public void testArrayLogic() throws Exception { public void testTransformationLogic() throws Exception { int modulo = (int) Math.pow(2, 29); - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { int offset = 3; for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design @@ -119,8 +119,8 @@ public void testTransformationLogic() throws Exception { } public void testContainsAndForceRemove() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { for (int i = 0; i < 2000; i++) { kls.add(i); @@ -158,11 +158,9 @@ public void testContainsAndForceRemove() throws Exception { public void testAddingStatsGetters() throws Exception { int modulo = (int) 
Math.pow(2, 15); - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - assertEquals(modulo, kls.getModulo()); - kls.add(15); kls.add(-15); assertEquals(2, kls.getNumAddAttempts()); @@ -178,8 +176,8 @@ public void testAddingStatsGetters() throws Exception { } public void testRegenerateStore() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { Random rand = Randomness.get(); int[] resetNumbers = new int[] { @@ -199,12 +197,16 @@ public void testRegenerateStore() throws Exception { assertTrue(kls.getSize() >= resetNum); assertTrue(kls.getSize() <= newVals.length); } + // test clear() + kls.clear(); + assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(0, kls.getSize()); } } public void testAddingDuplicates() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for 
(HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { kls.add(i); @@ -240,15 +242,15 @@ public void testMemoryCapValueInitialization() { double[] expectedMultipliers = new double[] { 1.35, 1.35, 1.6, 1.6, 1.6, 1.6 }; double[] expectedSlopes = new double[] { 0.69, 0.69, 0.75, 0.75, 0.88, 0.88 }; double[] expectedIntercepts = new double[] { -3, -3, -3.5, -3.5, -4.5, -4.5 }; - double memSizeCap = 100.0; + long memSizeCapInBytes = (long) 100.0 * HybridIntKeyLookupStore.BYTES_IN_MB; double delta = 0.01; for (int i = 0; i < logModulos.length; i++) { int modulo = 0; if (logModulos[i] != 0) { modulo = (int) Math.pow(2, logModulos[i]); } - HybridIntKeyLookupStore rbm = new HybridIntKeyLookupStore(modulo, memSizeCap); - assertEquals(memSizeCap, rbm.getMemorySizeCap(), 1.0); + HybridIntKeyLookupStore rbm = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); + assertEquals(rbm.memSizeCapInBytes, rbm.getMemorySizeCapInBytes(), 1.0); assertEquals(expectedMultipliers[i], rbm.getRBMMemBufferMultiplier(), delta); assertEquals(expectedSlopes[i], rbm.getRBMMemSlope(), delta); assertEquals(expectedIntercepts[i], rbm.getRBMMemIntercept(), delta); @@ -259,25 +261,25 @@ public void testMemoryCapBlocksTransitions() throws Exception { double[] testModulos = new double[] { 0, Math.pow(2, 31), Math.pow(2, 29), Math.pow(2, 28), Math.pow(2, 26) }; for (int i = 0; i < testModulos.length; i++) { int modulo = (int) testModulos[i]; - double maxHashsetMemSize = HybridIntKeyLookupStore.getHashsetMemSize(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); - double intArrMemSize = HybridIntKeyLookupStore.getIntArrMemSize(); - double minRBMMemSize = HybridIntKeyLookupStore.getRBMMemSizeWithModulo(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, modulo); + long maxHashsetMemSize = HybridIntKeyLookupStore.getHashsetMemSizeInBytes(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 
1); + long intArrMemSize = HybridIntKeyLookupStore.getIntArrMemSizeInBytes(); + long minRBMMemSize = HybridIntKeyLookupStore.getRBMMemSizeWithModuloInBytes(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, modulo); // test that transitions in data structure do indeed monotonically increase predicted memory size assertTrue(maxHashsetMemSize < intArrMemSize); assertTrue(intArrMemSize < minRBMMemSize); - HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, intArrMemSize - 0.01); + HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, intArrMemSize - 1000); for (int j = 0; j < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; j++) { boolean didAdd = kls.add(j); assertTrue(didAdd); } // now try to add one more, which would cause a transition and push us past the memory cap - assertFalse(kls.getIsAtCapacity()); + assertFalse(kls.isAtCapacity()); assertEquals("HashSet", kls.getCurrentStructure()); boolean didAdd = kls.add(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); assertFalse(didAdd); - assertTrue(kls.getIsAtCapacity()); + assertTrue(kls.isAtCapacity()); assertEquals("HashSet", kls.getCurrentStructure()); kls = new HybridIntKeyLookupStore(modulo, minRBMMemSize); @@ -285,10 +287,10 @@ public void testMemoryCapBlocksTransitions() throws Exception { didAdd = kls.add(j); assertTrue(didAdd); } - assertFalse(kls.getIsAtCapacity()); + assertFalse(kls.isAtCapacity()); didAdd = kls.add(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD); assertFalse(didAdd); - assertTrue(kls.getIsAtCapacity()); + assertTrue(kls.isAtCapacity()); assertEquals("intArr", kls.getCurrentStructure()); } } @@ -299,17 +301,17 @@ public void testMemoryCapBlocksAdd() throws Exception { int modulo = (int) testModulos[i]; // test where max number of entries should be 3000 - double memSizeCap = HybridIntKeyLookupStore.HASHSET_MEM_SLOPE * 3000; - HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, memSizeCap); + long memSizeCapInBytes = (long) 
(HybridIntKeyLookupStore.HASHSET_MEM_SLOPE * 3000 * HybridIntKeyLookupStore.BYTES_IN_MB); + HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); for (int j = 0; j < 3500; j++) { kls.add(j); } - assertEquals(3000, kls.getSize()); + assertTrue(Math.abs(3000 - kls.getSize()) < 2); // double --> long conversion adds a bit of lossiness assertEquals("HashSet", kls.getCurrentStructure()); // test where max number of entries should be 999,999 (bounded at intArr size) - memSizeCap = HybridIntKeyLookupStore.getIntArrMemSize(); - kls = new HybridIntKeyLookupStore(modulo, memSizeCap); + memSizeCapInBytes = HybridIntKeyLookupStore.getIntArrMemSizeInBytes(); + kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); for (int j = 0; j < 105000; j++) { kls.add(j); } @@ -317,20 +319,20 @@ public void testMemoryCapBlocksAdd() throws Exception { assertEquals("intArr", kls.getCurrentStructure()); int maxEntries = 2342000; - memSizeCap = HybridIntKeyLookupStore.getRBMMemSizeWithModulo(maxEntries, modulo); - kls = new HybridIntKeyLookupStore(modulo, memSizeCap); + memSizeCapInBytes = HybridIntKeyLookupStore.getRBMMemSizeWithModuloInBytes(maxEntries, modulo); + kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); for (int j = 0; j < maxEntries + 1000; j++) { kls.add(j); } - assertTrue(Math.abs(maxEntries - kls.getSize()) < 2); // exact cap varies a small amount bc of floating point + assertTrue(Math.abs(maxEntries - kls.getSize()) < 5); // exact cap varies a small amount bc of floating point } } public void testConcurrency() throws Exception { Random rand = Randomness.get(); for (int j = 0; j < 5; j++) { // test with different numbers of threads - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0.0); + HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + 
RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { int numThreads = rand.nextInt(50) + 1; ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); diff --git a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java index 46fec6fad3b74..0fb63f79ed9b2 100644 --- a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java @@ -39,7 +39,8 @@ public class RemovableHybridIntKeyLookupStoreTests extends OpenSearchTestCase { public void testRemoveNoCollisions() throws Exception { - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(0, 100.0); + long memCap = 100L * HybridIntKeyLookupStore.BYTES_IN_MB; + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(0, memCap); // there should be no collisions for sequential positive numbers up to modulo assertTrue(rkls.supportsRemoval()); for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { @@ -90,7 +91,8 @@ public void testRemoveNoCollisions() throws Exception { public void testRemoveWithCollisions() throws Exception { int modulo = (int) Math.pow(2, 26); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 100.0); + long memCap = 100L * HybridIntKeyLookupStore.BYTES_IN_MB; + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, memCap); for (int i = 0; i < 10; i++) { rkls.add(i); if (i % 2 == 0) { From 6ef29629d82cdb75a027d9fe8125e576d758c9dc Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 25 Sep 2023 14:24:53 -0700 Subject: [PATCH 06/17] Moved RBM memory size estimation 
logic into its own class to prepare for an RBM-only store --- .../indices/HybridIntKeyLookupStore.java | 63 ++-------- .../opensearch/indices/RBMSizeEstimator.java | 108 ++++++++++++++++++ 2 files changed, 119 insertions(+), 52 deletions(-) create mode 100644 server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index fe17b52c87440..0009f22a12810 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -75,10 +75,9 @@ protected enum StructureTypes { protected final Lock writeLock = lock.writeLock(); - // These are used to estimate RBM memory usage - protected double RBMMemSlope; - protected double RBMMemBufferMultiplier; - protected double RBMMemIntercept; + // Used to estimate RBM memory usage + protected RBMSizeEstimator sizeEstimator; + protected int maxNumEntries; protected boolean atCapacity; @@ -91,7 +90,8 @@ public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { this.numCollisions = 0; this.guaranteesNoFalseNegatives = true; this.memSizeCapInBytes = memSizeCapInBytes ; // A cap of 0 means no cap - memSizeInitFunction(); // Initialize values for RBM memory size estimates + this.sizeEstimator = new RBMSizeEstimator(modulo / 2); + // The effective modulo is halved compared to tests because of taking only negative values for the sorted int array this.maxNumEntries = calculateMaxNumEntries(); } @@ -400,46 +400,6 @@ public boolean isCollision(int value1, int value2) { return transform(value1) == transform(value2); } - protected static double[] memSizeHelperFunction(int modulo) { - // Sets up values to help estimate RBM size given a modulo - // Returns an array of {bufferMultiplier, slope, intercept} - - double modifiedModulo; - if (modulo == 0) { - modifiedModulo = 31.0; 
- } else { - modifiedModulo = Math.log(0.5 * modulo) / Math.log(2); - } - // Note the effective modulos are 0.5x compared to tests, since we also use only negative numbers due to the intArr - double highCutoff = 29.001; // Floating point makes 29 not work - double lowCutoff = 28.0; - double bufferMultiplier = 1.35; - if (modifiedModulo <= highCutoff) { - bufferMultiplier = 1.6; - } - - double slope; - double intercept; - if (modifiedModulo > highCutoff) { - slope = 0.69; - intercept = -3; - } else if (modifiedModulo >= lowCutoff) { - slope = 0.75; - intercept = -3.5; - } else { - slope = 0.88; - intercept = -4.5; - } - return new double[] { bufferMultiplier, slope, intercept }; - } - - protected void memSizeInitFunction() { - double[] memSizeValues = memSizeHelperFunction(modulo); - this.RBMMemBufferMultiplier = memSizeValues[0]; - this.RBMMemSlope = memSizeValues[1]; - this.RBMMemIntercept = memSizeValues[2]; - } - protected int calculateMaxNumEntries() { double maxHashsetMemSize = getHashsetMemSizeInBytes(HASHSET_TO_INTARR_THRESHOLD - 1); double intArrMemSize = getIntArrMemSizeInBytes(); @@ -451,7 +411,7 @@ protected int calculateMaxNumEntries() { if (memSizeCapInBytes >= minRBMMemSize) { // max number of elements will be when we have an RBM // coefficients for memory calculations were done in MB, so we convert here - return (int) Math.pow(convertBytesToMB(memSizeCapInBytes) / (this.RBMMemBufferMultiplier * Math.pow(10, this.RBMMemIntercept)), 1 / this.RBMMemSlope); + return sizeEstimator.getNumEntriesFromSizeInMB(convertBytesToMB(memSizeCapInBytes)); } if (memSizeCapInBytes < intArrMemSize) { // max number of elements will be when we have a hash set @@ -470,12 +430,11 @@ protected static long getIntArrMemSizeInBytes() { } protected long getRBMMemSizeInBytes(int numEntries) { - return convertMBToBytes(Math.pow(numEntries, RBMMemSlope) * Math.pow(10, RBMMemIntercept) * RBMMemBufferMultiplier); + return convertMBToBytes(sizeEstimator.getSizeInMB(numEntries)); } 
protected static long getRBMMemSizeWithModuloInBytes(int numEntries, int modulo) { - double[] memSizeValues = memSizeHelperFunction(modulo); - return convertMBToBytes(Math.pow(numEntries, memSizeValues[1]) * Math.pow(10, memSizeValues[2]) * memSizeValues[0]); + return convertMBToBytes(RBMSizeEstimator.getSizeWithModuloInMB(numEntries, modulo / 2)); } protected static long convertMBToBytes(double valMB) { @@ -505,15 +464,15 @@ public long getMemorySizeCapInBytes() { } public double getRBMMemSlope() { - return RBMMemSlope; + return sizeEstimator.getSlope(); } public double getRBMMemBufferMultiplier() { - return RBMMemBufferMultiplier; + return sizeEstimator.getBufferMultiplier(); } public double getRBMMemIntercept() { - return RBMMemIntercept; + return sizeEstimator.getIntercept(); } public int getMaxNumEntries() { diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java new file mode 100644 index 0000000000000..c4a203a66d80b --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java @@ -0,0 +1,108 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +/** + * A class used to estimate roaring bitmap memory sizes. + * An instance is made with a particular modulo to avoid recomputing + * values. + */ +public class RBMSizeEstimator { + public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); + protected double slope; + protected double bufferMultiplier; + protected double intercept; + protected int modulo; + RBMSizeEstimator(int modulo) { + this.modulo = modulo; + double[] memSizeValues = calculateMemoryCoefficients(modulo); + this.bufferMultiplier = memSizeValues[0]; + this.slope = memSizeValues[1]; + this.intercept = memSizeValues[2]; + } + public static double[] calculateMemoryCoefficients(int modulo) { + // Sets up values to help estimate RBM size given a modulo + // Returns an array of {bufferMultiplier, slope, intercept} + + double modifiedModulo; + if (modulo == 0) { + modifiedModulo = 32.0; + } else { + modifiedModulo = Math.log(modulo) / Math.log(2); + } + // The effective modulo should be passed in - aka 0.5 * modulo for a hybrid store + double highCutoff = 29.001; // Floating point makes 29 not work + double lowCutoff = 28.0; + double bufferMultiplier = 1.35; + if (modifiedModulo <= highCutoff) { + bufferMultiplier = 1.6; + } + + double slope; + double intercept; + if (modifiedModulo > highCutoff) { + slope = 0.69; + intercept = -3; + } else if (modifiedModulo >= lowCutoff) { + slope = 0.75; + intercept = -3.5; + } else { + slope 
= 0.88; + intercept = -4.5; + } + return new double[] { bufferMultiplier, slope, intercept }; + } + public static double getSizeWithModuloInMB(int numEntries, int modulo) { + double[] memCoefs = calculateMemoryCoefficients(modulo); + return Math.pow(numEntries, memCoefs[1]) * Math.pow(10, memCoefs[2]) * memCoefs[0]; + } + + public double getSizeInMB(int numEntries) { + return Math.pow(numEntries, slope) * Math.pow(10, intercept) * bufferMultiplier; + } + + public int getNumEntriesFromSizeInMB(double sizeInMB) { + return (int) Math.pow(sizeInMB / (bufferMultiplier * Math.pow(10, intercept)), 1 / slope); + } + + public double getSlope() { + return slope; + } + public double getIntercept() { + return intercept; + } + public double getBufferMultiplier() { + return bufferMultiplier; + } + +} From 776f3ca1a50f6523efe1d79213fb2f8d65a30d0f Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 25 Sep 2023 15:18:11 -0700 Subject: [PATCH 07/17] Added and tested new implementing class which only uses an RBM internally --- .../indices/HybridIntKeyLookupStore.java | 21 +- .../opensearch/indices/IntKeyLookupStore.java | 2 +- .../indices/RBMIntKeyLookupStore.java | 224 +++++++++++++++++ .../opensearch/indices/RBMSizeEstimator.java | 13 +- .../indices/HybridIntKeyLookupStoreTests.java | 4 +- .../indices/RBMIntKeyLookupStoreTests.java | 229 ++++++++++++++++++ ...RemovableHybridIntKeyLookupStoreTests.java | 4 +- 7 files changed, 476 insertions(+), 21 deletions(-) create mode 100644 server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java create mode 100644 server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 0009f22a12810..685f04e1b8cbe 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ 
b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -48,7 +48,6 @@ public class HybridIntKeyLookupStore implements IntKeyLookupStore { public static final int INTARR_SIZE = 100000; public static final int INTARR_TO_RBM_THRESHOLD = INTARR_SIZE; public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); // used to calculate memory usage - public static final int BYTES_IN_MB = 1048576; /** * Used to keep track of which structure is being used to store values. @@ -89,7 +88,7 @@ public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { this.numAddAttempts = 0; this.numCollisions = 0; this.guaranteesNoFalseNegatives = true; - this.memSizeCapInBytes = memSizeCapInBytes ; // A cap of 0 means no cap + this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap this.sizeEstimator = new RBMSizeEstimator(modulo / 2); // The effective modulo is halved compared to tests because of taking only negative values for the sorted int array this.maxNumEntries = calculateMaxNumEntries(); @@ -411,18 +410,18 @@ protected int calculateMaxNumEntries() { if (memSizeCapInBytes >= minRBMMemSize) { // max number of elements will be when we have an RBM // coefficients for memory calculations were done in MB, so we convert here - return sizeEstimator.getNumEntriesFromSizeInMB(convertBytesToMB(memSizeCapInBytes)); + return sizeEstimator.getNumEntriesFromSizeInMB(RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes)); } if (memSizeCapInBytes < intArrMemSize) { // max number of elements will be when we have a hash set - return Math.min((int) (convertBytesToMB(memSizeCapInBytes) / HASHSET_MEM_SLOPE), HASHSET_TO_INTARR_THRESHOLD - 1); + return Math.min((int) (RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes) / HASHSET_MEM_SLOPE), HASHSET_TO_INTARR_THRESHOLD - 1); } // max number of elements will be when we have an intArr return INTARR_TO_RBM_THRESHOLD - 1; } protected static long getHashsetMemSizeInBytes(int numEntries) { - return 
convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); + return RBMSizeEstimator.convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); } protected static long getIntArrMemSizeInBytes() { @@ -430,20 +429,14 @@ protected static long getIntArrMemSizeInBytes() { } protected long getRBMMemSizeInBytes(int numEntries) { - return convertMBToBytes(sizeEstimator.getSizeInMB(numEntries)); + return RBMSizeEstimator.convertMBToBytes(sizeEstimator.getSizeInMB(numEntries)); } protected static long getRBMMemSizeWithModuloInBytes(int numEntries, int modulo) { - return convertMBToBytes(RBMSizeEstimator.getSizeWithModuloInMB(numEntries, modulo / 2)); + return RBMSizeEstimator.convertMBToBytes(RBMSizeEstimator.getSizeWithModuloInMB(numEntries, modulo / 2)); } - protected static long convertMBToBytes(double valMB) { - return (long) (valMB * BYTES_IN_MB); - } - protected static double convertBytesToMB(long valBytes) { - return (double) valBytes / BYTES_IN_MB; - } @Override public long getMemorySizeInBytes() { @@ -501,7 +494,7 @@ public void regenerateStore(int[] newValues) throws IllegalStateException { } @Override - public void clear() { + public void clear() throws Exception { regenerateStore(new int[]{}); } } diff --git a/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java index be39c954fb0bc..349529aaf1d69 100644 --- a/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java @@ -152,5 +152,5 @@ public interface IntKeyLookupStore { /** * Deletes all keys and resets all stats related to adding. 
*/ - void clear(); + void clear() throws Exception; } diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java new file mode 100644 index 0000000000000..def2e162aef39 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -0,0 +1,224 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.indices; + +import org.roaringbitmap.RoaringBitmap; + +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +public class RBMIntKeyLookupStore implements IntKeyLookupStore { + // This class shares a lot of the same fields with HybridIntKeyLookupStore, but basically none of the logic + // besides getters, so I decided against making it the superclass to HybridIntKeyLookupStore + protected final int modulo; + protected int size; + protected long memSizeCapInBytes; + protected int numAddAttempts; + protected int numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected RoaringBitmap rbm; + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + protected final Lock readLock = lock.readLock(); + protected final Lock writeLock = lock.writeLock(); + protected RBMSizeEstimator sizeEstimator; + protected int maxNumEntries; + protected boolean atCapacity; + + RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { + this.modulo = modulo; + this.size = 0; + this.numAddAttempts = 0; + this.numCollisions = 0; + this.guaranteesNoFalseNegatives = true; + this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap + this.sizeEstimator = new RBMSizeEstimator(modulo); + this.maxNumEntries = calculateMaxNumEntries(); + this.rbm = new RoaringBitmap(); + } + + protected int calculateMaxNumEntries() { + if (memSizeCapInBytes == 0) { + return Integer.MAX_VALUE; + } + return sizeEstimator.getNumEntriesFromSizeInMB(RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes)); + } + + protected final int transform(int value) { + return modulo == 0 ? 
value : value % modulo; + } + + protected void handleCollisions(int transformedValue) { + numCollisions++; + } + + @Override + public boolean add(int value) throws Exception { + writeLock.lock(); + numAddAttempts++; + try { + if (size == maxNumEntries) { + atCapacity = true; + return false; + } + int transformedValue = transform(value); + boolean alreadyContained = contains(transformedValue); + if (!alreadyContained) { + rbm.add(transformedValue); + size++; + return true; + } + handleCollisions(transformedValue); + return false; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean contains(int value) throws Exception { + int transformedValue = transform(value); + readLock.lock(); + try { + return rbm.contains(transformedValue); + } finally { + readLock.unlock(); + } + } + + @Override + public int getInternalRepresentation(int value) { + return transform(value); + } + + @Override + public boolean remove(int value) throws Exception { + return false; + } + + @Override + public boolean supportsRemoval() { + return false; + } + + @Override + public void forceRemove(int value) throws Exception { + writeLock.lock(); + guaranteesNoFalseNegatives = false; + try { + int transformedValue = transform(value); + rbm.remove(transformedValue); + size--; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean canHaveFalseNegatives() { + return !guaranteesNoFalseNegatives; + } + + @Override + public int getSize() { + readLock.lock(); + try { + return size; + } finally { + readLock.unlock(); + } + } + + @Override + public int getNumAddAttempts() { + return numAddAttempts; + } + + @Override + public int getNumCollisions() { + return numCollisions; + } + + @Override + public String getCurrentStructure() throws Exception { + return "RBM"; + } + + @Override + public boolean isCollision(int value1, int value2) { + return transform(value1) == transform(value2); + } + + @Override + public long getMemorySizeInBytes() { + return 
RBMSizeEstimator.convertMBToBytes(sizeEstimator.getSizeInMB(size)); + } + + @Override + public long getMemorySizeCapInBytes() { + return memSizeCapInBytes; + } + + @Override + public boolean isAtCapacity() { + return atCapacity; + } + + @Override + public void regenerateStore(int[] newValues) throws Exception { + rbm.clear(); + size = 0; + this.numAddAttempts = 0; + this.numCollisions = 0; + this.guaranteesNoFalseNegatives = true; + for (int newValue : newValues) { + add(newValue); + } + } + + @Override + public void clear() throws Exception { + regenerateStore(new int[]{}); + } + + public double getRBMMemSlope() { + return sizeEstimator.getSlope(); + } + + public double getRBMMemBufferMultiplier() { + return sizeEstimator.getBufferMultiplier(); + } + + public double getRBMMemIntercept() { + return sizeEstimator.getIntercept(); + } +} diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java index c4a203a66d80b..c761cb310a7dc 100644 --- a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java +++ b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java @@ -38,11 +38,13 @@ * values. 
*/ public class RBMSizeEstimator { - public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); + public static final int BYTES_IN_MB = 1048576; protected double slope; protected double bufferMultiplier; protected double intercept; protected int modulo; + + RBMSizeEstimator(int modulo) { this.modulo = modulo; double[] memSizeValues = calculateMemoryCoefficients(modulo); @@ -95,6 +97,14 @@ public int getNumEntriesFromSizeInMB(double sizeInMB) { return (int) Math.pow(sizeInMB / (bufferMultiplier * Math.pow(10, intercept)), 1 / slope); } + protected static long convertMBToBytes(double valMB) { + return (long) (valMB * BYTES_IN_MB); + } + + protected static double convertBytesToMB(long valBytes) { + return (double) valBytes / BYTES_IN_MB; + } + public double getSlope() { return slope; } @@ -104,5 +114,4 @@ public double getIntercept() { public double getBufferMultiplier() { return bufferMultiplier; } - } diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index e7e83d05213b1..8f14395ceda9d 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -242,7 +242,7 @@ public void testMemoryCapValueInitialization() { double[] expectedMultipliers = new double[] { 1.35, 1.35, 1.6, 1.6, 1.6, 1.6 }; double[] expectedSlopes = new double[] { 0.69, 0.69, 0.75, 0.75, 0.88, 0.88 }; double[] expectedIntercepts = new double[] { -3, -3, -3.5, -3.5, -4.5, -4.5 }; - long memSizeCapInBytes = (long) 100.0 * HybridIntKeyLookupStore.BYTES_IN_MB; + long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; double delta = 0.01; for (int i = 0; i < logModulos.length; i++) { int modulo = 0; @@ -301,7 +301,7 @@ public void testMemoryCapBlocksAdd() throws Exception { int modulo = (int) testModulos[i]; // test where max number of entries 
should be 3000 - long memSizeCapInBytes = (long) (HybridIntKeyLookupStore.HASHSET_MEM_SLOPE * 3000 * HybridIntKeyLookupStore.BYTES_IN_MB); + long memSizeCapInBytes = (long) (HybridIntKeyLookupStore.HASHSET_MEM_SLOPE * 3000 * RBMSizeEstimator.BYTES_IN_MB); HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); for (int j = 0; j < 3500; j++) { kls.add(j); diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java new file mode 100644 index 0000000000000..a7bd7f4cf11ff --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -0,0 +1,229 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.indices; + +import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; + +public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { + // Tests mostly based on HybridIntKeyStoreTests.java + public void testInit() { + long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + assertEquals(0, kls.getSize()); + assertEquals(memCap, kls.getMemorySizeCapInBytes()); + } + public void testTransformationLogic() throws Exception { + int modulo = (int) Math.pow(2, 29); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, 0L); + int offset = 3; + for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design + int posValue = i * modulo + offset; + kls.add(posValue); + int negValue = -(i * modulo + offset); + kls.add(negValue); + } + assertEquals(2, kls.getSize()); + int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; + for (int value : testVals) { + assertTrue(kls.getInternalRepresentation(value) < modulo); + assertTrue(kls.getInternalRepresentation(value) > -modulo); + } + } + + public void testContainsAndForceRemove() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (int i = 0; i < 2000; i++) { + kls.add(i); + assertTrue(kls.contains(i)); + } + assertFalse(kls.canHaveFalseNegatives()); + for (int i = 1900; i < 2000; i++) { + kls.forceRemove(i); + assertFalse(kls.contains(i)); + } + assertEquals(1900, kls.getSize()); + } + + public void testAddingStatsGetters() throws Exception { + int modulo = (int) Math.pow(2, 15); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, 
0L); + kls.add(15); + kls.add(-15); + assertEquals(2, kls.getNumAddAttempts()); + assertEquals(0, kls.getNumCollisions()); + + int offset = 1; + for (int i = 0; i < 10; i++) { + kls.add(i * modulo + offset); + } + assertEquals(12, kls.getNumAddAttempts()); + assertEquals(9, kls.getNumCollisions()); + + } + + public void testRegenerateStore() throws Exception { + int numToAdd = 10000000; + Random rand = Randomness.get(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + assertEquals(numToAdd, kls.getSize()); + int[] newVals = new int[1000]; // margin accounts for collisions + for (int j = 0; j < newVals.length; j++) { + newVals[j] = rand.nextInt(); + } + kls.regenerateStore(newVals); + System.out.println("size " + kls.getSize()); + assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions + + // test clear() + kls.clear(); + assertEquals(0, kls.getSize()); + } + + public void testAddingDuplicates() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + int numToAdd = 4820411; + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(577); + } + assertEquals(numToAdd, kls.getSize()); + } + + public void testMemoryCapValueInitialization() { + double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; // these will NOT decrement by 1 + double[] expectedMultipliers = new double[] { 1.35, 1.35, 1.35, 1.6, 1.6, 1.6 }; + double[] expectedSlopes = new double[] { 0.69, 0.69, 0.69, 0.75, 0.75, 0.88 }; + double[] expectedIntercepts = new double[] { -3, -3, -3, -3.5, -3.5, -4.5 }; + long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; + double delta = 0.01; + for (int i = 0; i < logModulos.length; i++) { + int modulo = 0; + if (logModulos[i] != 0) { + modulo = (int) Math.pow(2, logModulos[i]); + } + RBMIntKeyLookupStore kls = new 
RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + assertEquals(kls.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); + assertEquals(expectedMultipliers[i], kls.getRBMMemBufferMultiplier(), delta); + assertEquals(expectedSlopes[i], kls.getRBMMemSlope(), delta); + assertEquals(expectedIntercepts[i], kls.getRBMMemIntercept(), delta); + } + } + + public void testMemoryCapBlocksAdd() throws Exception { + int modulo = (int) Math.pow(2, 29); + for (int maxEntries: new int[]{2342000, 1000, 100000}) { + long memSizeCapInBytes = HybridIntKeyLookupStore.getRBMMemSizeWithModuloInBytes(maxEntries, modulo); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + for (int j = 0; j < maxEntries + 1000; j++) { + kls.add(j); + } + assertTrue(Math.abs(maxEntries - kls.getSize()) < 5); // exact cap varies a small amount bc of floating point + } + } + + public void testConcurrency() throws Exception { + Random rand = Randomness.get(); + int modulo = (int) Math.pow(2, 29); + long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; + for (int j = 0; j < 5; j++) { // test with different numbers of threads + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memCap); + int numThreads = rand.nextInt(50) + 1; + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + // In this test we want to add the first 200K numbers and check they're all correctly there. + // We do some duplicates too to ensure those aren't incorrectly added. 
+ int amountToAdd = 200000; + ArrayList> wasAdded = new ArrayList<>(amountToAdd); + ArrayList> duplicatesWasAdded = new ArrayList<>(); + for (int i = 0; i < amountToAdd; i++) { + wasAdded.add(null); + } + for (int i = 0; i < amountToAdd; i++) { + final int val = i; + Future fut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + wasAdded.set(val, fut); + if (val % 1000 == 0) { + // do a duplicate add + Future duplicateFut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + duplicatesWasAdded.add(duplicateFut); + } + } + int originalAdds = 0; + int duplicateAdds = 0; + for (Future fut : wasAdded) { + if (fut.get()) { + originalAdds++; + } + } + for (Future duplicateFut : duplicatesWasAdded) { + if (duplicateFut.get()) { + duplicateAdds++; + } + } + for (int i = 0; i < amountToAdd; i++) { + assertTrue(kls.contains(i)); + } + assertEquals(amountToAdd, originalAdds + duplicateAdds); + assertEquals(amountToAdd, kls.getSize()); + assertEquals(amountToAdd / 1000, kls.getNumCollisions()); + executor.shutdown(); + } + } +} diff --git a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java index 0fb63f79ed9b2..9bec52aa51f28 100644 --- a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java @@ -39,7 +39,7 @@ public class RemovableHybridIntKeyLookupStoreTests extends OpenSearchTestCase { public void testRemoveNoCollisions() throws Exception { - long memCap = 100L * HybridIntKeyLookupStore.BYTES_IN_MB; + long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; RemovableHybridIntKeyLookupStore rkls = new 
RemovableHybridIntKeyLookupStore(0, memCap); // there should be no collisions for sequential positive numbers up to modulo assertTrue(rkls.supportsRemoval()); @@ -91,7 +91,7 @@ public void testRemoveNoCollisions() throws Exception { public void testRemoveWithCollisions() throws Exception { int modulo = (int) Math.pow(2, 26); - long memCap = 100L * HybridIntKeyLookupStore.BYTES_IN_MB; + long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, memCap); for (int i = 0; i < 10; i++) { rkls.add(i); From 2d19d4f59233dd3762bc6afa68a7bc1ef68fdd95 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 25 Sep 2023 16:12:57 -0700 Subject: [PATCH 08/17] Added and tested new implementing class which uses only an RBM + keeps track of collided values, see comment for justification for code reuse --- .../indices/HybridIntKeyLookupStore.java | 5 +- .../opensearch/indices/RBMSizeEstimator.java | 7 +- .../RemovableRBMIntKeyLookupStore.java | 117 +++++++ .../indices/HybridIntKeyLookupStoreTests.java | 2 +- .../indices/RBMIntKeyLookupStoreTests.java | 313 +++++++++++------- 5 files changed, 327 insertions(+), 117 deletions(-) create mode 100644 server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 685f04e1b8cbe..344101ebf3ac9 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -47,7 +47,6 @@ public class HybridIntKeyLookupStore implements IntKeyLookupStore { public static final int HASHSET_TO_INTARR_THRESHOLD = 5000; public static final int INTARR_SIZE = 100000; public static final int INTARR_TO_RBM_THRESHOLD = INTARR_SIZE; - public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); // used 
to calculate memory usage /** * Used to keep track of which structure is being used to store values. @@ -414,14 +413,14 @@ protected int calculateMaxNumEntries() { } if (memSizeCapInBytes < intArrMemSize) { // max number of elements will be when we have a hash set - return Math.min((int) (RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes) / HASHSET_MEM_SLOPE), HASHSET_TO_INTARR_THRESHOLD - 1); + return Math.min((int) (RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes) / RBMSizeEstimator.HASHSET_MEM_SLOPE), HASHSET_TO_INTARR_THRESHOLD - 1); } // max number of elements will be when we have an intArr return INTARR_TO_RBM_THRESHOLD - 1; } protected static long getHashsetMemSizeInBytes(int numEntries) { - return RBMSizeEstimator.convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); + return RBMSizeEstimator.getHashsetMemSizeInBytes(numEntries); } protected static long getIntArrMemSizeInBytes() { diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java index c761cb310a7dc..0ccc1caa699cc 100644 --- a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java +++ b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java @@ -33,12 +33,13 @@ package org.opensearch.indices; /** - * A class used to estimate roaring bitmap memory sizes. + * A class used to estimate roaring bitmap memory sizes (and hash set sizes). * An instance is made with a particular modulo to avoid recomputing * values. 
*/ public class RBMSizeEstimator { public static final int BYTES_IN_MB = 1048576; + public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); protected double slope; protected double bufferMultiplier; protected double intercept; @@ -105,6 +106,10 @@ protected static double convertBytesToMB(long valBytes) { return (double) valBytes / BYTES_IN_MB; } + protected static long getHashsetMemSizeInBytes(int numEntries) { + return convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); + } + public double getSlope() { return slope; } diff --git a/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java new file mode 100644 index 0000000000000..70a11538acb93 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.indices; + +import org.roaringbitmap.RoaringBitmap; +import java.util.HashSet; + +public class RemovableRBMIntKeyLookupStore extends RBMIntKeyLookupStore implements IntKeyLookupStore { + // The code for this class is almost the same as RemovableHybridIntKeyLookupStore, + // just with different superclasses. + // I considered changing the separate Removable classes into a CollisionHandler object + // which could be reused as a field of the KeyLookupStore objects, but since we will ultimately + // only use one of these four possible classes after doing performance testing, + // I don't think it's worth it to make the logic more complex just to avoid reusing code that might be deleted. + + private HashSet collidedInts; + private int numRemovalAttempts; + private int numSuccessfulRemovals; + + RemovableRBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { + super(modulo, memSizeCapInBytes); + collidedInts = new HashSet<>(); + numRemovalAttempts = 0; + numSuccessfulRemovals = 0; + } + + @Override + protected void handleCollisions(int transformedValue) { + numCollisions++; + collidedInts.add(transformedValue); + } + + @Override + public boolean supportsRemoval() { + return true; + } + + // Check if the value to remove has had a collision, and if not, remove it + @Override + public boolean remove(int value) throws Exception { + int transformedValue = transform(value); + readLock.lock(); + try { + if (!contains(value)) { + return false; + } + numRemovalAttempts++; + if (collidedInts.contains(transformedValue)) { + return false; + } + } finally { + readLock.unlock(); + } + writeLock.lock(); + try { + rbm.remove(transformedValue); + size--; + numSuccessfulRemovals++; + return true; + } finally { + writeLock.unlock(); + } + } + + @Override + public long getMemorySizeInBytes() { + return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + } + + @Override + public void regenerateStore(int[] 
newValues) throws Exception { + collidedInts = new HashSet<>(); + super.regenerateStore(newValues); + } + + public int getNumRemovalAttempts() { + return numRemovalAttempts; + } + + public int getNumSuccessfulRemovals() { + return numSuccessfulRemovals; + } + + public boolean valueHasHadCollision(int value) { + return collidedInts.contains(transform(value)); + } +} diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index 8f14395ceda9d..5601086734721 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -301,7 +301,7 @@ public void testMemoryCapBlocksAdd() throws Exception { int modulo = (int) testModulos[i]; // test where max number of entries should be 3000 - long memSizeCapInBytes = (long) (HybridIntKeyLookupStore.HASHSET_MEM_SLOPE * 3000 * RBMSizeEstimator.BYTES_IN_MB); + long memSizeCapInBytes = (long) (RBMSizeEstimator.HASHSET_MEM_SLOPE * 3000 * RBMSizeEstimator.BYTES_IN_MB); HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); for (int j = 0; j < 3500; j++) { kls.add(j); diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java index a7bd7f4cf11ff..a7459f5c94864 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -44,91 +44,108 @@ public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { // Tests mostly based on HybridIntKeyStoreTests.java public void testInit() { long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); - assertEquals(0, kls.getSize()); - assertEquals(memCap, 
kls.getMemorySizeCapInBytes()); + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + assertEquals(0, kls.getSize()); + assertEquals(memCap, kls.getMemorySizeCapInBytes()); + } } public void testTransformationLogic() throws Exception { int modulo = (int) Math.pow(2, 29); - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, 0L); - int offset = 3; - for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design - int posValue = i * modulo + offset; - kls.add(posValue); - int negValue = -(i * modulo + offset); - kls.add(negValue); - } - assertEquals(2, kls.getSize()); - int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; - for (int value : testVals) { - assertTrue(kls.getInternalRepresentation(value) < modulo); - assertTrue(kls.getInternalRepresentation(value) > -modulo); + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + int offset = 3; + for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design + int posValue = i * modulo + offset; + kls.add(posValue); + int negValue = -(i * modulo + offset); + kls.add(negValue); + } + assertEquals(2, kls.getSize()); + int[] testVals = new int[]{0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE}; + for (int value : testVals) { + assertTrue(kls.getInternalRepresentation(value) < modulo); + assertTrue(kls.getInternalRepresentation(value) > -modulo); + } } } public void testContainsAndForceRemove() 
throws Exception { - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (int i = 0; i < 2000; i++) { - kls.add(i); - assertTrue(kls.contains(i)); - } - assertFalse(kls.canHaveFalseNegatives()); - for (int i = 1900; i < 2000; i++) { - kls.forceRemove(i); - assertFalse(kls.contains(i)); + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + for (int i = 0; i < 2000; i++) { + kls.add(i); + assertTrue(kls.contains(i)); + } + assertFalse(kls.canHaveFalseNegatives()); + for (int i = 1900; i < 2000; i++) { + kls.forceRemove(i); + assertFalse(kls.contains(i)); + } + assertEquals(1900, kls.getSize()); } - assertEquals(1900, kls.getSize()); } public void testAddingStatsGetters() throws Exception { int modulo = (int) Math.pow(2, 15); - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, 0L); - kls.add(15); - kls.add(-15); - assertEquals(2, kls.getNumAddAttempts()); - assertEquals(0, kls.getNumCollisions()); + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore(modulo, 0L); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, 0L); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + kls.add(15); + kls.add(-15); + assertEquals(2, kls.getNumAddAttempts()); + assertEquals(0, kls.getNumCollisions()); - int offset = 1; - for (int i = 0; i < 10; i++) { - kls.add(i * modulo + offset); + int offset = 1; + for (int i = 0; i < 10; i++) { + kls.add(i * modulo + offset); + } + assertEquals(12, kls.getNumAddAttempts()); + assertEquals(9, kls.getNumCollisions()); } - assertEquals(12, kls.getNumAddAttempts()); - assertEquals(9, kls.getNumCollisions()); } public void testRegenerateStore() throws Exception { int numToAdd = 10000000; Random rand = Randomness.get(); - 
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (int i = 0; i < numToAdd; i++) { - kls.add(i); - } - assertEquals(numToAdd, kls.getSize()); - int[] newVals = new int[1000]; // margin accounts for collisions - for (int j = 0; j < newVals.length; j++) { - newVals[j] = rand.nextInt(); - } - kls.regenerateStore(newVals); - System.out.println("size " + kls.getSize()); - assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + assertEquals(numToAdd, kls.getSize()); + int[] newVals = new int[1000]; // margin accounts for collisions + for (int j = 0; j < newVals.length; j++) { + newVals[j] = rand.nextInt(); + } + kls.regenerateStore(newVals); + assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions - // test clear() - kls.clear(); - assertEquals(0, kls.getSize()); + // test clear() + kls.clear(); + assertEquals(0, kls.getSize()); + } } public void testAddingDuplicates() throws Exception { - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - int numToAdd = 4820411; - for (int i = 0; i < numToAdd; i++) { - kls.add(i); - kls.add(i); - } - for (int j = 0; j < 1000; j++) { - kls.add(577); + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + int numToAdd = 4820411; + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(577); + } + 
assertEquals(numToAdd, kls.getSize()); } - assertEquals(numToAdd, kls.getSize()); } public void testMemoryCapValueInitialization() { @@ -143,11 +160,14 @@ public void testMemoryCapValueInitialization() { if (logModulos[i] != 0) { modulo = (int) Math.pow(2, logModulos[i]); } - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); - assertEquals(kls.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); - assertEquals(expectedMultipliers[i], kls.getRBMMemBufferMultiplier(), delta); - assertEquals(expectedSlopes[i], kls.getRBMMemSlope(), delta); - assertEquals(expectedIntercepts[i], kls.getRBMMemIntercept(), delta); + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, memSizeCapInBytes); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + assertEquals(kls.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); + assertEquals(expectedMultipliers[i], kls.getRBMMemBufferMultiplier(), delta); + assertEquals(expectedSlopes[i], kls.getRBMMemSlope(), delta); + assertEquals(expectedIntercepts[i], kls.getRBMMemIntercept(), delta); + } } } @@ -155,11 +175,14 @@ public void testMemoryCapBlocksAdd() throws Exception { int modulo = (int) Math.pow(2, 29); for (int maxEntries: new int[]{2342000, 1000, 100000}) { long memSizeCapInBytes = HybridIntKeyLookupStore.getRBMMemSizeWithModuloInBytes(maxEntries, modulo); - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); - for (int j = 0; j < maxEntries + 1000; j++) { - kls.add(j); + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + for (int j = 0; j < maxEntries + 1000; j++) { + kls.add(j); + 
} + assertTrue(Math.abs(maxEntries - kls.getSize()) < 5); // exact cap varies a small amount bc of floating point } - assertTrue(Math.abs(maxEntries - kls.getSize()) < 5); // exact cap varies a small amount bc of floating point } } @@ -168,32 +191,22 @@ public void testConcurrency() throws Exception { int modulo = (int) Math.pow(2, 29); long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; for (int j = 0; j < 5; j++) { // test with different numbers of threads - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memCap); - int numThreads = rand.nextInt(50) + 1; - ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); - // In this test we want to add the first 200K numbers and check they're all correctly there. - // We do some duplicates too to ensure those aren't incorrectly added. - int amountToAdd = 200000; - ArrayList> wasAdded = new ArrayList<>(amountToAdd); - ArrayList> duplicatesWasAdded = new ArrayList<>(); - for (int i = 0; i < amountToAdd; i++) { - wasAdded.add(null); - } - for (int i = 0; i < amountToAdd; i++) { - final int val = i; - Future fut = executor.submit(() -> { - boolean didAdd; - try { - didAdd = kls.add(val); - } catch (Exception e) { - throw new RuntimeException(e); - } - return didAdd; - }); - wasAdded.set(val, fut); - if (val % 1000 == 0) { - // do a duplicate add - Future duplicateFut = executor.submit(() -> { + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + int numThreads = rand.nextInt(50) + 1; + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + // In this test we want to add the first 200K numbers and check they're all correctly there. + // We do some duplicates too to ensure those aren't incorrectly added. 
+ int amountToAdd = 200000; + ArrayList> wasAdded = new ArrayList<>(amountToAdd); + ArrayList> duplicatesWasAdded = new ArrayList<>(); + for (int i = 0; i < amountToAdd; i++) { + wasAdded.add(null); + } + for (int i = 0; i < amountToAdd; i++) { + final int val = i; + Future fut = executor.submit(() -> { boolean didAdd; try { didAdd = kls.add(val); @@ -202,28 +215,104 @@ public void testConcurrency() throws Exception { } return didAdd; }); - duplicatesWasAdded.add(duplicateFut); + wasAdded.set(val, fut); + if (val % 1000 == 0) { + // do a duplicate add + Future duplicateFut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + duplicatesWasAdded.add(duplicateFut); + } } - } - int originalAdds = 0; - int duplicateAdds = 0; - for (Future fut : wasAdded) { - if (fut.get()) { - originalAdds++; + int originalAdds = 0; + int duplicateAdds = 0; + for (Future fut : wasAdded) { + if (fut.get()) { + originalAdds++; + } } - } - for (Future duplicateFut : duplicatesWasAdded) { - if (duplicateFut.get()) { - duplicateAdds++; + for (Future duplicateFut : duplicatesWasAdded) { + if (duplicateFut.get()) { + duplicateAdds++; + } + } + for (int i = 0; i < amountToAdd; i++) { + assertTrue(kls.contains(i)); } + assertEquals(amountToAdd, originalAdds + duplicateAdds); + assertEquals(amountToAdd, kls.getSize()); + assertEquals(amountToAdd / 1000, kls.getNumCollisions()); + executor.shutdown(); } - for (int i = 0; i < amountToAdd; i++) { - assertTrue(kls.contains(i)); + } + } + + public void testRemoveNoCollisions() throws Exception { + // only for RemovableRBMIntKeyLookupStore + long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; + int numToAdd = 195000; + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(0, memCap); + // there should be no collisions for sequential positive numbers up to modulo + assertTrue(rkls.supportsRemoval()); + for (int i = 0; i < 
numToAdd; i++) { + rkls.add(i); + } + for (int i = 0; i < 1000; i++) { + assertTrue(rkls.remove(i)); + assertFalse(rkls.contains(i)); + assertFalse(rkls.valueHasHadCollision(i)); + } + assertEquals(numToAdd - 1000, rkls.getSize()); + } + + public void testRemoveWithCollisions() throws Exception { + int modulo = (int) Math.pow(2, 26); + long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, memCap); + for (int i = 0; i < 10; i++) { + rkls.add(i); + if (i % 2 == 1) { + rkls.add(-i); + assertFalse(rkls.valueHasHadCollision(i)); + rkls.add(i + modulo); + assertTrue(rkls.valueHasHadCollision(i)); + } else { + assertFalse(rkls.valueHasHadCollision(i)); } - assertEquals(amountToAdd, originalAdds + duplicateAdds); - assertEquals(amountToAdd, kls.getSize()); - assertEquals(amountToAdd / 1000, kls.getNumCollisions()); - executor.shutdown(); } + assertEquals(15, rkls.getSize()); + for (int i = 0; i < 10; i++) { + boolean didRemove = rkls.remove(i); + if (i % 2 == 1) { + // we expect a collision with i + modulo, so we can't remove + assertFalse(didRemove); + assertTrue(rkls.contains(i)); + // but we should be able to remove -i + boolean didRemoveNegative = rkls.remove(-i); + assertTrue(didRemoveNegative); + assertFalse(rkls.contains(-i)); + } else { + // we expect no collision + assertTrue(didRemove); + assertFalse(rkls.contains(i)); + assertFalse(rkls.valueHasHadCollision(i)); + } + } + assertEquals(5, rkls.getSize()); + int offset = 12; + rkls.add(offset); + for (int j = 1; j < 5; j++) { + rkls.add(offset + j * modulo); + } + assertEquals(6, rkls.getSize()); + assertFalse(rkls.remove(offset + modulo)); + assertTrue(rkls.valueHasHadCollision(offset + 15 * modulo)); + assertTrue(rkls.contains(offset + 17 * modulo)); } } From 75000f96d1cfb6e58ee98fbbabf4dfde9d1c54c1 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Tue, 26 Sep 2023 11:12:48 -0700 Subject: [PATCH 09/17] Made interface generic rather 
than int only --- .gitignore | 3 +- .../indices/HybridIntKeyLookupStore.java | 41 +++++++++++++------ ...eyLookupStore.java => KeyLookupStore.java} | 16 ++++---- .../indices/RBMIntKeyLookupStore.java | 41 +++++++++++++------ .../RemovableHybridIntKeyLookupStore.java | 14 +++++-- .../RemovableRBMIntKeyLookupStore.java | 14 +++++-- .../indices/HybridIntKeyLookupStoreTests.java | 20 ++++++++- .../indices/RBMIntKeyLookupStoreTests.java | 20 ++++++++- 8 files changed, 126 insertions(+), 43 deletions(-) rename server/src/main/java/org/opensearch/indices/{IntKeyLookupStore.java => KeyLookupStore.java} (93%) diff --git a/.gitignore b/.gitignore index 7514d55cc3c9a..291a63cdeef92 100644 --- a/.gitignore +++ b/.gitignore @@ -64,4 +64,5 @@ testfixtures_shared/ .ci/jobs/ # build files generated -doc-tools/missing-doclet/bin/ \ No newline at end of file +doc-tools/missing-doclet/bin/ +server/src/main/java/org/opensearch/indices/KLSPerformanceTest.java diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 344101ebf3ac9..19d05722ddaba 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -43,7 +43,7 @@ * A store which dynamically switches its internal data structure from hash set to sorted int array * to roaring bitmap. 
*/ -public class HybridIntKeyLookupStore implements IntKeyLookupStore { +public class HybridIntKeyLookupStore implements KeyLookupStore { public static final int HASHSET_TO_INTARR_THRESHOLD = 5000; public static final int INTARR_SIZE = 100000; public static final int INTARR_TO_RBM_THRESHOLD = INTARR_SIZE; @@ -222,7 +222,10 @@ protected void handleCollisions(int transformedValue) { } @Override - public boolean add(int value) throws IllegalStateException { + public boolean add(Integer value) throws IllegalStateException { + if (value == null) { + return false; + } writeLock.lock(); try { if (size == maxNumEntries) { @@ -302,18 +305,24 @@ protected boolean arrayCorrectlySorted() { } @Override - public boolean contains(int value) throws IllegalStateException { + public boolean contains(Integer value) throws IllegalStateException { + if (value == null) { + return false; + } int transformedValue = transform(value); return containsTransformed(transformedValue); } @Override - public int getInternalRepresentation(int value) { - return transform(value); + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); } @Override - public boolean remove(int value) throws IllegalStateException { + public boolean remove(Integer value) throws IllegalStateException { return false; } @@ -340,7 +349,10 @@ protected void removeHelperFunction(int transformedValue) throws IllegalStateExc } @Override - public void forceRemove(int value) throws IllegalStateException { + public void forceRemove(Integer value) throws IllegalStateException { + if (value == null) { + return; + } writeLock.lock(); guaranteesNoFalseNegatives = false; try { @@ -394,7 +406,10 @@ public String getCurrentStructure() throws IllegalStateException { } @Override - public boolean isCollision(int value1, int value2) { + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } 
return transform(value1) == transform(value2); } @@ -477,7 +492,7 @@ public boolean isAtCapacity() { } @Override - public void regenerateStore(int[] newValues) throws IllegalStateException { + public void regenerateStore(Integer[] newValues) throws IllegalStateException { intArr = null; rbm = null; size = 0; @@ -487,13 +502,15 @@ public void regenerateStore(int[] newValues) throws IllegalStateException { currentStructure = StructureTypes.HASHSET; hashset = new HashSet<>(); - for (int value : newValues) { - add(value); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } } } @Override public void clear() throws Exception { - regenerateStore(new int[]{}); + regenerateStore(new Integer[]{}); } } diff --git a/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java similarity index 93% rename from server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java rename to server/src/main/java/org/opensearch/indices/KeyLookupStore.java index 349529aaf1d69..b77f4d53756c5 100644 --- a/server/src/main/java/org/opensearch/indices/IntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java @@ -38,7 +38,7 @@ * int values. The internal representations may have collisions. Example transformations include a modulo * or -abs(value), or some combination. */ -public interface IntKeyLookupStore { +public interface KeyLookupStore { /** * Transforms the input value into the internal representation for this keystore * and adds it to the internal data structure. @@ -46,7 +46,7 @@ public interface IntKeyLookupStore { * @return true if the value was added, false if it wasn't added because of a * collision or if it was already present. */ - boolean add(int value) throws Exception; + boolean add(T value) throws Exception; /** * Checks if the transformation of the value is in the keystore. 
@@ -54,7 +54,7 @@ public interface IntKeyLookupStore { * @return true if the value was found, false otherwise. Due to collisions, false positives are * possible, but there should be no false negatives unless forceRemove() is called. */ - boolean contains(int value) throws Exception; + boolean contains(T value) throws Exception; /** * Returns the transformed version of the input value, that would be used to stored it in the keystore. @@ -62,7 +62,7 @@ public interface IntKeyLookupStore { * @param value The value to transform. * @return The transformed value. */ - int getInternalRepresentation(int value); + T getInternalRepresentation(T value); /** * Attempts to safely remove a value from the internal structure, maintaining the property that contains(value) @@ -71,7 +71,7 @@ public interface IntKeyLookupStore { * @param value The value to attempt to remove. * @return true if the value was removed, false if it wasn't. */ - boolean remove(int value) throws Exception; + boolean remove(T value) throws Exception; /** * Check if the implementing class supports safe removals. If it doesn't, remove() will always return false. @@ -84,7 +84,7 @@ public interface IntKeyLookupStore { * contains() to return false negatives for future values. * @param value The value to forcibly remove. */ - void forceRemove(int value) throws Exception; + void forceRemove(T value) throws Exception; /** * Check if the object currently guarantees having no false negatives when running contains(). @@ -123,7 +123,7 @@ public interface IntKeyLookupStore { * @param value2 The second value to compare. * @return true if the transformations are equal, false otherwise. */ - boolean isCollision(int value1, int value2); + boolean isCollision(T value1, T value2); /** * Returns an estimate of the store's memory usage. @@ -147,7 +147,7 @@ public interface IntKeyLookupStore { * Also resets all stats related to adding. * @param newValues The keys that should be in the reset structure. 
*/ - void regenerateStore(int[] newValues) throws Exception; + void regenerateStore(T[] newValues) throws Exception; /** * Deletes all keys and resets all stats related to adding. diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index def2e162aef39..76dbe9165e95f 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -37,7 +37,7 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; -public class RBMIntKeyLookupStore implements IntKeyLookupStore { +public class RBMIntKeyLookupStore implements KeyLookupStore { // This class shares a lot of the same fields with HybridIntKeyLookupStore, but basically none of the logic // besides getters, so I decided against making it the superclass to HybridIntKeyLookupStore protected final int modulo; @@ -82,7 +82,10 @@ protected void handleCollisions(int transformedValue) { } @Override - public boolean add(int value) throws Exception { + public boolean add(Integer value) throws Exception { + if (value == null) { + return false; + } writeLock.lock(); numAddAttempts++; try { @@ -105,7 +108,10 @@ public boolean add(int value) throws Exception { } @Override - public boolean contains(int value) throws Exception { + public boolean contains(Integer value) throws Exception { + if (value == null) { + return false; + } int transformedValue = transform(value); readLock.lock(); try { @@ -116,12 +122,15 @@ public boolean contains(int value) throws Exception { } @Override - public int getInternalRepresentation(int value) { - return transform(value); + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); } @Override - public boolean remove(int value) throws Exception { + public boolean remove(Integer 
value) throws Exception { return false; } @@ -131,7 +140,10 @@ public boolean supportsRemoval() { } @Override - public void forceRemove(int value) throws Exception { + public void forceRemove(Integer value) throws Exception { + if (value == null) { + return; + } writeLock.lock(); guaranteesNoFalseNegatives = false; try { @@ -174,7 +186,10 @@ public String getCurrentStructure() throws Exception { } @Override - public boolean isCollision(int value1, int value2) { + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } return transform(value1) == transform(value2); } @@ -194,20 +209,22 @@ public boolean isAtCapacity() { } @Override - public void regenerateStore(int[] newValues) throws Exception { + public void regenerateStore(Integer[] newValues) throws Exception { rbm.clear(); size = 0; this.numAddAttempts = 0; this.numCollisions = 0; this.guaranteesNoFalseNegatives = true; - for (int newValue : newValues) { - add(newValue); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } } } @Override public void clear() throws Exception { - regenerateStore(new int[]{}); + regenerateStore(new Integer[]{}); } public double getRBMMemSlope() { diff --git a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java index d3456c59c3d13..c1e223eb05730 100644 --- a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java @@ -37,7 +37,7 @@ /** * A store which supports safe removal of keys by maintaining a hashset of values that have had collisions. 
*/ -public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore implements IntKeyLookupStore { +public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore implements KeyLookupStore { private HashSet collidedInts; private int numRemovalAttempts; private int numSuccessfulRemovals; @@ -62,7 +62,10 @@ public boolean supportsRemoval() { // Check if the value to remove has had a collision, and if not, remove it @Override - public boolean remove(int value) throws IllegalStateException { + public boolean remove(Integer value) throws IllegalStateException { + if (value == null) { + return false; + } int transformedValue = transform(value); readLock.lock(); try { @@ -92,7 +95,7 @@ public long getMemorySizeInBytes() { } @Override - public void regenerateStore(int[] newValues) throws IllegalStateException { + public void regenerateStore(Integer[] newValues) throws IllegalStateException { collidedInts = new HashSet<>(); super.regenerateStore(newValues); } @@ -105,7 +108,10 @@ public int getNumSuccessfulRemovals() { return numSuccessfulRemovals; } - public boolean valueHasHadCollision(int value) { + public boolean valueHasHadCollision(Integer value) { + if (value == null) { + return false; + } return collidedInts.contains(transform(value)); } diff --git a/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java index 70a11538acb93..1bbe021962335 100644 --- a/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java @@ -35,7 +35,7 @@ import org.roaringbitmap.RoaringBitmap; import java.util.HashSet; -public class RemovableRBMIntKeyLookupStore extends RBMIntKeyLookupStore implements IntKeyLookupStore { +public class RemovableRBMIntKeyLookupStore extends RBMIntKeyLookupStore implements KeyLookupStore { // The code for this class is almost 
the same as RemovableHybridIntKeyLookupStore, // just with different superclasses. // I considered changing the separate Removable classes into a CollisionHandler object @@ -67,7 +67,10 @@ public boolean supportsRemoval() { // Check if the value to remove has had a collision, and if not, remove it @Override - public boolean remove(int value) throws Exception { + public boolean remove(Integer value) throws Exception { + if (value == null) { + return false; + } int transformedValue = transform(value); readLock.lock(); try { @@ -98,7 +101,7 @@ public long getMemorySizeInBytes() { } @Override - public void regenerateStore(int[] newValues) throws Exception { + public void regenerateStore(Integer[] newValues) throws Exception { collidedInts = new HashSet<>(); super.regenerateStore(newValues); } @@ -111,7 +114,10 @@ public int getNumSuccessfulRemovals() { return numSuccessfulRemovals; } - public boolean valueHasHadCollision(int value) { + public boolean valueHasHadCollision(Integer value) { + if (value == null) { + return false; + } return collidedInts.contains(transform(value)); } } diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index 5601086734721..8643f0e72c381 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -189,7 +189,7 @@ public void testRegenerateStore() throws Exception { for (int i = 0; i < resetNum; i++) { kls.add(i); } - int[] newVals = new int[(int) (resetNum * 1.1)]; // margin accounts for collisions + Integer[] newVals = new Integer[(int) (resetNum * 1.1)]; // margin accounts for collisions for (int j = 0; j < newVals.length; j++) { newVals[j] = rand.nextInt(); } @@ -392,4 +392,22 @@ public void testConcurrency() throws Exception { } } } + + public void testNullInputs() throws Exception { + 
HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { + assertFalse(kls.add(null)); + assertFalse(kls.contains(null)); + assertEquals(0, (int) kls.getInternalRepresentation(null)); + assertFalse(kls.remove(null)); + kls.forceRemove(null); + assertFalse(kls.canHaveFalseNegatives()); + assertFalse(kls.isCollision(null, null)); + assertEquals(0, kls.getNumAddAttempts()); + Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; + kls.regenerateStore(newVals); + assertEquals(4, kls.getSize()); + } + } } diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java index a7459f5c94864..5f1830aa528e9 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -119,7 +119,7 @@ public void testRegenerateStore() throws Exception { kls.add(i); } assertEquals(numToAdd, kls.getSize()); - int[] newVals = new int[1000]; // margin accounts for collisions + Integer[] newVals = new Integer[1000]; // margin accounts for collisions for (int j = 0; j < newVals.length; j++) { newVals[j] = rand.nextInt(); } @@ -315,4 +315,22 @@ public void testRemoveWithCollisions() throws Exception { assertTrue(rkls.valueHasHadCollision(offset + 15 * modulo)); assertTrue(rkls.contains(offset + 17 * modulo)); } + + public void testNullInputs() throws Exception { + RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + assertFalse(kls.add(null)); 
+ assertFalse(kls.contains(null)); + assertEquals(0, (int) kls.getInternalRepresentation(null)); + assertFalse(kls.remove(null)); + kls.forceRemove(null); + assertFalse(kls.canHaveFalseNegatives()); + assertFalse(kls.isCollision(null, null)); + assertEquals(0, kls.getNumAddAttempts()); + Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; + kls.regenerateStore(newVals); + assertEquals(4, kls.getSize()); + } + } } From 970f716df83da3209b25d2535928a9bf81a7218a Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 29 Sep 2023 15:41:54 -0700 Subject: [PATCH 10/17] Updated + simplified RBM memory size estimator based on new measurements --- .../indices/HybridIntKeyLookupStore.java | 52 ++++--------- .../opensearch/indices/KeyLookupStore.java | 2 + .../indices/RBMIntKeyLookupStore.java | 25 +++---- .../opensearch/indices/RBMSizeEstimator.java | 74 ++++--------------- .../RemovableHybridIntKeyLookupStore.java | 2 +- .../indices/HybridIntKeyLookupStoreTests.java | 30 ++------ .../indices/RBMIntKeyLookupStoreTests.java | 29 +------- 7 files changed, 50 insertions(+), 164 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 19d05722ddaba..ea2b2dbaeb4f8 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -55,7 +55,7 @@ protected enum StructureTypes { HASHSET, INTARR, RBM - }; + } protected StructureTypes currentStructure; protected final int modulo; @@ -73,9 +73,6 @@ protected enum StructureTypes { protected final Lock writeLock = lock.writeLock(); - // Used to estimate RBM memory usage - protected RBMSizeEstimator sizeEstimator; - protected int maxNumEntries; protected boolean atCapacity; @@ -88,9 +85,17 @@ public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { this.numCollisions = 0; 
this.guaranteesNoFalseNegatives = true; this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap - this.sizeEstimator = new RBMSizeEstimator(modulo / 2); // The effective modulo is halved compared to tests because of taking only negative values for the sorted int array this.maxNumEntries = calculateMaxNumEntries(); + //intArr = null; + //rbm = null; + } + + public long memEstimateRBMLib() { // debug only + if (rbm != null) { + return rbm.getLongSizeInBytes(); + } + return 0L; } protected final int customAbs(int value) { @@ -414,17 +419,16 @@ public boolean isCollision(Integer value1, Integer value2) { } protected int calculateMaxNumEntries() { - double maxHashsetMemSize = getHashsetMemSizeInBytes(HASHSET_TO_INTARR_THRESHOLD - 1); + double maxHashsetMemSize = RBMSizeEstimator.getHashsetMemSizeInBytes(HASHSET_TO_INTARR_THRESHOLD - 1); double intArrMemSize = getIntArrMemSizeInBytes(); - double minRBMMemSize = getRBMMemSizeWithModuloInBytes(INTARR_TO_RBM_THRESHOLD, modulo); + double minRBMMemSize = RBMSizeEstimator.getSizeInBytes(INTARR_TO_RBM_THRESHOLD); if (memSizeCapInBytes == 0) { return Integer.MAX_VALUE; } if (memSizeCapInBytes >= minRBMMemSize) { // max number of elements will be when we have an RBM - // coefficients for memory calculations were done in MB, so we convert here - return sizeEstimator.getNumEntriesFromSizeInMB(RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes)); + return Math.max(RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes), INTARR_TO_RBM_THRESHOLD); // there's some floating point weirdness, so we need the min to ensure we dont get values slightly below 100k } if (memSizeCapInBytes < intArrMemSize) { // max number of elements will be when we have a hash set @@ -434,33 +438,19 @@ protected int calculateMaxNumEntries() { return INTARR_TO_RBM_THRESHOLD - 1; } - protected static long getHashsetMemSizeInBytes(int numEntries) { - return RBMSizeEstimator.getHashsetMemSizeInBytes(numEntries); - } - protected static long 
getIntArrMemSizeInBytes() { return (long) (4 * INTARR_SIZE + 24); } - protected long getRBMMemSizeInBytes(int numEntries) { - return RBMSizeEstimator.convertMBToBytes(sizeEstimator.getSizeInMB(numEntries)); - } - - protected static long getRBMMemSizeWithModuloInBytes(int numEntries, int modulo) { - return RBMSizeEstimator.convertMBToBytes(RBMSizeEstimator.getSizeWithModuloInMB(numEntries, modulo / 2)); - } - - - @Override public long getMemorySizeInBytes() { switch (currentStructure) { case HASHSET: - return getHashsetMemSizeInBytes(size); + return RBMSizeEstimator.getHashsetMemSizeInBytes(size); case INTARR: return getIntArrMemSizeInBytes(); case RBM: - return getRBMMemSizeInBytes(size); + return RBMSizeEstimator.getSizeInBytes(size); } return 0; } @@ -470,18 +460,6 @@ public long getMemorySizeCapInBytes() { return memSizeCapInBytes; } - public double getRBMMemSlope() { - return sizeEstimator.getSlope(); - } - - public double getRBMMemBufferMultiplier() { - return sizeEstimator.getBufferMultiplier(); - } - - public double getRBMMemIntercept() { - return sizeEstimator.getIntercept(); - } - public int getMaxNumEntries() { return maxNumEntries; } diff --git a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java index b77f4d53756c5..bad99cfc1ff04 100644 --- a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java @@ -39,6 +39,8 @@ * or -abs(value), or some combination. */ public interface KeyLookupStore { + + long memEstimateRBMLib(); // debug only /** * Transforms the input value into the internal representation for this keystore * and adds it to the internal data structure. 
diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index 76dbe9165e95f..3dec4197a9c19 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -50,7 +50,6 @@ public class RBMIntKeyLookupStore implements KeyLookupStore { protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); - protected RBMSizeEstimator sizeEstimator; protected int maxNumEntries; protected boolean atCapacity; @@ -61,16 +60,22 @@ public class RBMIntKeyLookupStore implements KeyLookupStore { this.numCollisions = 0; this.guaranteesNoFalseNegatives = true; this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap - this.sizeEstimator = new RBMSizeEstimator(modulo); this.maxNumEntries = calculateMaxNumEntries(); this.rbm = new RoaringBitmap(); } + public long memEstimateRBMLib() { // debug only + if (rbm != null) { + return rbm.getLongSizeInBytes(); + } + return 0L; + } + protected int calculateMaxNumEntries() { if (memSizeCapInBytes == 0) { return Integer.MAX_VALUE; } - return sizeEstimator.getNumEntriesFromSizeInMB(RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes)); + return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); } protected final int transform(int value) { @@ -195,7 +200,7 @@ public boolean isCollision(Integer value1, Integer value2) { @Override public long getMemorySizeInBytes() { - return RBMSizeEstimator.convertMBToBytes(sizeEstimator.getSizeInMB(size)); + return RBMSizeEstimator.getSizeInBytes(size); } @Override @@ -226,16 +231,4 @@ public void regenerateStore(Integer[] newValues) throws Exception { public void clear() throws Exception { regenerateStore(new Integer[]{}); } - - public double getRBMMemSlope() { - return 
sizeEstimator.getSlope(); - } - - public double getRBMMemBufferMultiplier() { - return sizeEstimator.getBufferMultiplier(); - } - - public double getRBMMemIntercept() { - return sizeEstimator.getIntercept(); - } } diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java index 0ccc1caa699cc..08c9143f395b1 100644 --- a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java +++ b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java @@ -39,63 +39,25 @@ */ public class RBMSizeEstimator { public static final int BYTES_IN_MB = 1048576; - public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -6); - protected double slope; - protected double bufferMultiplier; - protected double intercept; - protected int modulo; + public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -5); + public static final double slope = 0.62; + public static final double bufferMultiplier = 1.5; + public static final double intercept = 2.9; - RBMSizeEstimator(int modulo) { - this.modulo = modulo; - double[] memSizeValues = calculateMemoryCoefficients(modulo); - this.bufferMultiplier = memSizeValues[0]; - this.slope = memSizeValues[1]; - this.intercept = memSizeValues[2]; - } - public static double[] calculateMemoryCoefficients(int modulo) { - // Sets up values to help estimate RBM size given a modulo - // Returns an array of {bufferMultiplier, slope, intercept} - - double modifiedModulo; - if (modulo == 0) { - modifiedModulo = 32.0; - } else { - modifiedModulo = Math.log(modulo) / Math.log(2); - } - // The effective modulo should be passed in - aka 0.5 * modulo for a hybrid store - double highCutoff = 29.001; // Floating point makes 29 not work - double lowCutoff = 28.0; - double bufferMultiplier = 1.35; - if (modifiedModulo <= highCutoff) { - bufferMultiplier = 1.6; - } + RBMSizeEstimator() {} - double slope; - double intercept; - if (modifiedModulo > highCutoff) { 
- slope = 0.69; - intercept = -3; - } else if (modifiedModulo >= lowCutoff) { - slope = 0.75; - intercept = -3.5; - } else { - slope = 0.88; - intercept = -4.5; - } - return new double[] { bufferMultiplier, slope, intercept }; - } - public static double getSizeWithModuloInMB(int numEntries, int modulo) { - double[] memCoefs = calculateMemoryCoefficients(modulo); - return Math.pow(numEntries, memCoefs[1]) * Math.pow(10, memCoefs[2]) * memCoefs[0]; + public static long getSizeInBytes(int numEntries) { + return (long) ((long) Math.pow(numEntries, slope) * (long) Math.pow(10, intercept) * bufferMultiplier); } - public double getSizeInMB(int numEntries) { - return Math.pow(numEntries, slope) * Math.pow(10, intercept) * bufferMultiplier; - } + public static int getNumEntriesFromSizeInBytes(long sizeInBytes) { + // This function has some precision issues especially when composed with its inverse: numEntries = getNumEntriesFromSizeInBytes(getSizeInBytes(numEntries)) + // In this case the result can be off by up to a couple percent + // However, this shouldn't really matter as both functions are based on memory estimates with higher errors than a couple percent + // and this composition won't happen outside of tests + return (int) Math.pow(sizeInBytes / (bufferMultiplier * Math.pow(10, intercept)), 1 / slope); - public int getNumEntriesFromSizeInMB(double sizeInMB) { - return (int) Math.pow(sizeInMB / (bufferMultiplier * Math.pow(10, intercept)), 1 / slope); } protected static long convertMBToBytes(double valMB) { @@ -109,14 +71,4 @@ protected static double convertBytesToMB(long valBytes) { protected static long getHashsetMemSizeInBytes(int numEntries) { return convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); } - - public double getSlope() { - return slope; - } - public double getIntercept() { - return intercept; - } - public double getBufferMultiplier() { - return bufferMultiplier; - } } diff --git 
a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java index c1e223eb05730..363ceaddce74f 100644 --- a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java @@ -91,7 +91,7 @@ public boolean remove(Integer value) throws IllegalStateException { @Override public long getMemorySizeInBytes() { - return super.getMemorySizeInBytes() + getHashsetMemSizeInBytes(collidedInts.size()); + return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); } @Override diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index 8643f0e72c381..a0159cff6d807 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -237,33 +237,13 @@ public void testAddingDuplicates() throws Exception { } } - public void testMemoryCapValueInitialization() { - double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; // these will decrement by 1 - double[] expectedMultipliers = new double[] { 1.35, 1.35, 1.6, 1.6, 1.6, 1.6 }; - double[] expectedSlopes = new double[] { 0.69, 0.69, 0.75, 0.75, 0.88, 0.88 }; - double[] expectedIntercepts = new double[] { -3, -3, -3.5, -3.5, -4.5, -4.5 }; - long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; - double delta = 0.01; - for (int i = 0; i < logModulos.length; i++) { - int modulo = 0; - if (logModulos[i] != 0) { - modulo = (int) Math.pow(2, logModulos[i]); - } - HybridIntKeyLookupStore rbm = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); - assertEquals(rbm.memSizeCapInBytes, rbm.getMemorySizeCapInBytes(), 1.0); - 
assertEquals(expectedMultipliers[i], rbm.getRBMMemBufferMultiplier(), delta); - assertEquals(expectedSlopes[i], rbm.getRBMMemSlope(), delta); - assertEquals(expectedIntercepts[i], rbm.getRBMMemIntercept(), delta); - } - } - public void testMemoryCapBlocksTransitions() throws Exception { double[] testModulos = new double[] { 0, Math.pow(2, 31), Math.pow(2, 29), Math.pow(2, 28), Math.pow(2, 26) }; for (int i = 0; i < testModulos.length; i++) { int modulo = (int) testModulos[i]; - long maxHashsetMemSize = HybridIntKeyLookupStore.getHashsetMemSizeInBytes(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); + long maxHashsetMemSize = RBMSizeEstimator.getHashsetMemSizeInBytes(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); long intArrMemSize = HybridIntKeyLookupStore.getIntArrMemSizeInBytes(); - long minRBMMemSize = HybridIntKeyLookupStore.getRBMMemSizeWithModuloInBytes(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, modulo); + long minRBMMemSize = RBMSizeEstimator.getSizeInBytes(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD); // test that transitions in data structure do indeed monotonically increase predicted memory size assertTrue(maxHashsetMemSize < intArrMemSize); @@ -319,12 +299,14 @@ public void testMemoryCapBlocksAdd() throws Exception { assertEquals("intArr", kls.getCurrentStructure()); int maxEntries = 2342000; - memSizeCapInBytes = HybridIntKeyLookupStore.getRBMMemSizeWithModuloInBytes(maxEntries, modulo); + memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); for (int j = 0; j < maxEntries + 1000; j++) { kls.add(j); } - assertTrue(Math.abs(maxEntries - kls.getSize()) < 5); // exact cap varies a small amount bc of floating point + assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25); + // exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations + // precision gets much worse when we compose 
the two functions, as we do here, but this wouldn't happen in an actual use case } } diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java index 5f1830aa528e9..27c0e069e4edb 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -148,40 +148,19 @@ public void testAddingDuplicates() throws Exception { } } - public void testMemoryCapValueInitialization() { - double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; // these will NOT decrement by 1 - double[] expectedMultipliers = new double[] { 1.35, 1.35, 1.35, 1.6, 1.6, 1.6 }; - double[] expectedSlopes = new double[] { 0.69, 0.69, 0.69, 0.75, 0.75, 0.88 }; - double[] expectedIntercepts = new double[] { -3, -3, -3, -3.5, -3.5, -4.5 }; - long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; - double delta = 0.01; - for (int i = 0; i < logModulos.length; i++) { - int modulo = 0; - if (logModulos[i] != 0) { - modulo = (int) Math.pow(2, logModulos[i]); - } - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, memSizeCapInBytes); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { - assertEquals(kls.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); - assertEquals(expectedMultipliers[i], kls.getRBMMemBufferMultiplier(), delta); - assertEquals(expectedSlopes[i], kls.getRBMMemSlope(), delta); - assertEquals(expectedIntercepts[i], kls.getRBMMemIntercept(), delta); - } - } - } - public void testMemoryCapBlocksAdd() throws Exception { int modulo = (int) Math.pow(2, 29); for (int maxEntries: new int[]{2342000, 1000, 100000}) { - long memSizeCapInBytes = HybridIntKeyLookupStore.getRBMMemSizeWithModuloInBytes(maxEntries, modulo); + 
long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { for (int j = 0; j < maxEntries + 1000; j++) { kls.add(j); } - assertTrue(Math.abs(maxEntries - kls.getSize()) < 5); // exact cap varies a small amount bc of floating point + assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25); + // exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations + // precision gets much worse when we compose the two functions, as we do here, but this wouldn't happen in an actual use case } } } From 85f1ed2eaca311e58a24cb322136955bffa1e46f Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 29 Sep 2023 15:45:29 -0700 Subject: [PATCH 11/17] Removed debug functions accidentally added to last commit --- .../org/opensearch/indices/HybridIntKeyLookupStore.java | 7 ------- .../main/java/org/opensearch/indices/KeyLookupStore.java | 1 - .../java/org/opensearch/indices/RBMIntKeyLookupStore.java | 7 ------- 3 files changed, 15 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index ea2b2dbaeb4f8..602a722c64063 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -91,13 +91,6 @@ public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { //rbm = null; } - public long memEstimateRBMLib() { // debug only - if (rbm != null) { - return rbm.getLongSizeInBytes(); - } - return 0L; - } - protected final int customAbs(int value) { if (value < 0 && value > Integer.MIN_VALUE) { 
return -value; diff --git a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java index bad99cfc1ff04..fa9c8dc38df9a 100644 --- a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java @@ -40,7 +40,6 @@ */ public interface KeyLookupStore { - long memEstimateRBMLib(); // debug only /** * Transforms the input value into the internal representation for this keystore * and adds it to the internal data structure. diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index 3dec4197a9c19..c452b82521265 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -64,13 +64,6 @@ public class RBMIntKeyLookupStore implements KeyLookupStore { this.rbm = new RoaringBitmap(); } - public long memEstimateRBMLib() { // debug only - if (rbm != null) { - return rbm.getLongSizeInBytes(); - } - return 0L; - } - protected int calculateMaxNumEntries() { if (memSizeCapInBytes == 0) { return Integer.MAX_VALUE; From f0a62e8b5ddb591179ff1705909cf252e23a10cb Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 2 Oct 2023 13:40:55 -0700 Subject: [PATCH 12/17] Addressed misc second round comments --- .../indices/BaseRBMIntKeyLookupStore.java | 234 ++++++++++++++++++ .../indices/HybridIntKeyLookupStore.java | 153 ++++++------ .../opensearch/indices/KeyLookupStore.java | 18 +- .../indices/RBMIntKeyLookupStore.java | 189 +++----------- .../RemovableHybridIntKeyLookupStore.java | 18 +- .../RemovableRBMIntKeyLookupStore.java | 123 --------- ...ava => BaseRBMIntKeyLookupStoreTests.java} | 73 +++--- .../indices/HybridIntKeyLookupStoreTests.java | 42 ++-- ...RemovableHybridIntKeyLookupStoreTests.java | 1 - 9 files changed, 414 insertions(+), 437 
deletions(-) create mode 100644 server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java delete mode 100644 server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java rename server/src/test/java/org/opensearch/indices/{RBMIntKeyLookupStoreTests.java => BaseRBMIntKeyLookupStoreTests.java} (77%) diff --git a/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java new file mode 100644 index 0000000000000..24eb1a4736dfe --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java @@ -0,0 +1,234 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.indices; + +import org.opensearch.common.metrics.CounterMetric; +import org.roaringbitmap.RoaringBitmap; + +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +public class BaseRBMIntKeyLookupStore implements KeyLookupStore { + // This class shares a lot of the same fields with HybridIntKeyLookupStore, but basically none of the logic + // besides getters, so I decided against making it the superclass to HybridIntKeyLookupStore + protected final int modulo; + protected class KeyStoreStats { + protected int size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected int maxNumEntries; + protected boolean atCapacity; + protected CounterMetric numRemovalAttempts; // used in removable classes + protected CounterMetric numSuccessfulRemovals; + protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + this.size = 0; + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.guaranteesNoFalseNegatives = true; + this.memSizeCapInBytes = memSizeCapInBytes; + this.maxNumEntries = maxNumEntries; + this.atCapacity = false; + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } + } + + protected KeyStoreStats stats; + protected RoaringBitmap rbm; + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + protected final Lock readLock = lock.readLock(); + protected final Lock writeLock = lock.writeLock(); + + BaseRBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { + this.modulo = modulo; + this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); + this.rbm = new RoaringBitmap(); + } + + protected int calculateMaxNumEntries(long memSizeCapInBytes) { + if (memSizeCapInBytes == 0) { + return Integer.MAX_VALUE; + } + 
return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); + } + + protected final int transform(int value) { + return modulo == 0 ? value : value % modulo; + } + + protected void handleCollisions(int transformedValue) { + stats.numCollisions.inc(); + } + + @Override + public boolean add(Integer value) throws Exception { + if (value == null) { + return false; + } + writeLock.lock(); + stats.numAddAttempts.inc(); + try { + if (stats.size == stats.maxNumEntries) { + stats.atCapacity = true; + return false; + } + int transformedValue = transform(value); + boolean alreadyContained = contains(value); + if (!alreadyContained) { + rbm.add(transformedValue); + stats.size++; + return true; + } + handleCollisions(transformedValue); + return false; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean contains(Integer value) throws Exception { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + return rbm.contains(transformedValue); + } finally { + readLock.unlock(); + } + } + + @Override + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); + } + + @Override + public boolean remove(Integer value) throws Exception { + return false; + } + + + @Override + public void forceRemove(Integer value) throws Exception { + if (value == null) { + return; + } + writeLock.lock(); + stats.guaranteesNoFalseNegatives = false; + try { + int transformedValue = transform(value); + rbm.remove(transformedValue); + stats.size--; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean canHaveFalseNegatives() { + return !stats.guaranteesNoFalseNegatives; + } + + @Override + public int getSize() { + readLock.lock(); + try { + return stats.size; + } finally { + readLock.unlock(); + } + } + + @Override + public int getTotalAdds() { + return (int) stats.numAddAttempts.count(); + } + + @Override + 
public int getCollisions() { + return (int) stats.numCollisions.count(); + } + + + @Override + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } + return transform(value1) == transform(value2); + } + + @Override + public long getMemorySizeInBytes() { + return RBMSizeEstimator.getSizeInBytes(stats.size); + } + + @Override + public long getMemorySizeCapInBytes() { + return stats.memSizeCapInBytes; + } + + @Override + public boolean isFull() { + return stats.atCapacity; + } + + @Override + public void regenerateStore(Integer[] newValues) throws Exception { + rbm.clear(); + stats.size = 0; + stats.numAddAttempts = new CounterMetric(); + stats.numCollisions = new CounterMetric(); + stats.guaranteesNoFalseNegatives = true; + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } + } + } + + @Override + public void clear() throws Exception { + regenerateStore(new Integer[]{}); + } +} diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 602a722c64063..4d94cadce0b77 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -37,7 +37,9 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.opensearch.common.metrics.CounterMetric; import org.roaringbitmap.RoaringBitmap; +import org.w3c.dom.css.Counter; /** * A store which dynamically switches its internal data structure from hash set to sorted int array @@ -51,19 +53,39 @@ public class HybridIntKeyLookupStore implements KeyLookupStore { /** * Used to keep track of which structure is being used to store values. 
*/ - protected enum StructureTypes { + public enum StructureTypes { HASHSET, INTARR, RBM } + protected class KeyStoreStats { + protected int size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected int maxNumEntries; + protected boolean atCapacity; + protected CounterMetric numRemovalAttempts; // used in removable classes + protected CounterMetric numSuccessfulRemovals; + protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + this.size = 0; + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.guaranteesNoFalseNegatives = true; + this.memSizeCapInBytes = memSizeCapInBytes; + this.maxNumEntries = maxNumEntries; + this.atCapacity = false; + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } + } + + protected KeyStoreStats stats; protected StructureTypes currentStructure; protected final int modulo; - protected int size; - protected long memSizeCapInBytes; - protected int numAddAttempts; - protected int numCollisions; - protected boolean guaranteesNoFalseNegatives; + protected HashSet hashset; protected int[] intArr; @@ -72,23 +94,11 @@ protected enum StructureTypes { protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); - - protected int maxNumEntries; - protected boolean atCapacity; - public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { this.modulo = modulo; // A modulo of 0 means no modulo this.hashset = new HashSet(); this.currentStructure = StructureTypes.HASHSET; - this.size = 0; - this.numAddAttempts = 0; - this.numCollisions = 0; - this.guaranteesNoFalseNegatives = true; - this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap - // The effective modulo is halved compared to tests because of taking only negative values for the sorted int array - 
this.maxNumEntries = calculateMaxNumEntries(); - //intArr = null; - //rbm = null; + this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); } protected final int customAbs(int value) { @@ -142,14 +152,14 @@ protected final void switchHashsetToIntArr() throws IllegalStateException { writeLock.lock(); try { if (currentStructure == StructureTypes.HASHSET) { - size = 0; + stats.size = 0; intArr = new int[INTARR_SIZE]; currentStructure = StructureTypes.INTARR; for (int value : hashset) { - boolean alreadyContained = isInIntArr(value, size, true); + boolean alreadyContained = isInIntArr(value, stats.size, true); // should never be already contained, but just to be safe if (!alreadyContained) { - size++; + stats.size++; } } hashset = null; @@ -165,7 +175,7 @@ protected final void switchIntArrToRBM() { if (currentStructure == StructureTypes.INTARR) { currentStructure = StructureTypes.RBM; rbm = new RoaringBitmap(); - for (int i = 0; i < size; i++) { + for (int i = 0; i < stats.size; i++) { rbm.add(intArr[i]); } intArr = null; @@ -182,15 +192,15 @@ protected final void switchIntArrToRBM() { protected final void handleStructureSwitch() throws IllegalStateException { // write lock? 
writeLock.lock(); try { - if (size == HASHSET_TO_INTARR_THRESHOLD - 1) { - if (maxNumEntries <= HASHSET_TO_INTARR_THRESHOLD) { - atCapacity = true; + if (stats.size == HASHSET_TO_INTARR_THRESHOLD - 1) { + if (stats.maxNumEntries <= HASHSET_TO_INTARR_THRESHOLD) { + stats.atCapacity = true; return; } switchHashsetToIntArr(); - } else if (size == INTARR_TO_RBM_THRESHOLD - 1) { - if (maxNumEntries <= INTARR_TO_RBM_THRESHOLD) { - atCapacity = true; + } else if (stats.size == INTARR_TO_RBM_THRESHOLD - 1) { + if (stats.maxNumEntries <= INTARR_TO_RBM_THRESHOLD) { + stats.atCapacity = true; return; } switchIntArrToRBM(); @@ -204,11 +214,11 @@ protected final void removeFromIntArr(int value) throws IllegalStateException { writeLock.lock(); try { intArrChecks(value); - int index = Arrays.binarySearch(intArr, 0, size, value); + int index = Arrays.binarySearch(intArr, 0, stats.size, value); if (index >= 0) { - System.arraycopy(intArr, index + 1, intArr, index, size - index - 1); - intArr[size - 1] = 0; - size--; + System.arraycopy(intArr, index + 1, intArr, index, stats.size - index - 1); + intArr[stats.size - 1] = 0; + stats.size--; } } finally { writeLock.unlock(); @@ -216,7 +226,7 @@ protected final void removeFromIntArr(int value) throws IllegalStateException { } protected void handleCollisions(int transformedValue) { - numCollisions++; + stats.numCollisions.inc(); } @Override @@ -226,13 +236,13 @@ public boolean add(Integer value) throws IllegalStateException { } writeLock.lock(); try { - if (size == maxNumEntries) { - atCapacity = true; + if (stats.size == stats.maxNumEntries) { + stats.atCapacity = true; } handleStructureSwitch(); // also might set atCapacity - if (!atCapacity) { + if (!stats.atCapacity) { - numAddAttempts++; + stats.numAddAttempts.inc(); int transformedValue = transform(value); boolean alreadyContained; @@ -241,7 +251,7 @@ public boolean add(Integer value) throws IllegalStateException { alreadyContained = !(hashset.add(transformedValue)); break; case 
INTARR: - alreadyContained = isInIntArr(transformedValue, size, true); + alreadyContained = isInIntArr(transformedValue, stats.size, true); break; case RBM: alreadyContained = containsTransformed(transformedValue); @@ -256,7 +266,7 @@ public boolean add(Integer value) throws IllegalStateException { handleCollisions(transformedValue); return false; } - size++; + stats.size++; return true; } return false; @@ -272,7 +282,7 @@ protected boolean containsTransformed(int transformedValue) throws IllegalStateE case HASHSET: return hashset.contains(transformedValue); case INTARR: - return isInIntArr(transformedValue, size, false); + return isInIntArr(transformedValue, stats.size, false); case RBM: return rbm.contains(transformedValue); default: @@ -324,10 +334,6 @@ public boolean remove(Integer value) throws IllegalStateException { return false; } - @Override - public boolean supportsRemoval() { - return false; - } protected void removeHelperFunction(int transformedValue) throws IllegalStateException { // allows code to be reused in forceRemove() of this class and remove() of inheriting class @@ -335,14 +341,14 @@ protected void removeHelperFunction(int transformedValue) throws IllegalStateExc switch (currentStructure) { case HASHSET: hashset.remove(transformedValue); - size--; + stats.size--; return; case INTARR: removeFromIntArr(transformedValue); // size is decreased in this function already return; case RBM: rbm.remove(transformedValue); - size--; + stats.size--; } } @@ -352,7 +358,7 @@ public void forceRemove(Integer value) throws IllegalStateException { return; } writeLock.lock(); - guaranteesNoFalseNegatives = false; + stats.guaranteesNoFalseNegatives = false; try { int transformedValue = transform(value); boolean alreadyContained = contains(transformedValue); @@ -366,41 +372,32 @@ public void forceRemove(Integer value) throws IllegalStateException { @Override public boolean canHaveFalseNegatives() { - return !guaranteesNoFalseNegatives; + return 
!stats.guaranteesNoFalseNegatives; } @Override public int getSize() { readLock.lock(); // needed because size is changed during switchHashsetToIntarr() try { - return size; + return stats.size; } finally { readLock.unlock(); } } @Override - public int getNumAddAttempts() { - return numAddAttempts; + public int getTotalAdds() { + return (int) stats.numAddAttempts.count(); } @Override - public int getNumCollisions() { - return numCollisions; + public int getCollisions() { + return (int) stats.numCollisions.count(); } - @Override - public String getCurrentStructure() throws IllegalStateException { - switch (currentStructure) { - case HASHSET: - return "HashSet"; - case INTARR: - return "intArr"; - case RBM: - return "RBM"; - default: - throw new IllegalStateException("currentStructure is none of possible values"); - } + + public StructureTypes getCurrentStructure() throws IllegalStateException { + return currentStructure; } @Override @@ -411,7 +408,7 @@ public boolean isCollision(Integer value1, Integer value2) { return transform(value1) == transform(value2); } - protected int calculateMaxNumEntries() { + protected int calculateMaxNumEntries(long memSizeCapInBytes) { double maxHashsetMemSize = RBMSizeEstimator.getHashsetMemSizeInBytes(HASHSET_TO_INTARR_THRESHOLD - 1); double intArrMemSize = getIntArrMemSizeInBytes(); double minRBMMemSize = RBMSizeEstimator.getSizeInBytes(INTARR_TO_RBM_THRESHOLD); @@ -439,37 +436,39 @@ protected static long getIntArrMemSizeInBytes() { public long getMemorySizeInBytes() { switch (currentStructure) { case HASHSET: - return RBMSizeEstimator.getHashsetMemSizeInBytes(size); + return RBMSizeEstimator.getHashsetMemSizeInBytes(stats.size); case INTARR: return getIntArrMemSizeInBytes(); case RBM: - return RBMSizeEstimator.getSizeInBytes(size); + return RBMSizeEstimator.getSizeInBytes(stats.size); } return 0; } @Override public long getMemorySizeCapInBytes() { - return memSizeCapInBytes; + return stats.memSizeCapInBytes; } public int 
getMaxNumEntries() { - return maxNumEntries; + return stats.maxNumEntries; } @Override - public boolean isAtCapacity() { - return atCapacity; + public boolean isFull() { + return stats.atCapacity; } @Override public void regenerateStore(Integer[] newValues) throws IllegalStateException { intArr = null; rbm = null; - size = 0; - numCollisions = 0; - numAddAttempts = 0; - guaranteesNoFalseNegatives = true; + stats.size = 0; + stats.numCollisions = new CounterMetric(); + stats.numAddAttempts = new CounterMetric(); + stats.guaranteesNoFalseNegatives = true; + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); currentStructure = StructureTypes.HASHSET; hashset = new HashSet<>(); diff --git a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java index fa9c8dc38df9a..5d94bef417898 100644 --- a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java @@ -74,11 +74,6 @@ public interface KeyLookupStore { */ boolean remove(T value) throws Exception; - /** - * Check if the implementing class supports safe removals. If it doesn't, remove() will always return false. - * @return true if the class supports safe removals, false if it doesn't. - */ - boolean supportsRemoval(); /** * Remove the transformed version of this value from the store. Calling this function may cause @@ -104,19 +99,14 @@ public interface KeyLookupStore { * Returns the number of times add() has been run, including unsuccessful attempts. * @return The number of adding attempts. */ - int getNumAddAttempts(); + int getTotalAdds(); /** * Returns the number of times add() has returned false due to a collision. * @return The number of collisions. */ - int getNumCollisions(); + int getCollisions(); - /** - * Returns the current internal data structure. 
- * @return A string representing the currently used internal data structure. - */ - String getCurrentStructure() throws Exception; /** * Checks if two values would collide after being transformed by this store's transformation. @@ -139,9 +129,9 @@ public interface KeyLookupStore { long getMemorySizeCapInBytes(); /** - * Returns whether the store is at memory capacity + * Returns whether the store is at memory capacity and can't accept more entries */ - boolean isAtCapacity(); + boolean isFull(); /** * Deletes the internal data structure and regenerates it from the values passed in. diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index c452b82521265..2b40ce3ec4c0c 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -32,196 +32,83 @@ package org.opensearch.indices; -import org.roaringbitmap.RoaringBitmap; +import java.util.HashSet; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; +public class RBMIntKeyLookupStore extends BaseRBMIntKeyLookupStore implements KeyLookupStore { + // The code for this class is almost the same as RemovableHybridIntKeyLookupStore, + // just with different superclasses. + // I considered changing the separate Removable classes into a CollisionHandler object + // which could be reused as a field of the KeyLookupStore objects, but since we will ultimately + // only use one of these four possible classes after doing performance testing, + // I don't think it's worth it to make the logic more complex just to avoid reusing code that might be deleted. 
-public class RBMIntKeyLookupStore implements KeyLookupStore { - // This class shares a lot of the same fields with HybridIntKeyLookupStore, but basically none of the logic - // besides getters, so I decided against making it the superclass to HybridIntKeyLookupStore - protected final int modulo; - protected int size; - protected long memSizeCapInBytes; - protected int numAddAttempts; - protected int numCollisions; - protected boolean guaranteesNoFalseNegatives; - protected RoaringBitmap rbm; - protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - protected final Lock readLock = lock.readLock(); - protected final Lock writeLock = lock.writeLock(); - protected int maxNumEntries; - protected boolean atCapacity; + private HashSet collidedInts; RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - this.modulo = modulo; - this.size = 0; - this.numAddAttempts = 0; - this.numCollisions = 0; - this.guaranteesNoFalseNegatives = true; - this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap - this.maxNumEntries = calculateMaxNumEntries(); - this.rbm = new RoaringBitmap(); - } - - protected int calculateMaxNumEntries() { - if (memSizeCapInBytes == 0) { - return Integer.MAX_VALUE; - } - return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); - } - - protected final int transform(int value) { - return modulo == 0 ? 
value : value % modulo; + super(modulo, memSizeCapInBytes); + collidedInts = new HashSet<>(); } + @Override protected void handleCollisions(int transformedValue) { - numCollisions++; + stats.numCollisions.inc(); + collidedInts.add(transformedValue); } - @Override - public boolean add(Integer value) throws Exception { - if (value == null) { - return false; - } - writeLock.lock(); - numAddAttempts++; - try { - if (size == maxNumEntries) { - atCapacity = true; - return false; - } - int transformedValue = transform(value); - boolean alreadyContained = contains(transformedValue); - if (!alreadyContained) { - rbm.add(transformedValue); - size++; - return true; - } - handleCollisions(transformedValue); - return false; - } finally { - writeLock.unlock(); - } - } + // Check if the value to remove has had a collision, and if not, remove it @Override - public boolean contains(Integer value) throws Exception { + public boolean remove(Integer value) throws Exception { if (value == null) { return false; } int transformedValue = transform(value); readLock.lock(); try { - return rbm.contains(transformedValue); + if (!contains(value)) { + return false; + } + stats.numRemovalAttempts.inc(); + if (collidedInts.contains(transformedValue)) { + return false; + } } finally { readLock.unlock(); } - } - - @Override - public Integer getInternalRepresentation(Integer value) { - if (value == null) { - return 0; - } - return Integer.valueOf(transform(value)); - } - - @Override - public boolean remove(Integer value) throws Exception { - return false; - } - - @Override - public boolean supportsRemoval() { - return false; - } - - @Override - public void forceRemove(Integer value) throws Exception { - if (value == null) { - return; - } writeLock.lock(); - guaranteesNoFalseNegatives = false; try { - int transformedValue = transform(value); rbm.remove(transformedValue); - size--; + stats.size--; + stats.numSuccessfulRemovals.inc(); + return true; } finally { writeLock.unlock(); } } @Override - 
public boolean canHaveFalseNegatives() { - return !guaranteesNoFalseNegatives; - } - - @Override - public int getSize() { - readLock.lock(); - try { - return size; - } finally { - readLock.unlock(); - } + public long getMemorySizeInBytes() { + return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); } @Override - public int getNumAddAttempts() { - return numAddAttempts; + public void regenerateStore(Integer[] newValues) throws Exception { + collidedInts = new HashSet<>(); + super.regenerateStore(newValues); } - @Override - public int getNumCollisions() { - return numCollisions; + public int getNumRemovalAttempts() { + return (int) stats.numRemovalAttempts.count(); } - @Override - public String getCurrentStructure() throws Exception { - return "RBM"; + public int getNumSuccessfulRemovals() { + return (int) stats.numSuccessfulRemovals.count(); } - @Override - public boolean isCollision(Integer value1, Integer value2) { - if (value1 == null || value2 == null) { + public boolean valueHasHadCollision(Integer value) { + if (value == null) { return false; } - return transform(value1) == transform(value2); - } - - @Override - public long getMemorySizeInBytes() { - return RBMSizeEstimator.getSizeInBytes(size); - } - - @Override - public long getMemorySizeCapInBytes() { - return memSizeCapInBytes; - } - - @Override - public boolean isAtCapacity() { - return atCapacity; - } - - @Override - public void regenerateStore(Integer[] newValues) throws Exception { - rbm.clear(); - size = 0; - this.numAddAttempts = 0; - this.numCollisions = 0; - this.guaranteesNoFalseNegatives = true; - for (int i = 0; i < newValues.length; i++) { - if (newValues[i] != null) { - add(newValues[i]); - } - } - } - - @Override - public void clear() throws Exception { - regenerateStore(new Integer[]{}); + return collidedInts.contains(transform(value)); } } diff --git a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java 
b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java index 363ceaddce74f..9b58ca0bf0498 100644 --- a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java @@ -39,26 +39,18 @@ */ public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore implements KeyLookupStore { private HashSet collidedInts; - private int numRemovalAttempts; - private int numSuccessfulRemovals; RemovableHybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { super(modulo, memSizeCapInBytes); collidedInts = new HashSet<>(); - numRemovalAttempts = 0; - numSuccessfulRemovals = 0; } @Override protected void handleCollisions(int transformedValue) { - numCollisions++; + stats.numCollisions.inc(); collidedInts.add(transformedValue); } - @Override - public boolean supportsRemoval() { - return true; - } // Check if the value to remove has had a collision, and if not, remove it @Override @@ -72,7 +64,7 @@ public boolean remove(Integer value) throws IllegalStateException { if (!contains(value)) { return false; } - numRemovalAttempts++; + stats.numRemovalAttempts.inc(); if (collidedInts.contains(transformedValue)) { return false; } @@ -82,7 +74,7 @@ public boolean remove(Integer value) throws IllegalStateException { writeLock.lock(); try { removeHelperFunction(transformedValue); - numSuccessfulRemovals++; + stats.numSuccessfulRemovals.inc(); return true; } finally { writeLock.unlock(); @@ -101,11 +93,11 @@ public void regenerateStore(Integer[] newValues) throws IllegalStateException { } public int getNumRemovalAttempts() { - return numRemovalAttempts; + return (int) stats.numRemovalAttempts.count(); } public int getNumSuccessfulRemovals() { - return numSuccessfulRemovals; + return (int) stats.numSuccessfulRemovals.count(); } public boolean valueHasHadCollision(Integer value) { diff --git 
a/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java deleted file mode 100644 index 1bbe021962335..0000000000000 --- a/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.indices; - -import org.roaringbitmap.RoaringBitmap; -import java.util.HashSet; - -public class RemovableRBMIntKeyLookupStore extends RBMIntKeyLookupStore implements KeyLookupStore { - // The code for this class is almost the same as RemovableHybridIntKeyLookupStore, - // just with different superclasses. 
- // I considered changing the separate Removable classes into a CollisionHandler object - // which could be reused as a field of the KeyLookupStore objects, but since we will ultimately - // only use one of these four possible classes after doing performance testing, - // I don't think it's worth it to make the logic more complex just to avoid reusing code that might be deleted. - - private HashSet collidedInts; - private int numRemovalAttempts; - private int numSuccessfulRemovals; - - RemovableRBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - super(modulo, memSizeCapInBytes); - collidedInts = new HashSet<>(); - numRemovalAttempts = 0; - numSuccessfulRemovals = 0; - } - - @Override - protected void handleCollisions(int transformedValue) { - numCollisions++; - collidedInts.add(transformedValue); - } - - @Override - public boolean supportsRemoval() { - return true; - } - - // Check if the value to remove has had a collision, and if not, remove it - @Override - public boolean remove(Integer value) throws Exception { - if (value == null) { - return false; - } - int transformedValue = transform(value); - readLock.lock(); - try { - if (!contains(value)) { - return false; - } - numRemovalAttempts++; - if (collidedInts.contains(transformedValue)) { - return false; - } - } finally { - readLock.unlock(); - } - writeLock.lock(); - try { - rbm.remove(transformedValue); - size--; - numSuccessfulRemovals++; - return true; - } finally { - writeLock.unlock(); - } - } - - @Override - public long getMemorySizeInBytes() { - return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); - } - - @Override - public void regenerateStore(Integer[] newValues) throws Exception { - collidedInts = new HashSet<>(); - super.regenerateStore(newValues); - } - - public int getNumRemovalAttempts() { - return numRemovalAttempts; - } - - public int getNumSuccessfulRemovals() { - return numSuccessfulRemovals; - } - - public boolean 
valueHasHadCollision(Integer value) { - if (value == null) { - return false; - } - return collidedInts.contains(transform(value)); - } -} diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java similarity index 77% rename from server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java rename to server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java index 27c0e069e4edb..7a26fe3b03965 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java @@ -40,22 +40,22 @@ import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; -public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { +public class BaseRBMIntKeyLookupStoreTests extends OpenSearchTestCase { // Tests mostly based on HybridIntKeyStoreTests.java public void testInit() { long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { assertEquals(0, kls.getSize()); assertEquals(memCap, kls.getMemorySizeCapInBytes()); } } public void testTransformationLogic() throws Exception { int modulo = (int) Math.pow(2, 29); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) 
Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { int offset = 3; for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design int posValue = i * modulo + offset; @@ -73,9 +73,9 @@ public void testTransformationLogic() throws Exception { } public void testContainsAndForceRemove() throws Exception { - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { for (int i = 0; i < 2000; i++) { kls.add(i); assertTrue(kls.contains(i)); @@ -91,20 +91,20 @@ public void testContainsAndForceRemove() throws Exception { public void testAddingStatsGetters() throws Exception { int modulo = (int) Math.pow(2, 15); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore(modulo, 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore(modulo, 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(modulo, 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { kls.add(15); kls.add(-15); - assertEquals(2, 
kls.getNumAddAttempts()); - assertEquals(0, kls.getNumCollisions()); + assertEquals(2, kls.getTotalAdds()); + assertEquals(0, kls.getCollisions()); int offset = 1; for (int i = 0; i < 10; i++) { kls.add(i * modulo + offset); } - assertEquals(12, kls.getNumAddAttempts()); - assertEquals(9, kls.getNumCollisions()); + assertEquals(12, kls.getTotalAdds()); + assertEquals(9, kls.getCollisions()); } } @@ -112,9 +112,9 @@ public void testAddingStatsGetters() throws Exception { public void testRegenerateStore() throws Exception { int numToAdd = 10000000; Random rand = Randomness.get(); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { for (int i = 0; i < numToAdd; i++) { kls.add(i); } @@ -133,9 +133,9 @@ public void testRegenerateStore() throws Exception { } public void testAddingDuplicates() throws Exception { - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { int numToAdd = 4820411; for (int i = 0; i < numToAdd; i++) { kls.add(i); @@ -152,9 +152,9 @@ public void testMemoryCapBlocksAdd() throws Exception { int 
modulo = (int) Math.pow(2, 29); for (int maxEntries: new int[]{2342000, 1000, 100000}) { long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { for (int j = 0; j < maxEntries + 1000; j++) { kls.add(j); } @@ -170,9 +170,9 @@ public void testConcurrency() throws Exception { int modulo = (int) Math.pow(2, 29); long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; for (int j = 0; j < 5; j++) { // test with different numbers of threads - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { int numThreads = rand.nextInt(50) + 1; ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); // In this test we want to add the first 200K numbers and check they're all correctly there. 
@@ -226,7 +226,7 @@ public void testConcurrency() throws Exception { } assertEquals(amountToAdd, originalAdds + duplicateAdds); assertEquals(amountToAdd, kls.getSize()); - assertEquals(amountToAdd / 1000, kls.getNumCollisions()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); executor.shutdown(); } } @@ -236,9 +236,8 @@ public void testRemoveNoCollisions() throws Exception { // only for RemovableRBMIntKeyLookupStore long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; int numToAdd = 195000; - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(0, memCap); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(0, memCap); // there should be no collisions for sequential positive numbers up to modulo - assertTrue(rkls.supportsRemoval()); for (int i = 0; i < numToAdd; i++) { rkls.add(i); } @@ -253,7 +252,7 @@ public void testRemoveNoCollisions() throws Exception { public void testRemoveWithCollisions() throws Exception { int modulo = (int) Math.pow(2, 26); long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, memCap); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(modulo, memCap); for (int i = 0; i < 10; i++) { rkls.add(i); if (i % 2 == 1) { @@ -296,9 +295,9 @@ public void testRemoveWithCollisions() throws Exception { } public void testNullInputs() throws Exception { - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { assertFalse(kls.add(null)); assertFalse(kls.contains(null)); 
assertEquals(0, (int) kls.getInternalRepresentation(null)); @@ -306,7 +305,7 @@ public void testNullInputs() throws Exception { kls.forceRemove(null); assertFalse(kls.canHaveFalseNegatives()); assertFalse(kls.isCollision(null, null)); - assertEquals(0, kls.getNumAddAttempts()); + assertEquals(0, kls.getTotalAdds()); Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; kls.regenerateStore(newVals); assertEquals(4, kls.getSize()); diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index a0159cff6d807..5f66f8dd86401 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -48,7 +48,7 @@ public void testInit() throws Exception { HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); assertEquals(0, kls.getSize()); } } @@ -60,12 +60,12 @@ public void testStructureTransitions() throws Exception { for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i++) { kls.add(i); } - assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD; i++) { kls.add(i); } - assertEquals("RBM", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.RBM, 
kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, kls.getSize()); } } @@ -140,13 +140,13 @@ public void testContainsAndForceRemove() throws Exception { assertEquals(1, kls.getSize() - lastSize); lastSize = kls.getSize(); } - assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); for (int i = kls.getSize(); i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { kls.add(i); assertTrue(kls.contains(i)); } - assertEquals("RBM", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.RBM, kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000, kls.getSize()); for (int i = 5000; i < 10000; i++) { kls.forceRemove(i); @@ -163,15 +163,15 @@ public void testAddingStatsGetters() throws Exception { for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { kls.add(15); kls.add(-15); - assertEquals(2, kls.getNumAddAttempts()); - assertEquals(1, kls.getNumCollisions()); + assertEquals(2, kls.getTotalAdds()); + assertEquals(1, kls.getCollisions()); int offset = 1; for (int i = 0; i < 10; i++) { kls.add(i * modulo + offset); } - assertEquals(12, kls.getNumAddAttempts()); - assertEquals(10, kls.getNumCollisions()); + assertEquals(12, kls.getTotalAdds()); + assertEquals(10, kls.getCollisions()); } } @@ -199,7 +199,7 @@ public void testRegenerateStore() throws Exception { } // test clear() kls.clear(); - assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); assertEquals(0, kls.getSize()); } } @@ -255,23 +255,23 @@ public void testMemoryCapBlocksTransitions() throws Exception { assertTrue(didAdd); } // now try to add one more, which would cause a transition and push us past the memory cap 
- assertFalse(kls.isAtCapacity()); - assertEquals("HashSet", kls.getCurrentStructure()); + assertFalse(kls.isFull()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); boolean didAdd = kls.add(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); assertFalse(didAdd); - assertTrue(kls.isAtCapacity()); - assertEquals("HashSet", kls.getCurrentStructure()); + assertTrue(kls.isFull()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); kls = new HybridIntKeyLookupStore(modulo, minRBMMemSize); for (int j = 0; j < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; j++) { didAdd = kls.add(j); assertTrue(didAdd); } - assertFalse(kls.isAtCapacity()); + assertFalse(kls.isFull()); didAdd = kls.add(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD); assertFalse(didAdd); - assertTrue(kls.isAtCapacity()); - assertEquals("intArr", kls.getCurrentStructure()); + assertTrue(kls.isFull()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); } } @@ -287,7 +287,7 @@ public void testMemoryCapBlocksAdd() throws Exception { kls.add(j); } assertTrue(Math.abs(3000 - kls.getSize()) < 2); // double --> long conversion adds a bit of lossiness - assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); // test where max number of entries should be 999,999 (bounded at intArr size) memSizeCapInBytes = HybridIntKeyLookupStore.getIntArrMemSizeInBytes(); @@ -296,7 +296,7 @@ public void testMemoryCapBlocksAdd() throws Exception { kls.add(j); } assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, kls.getSize()); - assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); int maxEntries = 2342000; memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); @@ -369,7 +369,7 @@ public void 
testConcurrency() throws Exception { } assertEquals(amountToAdd, originalAdds + duplicateAdds); assertEquals(amountToAdd, kls.getSize()); - assertEquals(amountToAdd / 1000, kls.getNumCollisions()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); executor.shutdown(); } } @@ -386,7 +386,7 @@ public void testNullInputs() throws Exception { kls.forceRemove(null); assertFalse(kls.canHaveFalseNegatives()); assertFalse(kls.isCollision(null, null)); - assertEquals(0, kls.getNumAddAttempts()); + assertEquals(0, kls.getTotalAdds()); Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; kls.regenerateStore(newVals); assertEquals(4, kls.getSize()); diff --git a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java index 9bec52aa51f28..975daa107ef39 100644 --- a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java @@ -42,7 +42,6 @@ public void testRemoveNoCollisions() throws Exception { long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(0, memCap); // there should be no collisions for sequential positive numbers up to modulo - assertTrue(rkls.supportsRemoval()); for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { rkls.add(i); } From c62bce6d8e24d961c8bb8eb4fe2737dcd19a04fe Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 2 Oct 2023 13:40:55 -0700 Subject: [PATCH 13/17] Addressed misc second round comments --- .../indices/BaseRBMIntKeyLookupStore.java | 234 ++++++++++++++++++ .../indices/HybridIntKeyLookupStore.java | 153 ++++++------ .../opensearch/indices/KeyLookupStore.java | 18 +- .../indices/RBMIntKeyLookupStore.java | 189 +++----------- .../RemovableHybridIntKeyLookupStore.java | 18 +- 
.../RemovableRBMIntKeyLookupStore.java | 123 --------- ...ava => BaseRBMIntKeyLookupStoreTests.java} | 73 +++--- .../indices/HybridIntKeyLookupStoreTests.java | 42 ++-- ...RemovableHybridIntKeyLookupStoreTests.java | 1 - 9 files changed, 414 insertions(+), 437 deletions(-) create mode 100644 server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java delete mode 100644 server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java rename server/src/test/java/org/opensearch/indices/{RBMIntKeyLookupStoreTests.java => BaseRBMIntKeyLookupStoreTests.java} (77%) diff --git a/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java new file mode 100644 index 0000000000000..24eb1a4736dfe --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java @@ -0,0 +1,234 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. 
See + * GitHub history for details. + */ + +package org.opensearch.indices; + +import org.opensearch.common.metrics.CounterMetric; +import org.roaringbitmap.RoaringBitmap; + +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +public class BaseRBMIntKeyLookupStore implements KeyLookupStore { + // This class shares a lot of the same fields with HybridIntKeyLookupStore, but basically none of the logic + // besides getters, so I decided against making it the superclass to HybridIntKeyLookupStore + protected final int modulo; + protected class KeyStoreStats { + protected int size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected int maxNumEntries; + protected boolean atCapacity; + protected CounterMetric numRemovalAttempts; // used in removable classes + protected CounterMetric numSuccessfulRemovals; + protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + this.size = 0; + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.guaranteesNoFalseNegatives = true; + this.memSizeCapInBytes = memSizeCapInBytes; + this.maxNumEntries = maxNumEntries; + this.atCapacity = false; + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } + } + + protected KeyStoreStats stats; + protected RoaringBitmap rbm; + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + protected final Lock readLock = lock.readLock(); + protected final Lock writeLock = lock.writeLock(); + + BaseRBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { + this.modulo = modulo; + this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); + this.rbm = new RoaringBitmap(); + } + + protected int calculateMaxNumEntries(long memSizeCapInBytes) { + if (memSizeCapInBytes == 0) { 
+ return Integer.MAX_VALUE; + } + return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); + } + + protected final int transform(int value) { + return modulo == 0 ? value : value % modulo; + } + + protected void handleCollisions(int transformedValue) { + stats.numCollisions.inc(); + } + + @Override + public boolean add(Integer value) throws Exception { + if (value == null) { + return false; + } + writeLock.lock(); + stats.numAddAttempts.inc(); + try { + if (stats.size == stats.maxNumEntries) { + stats.atCapacity = true; + return false; + } + int transformedValue = transform(value); + boolean alreadyContained = contains(value); + if (!alreadyContained) { + rbm.add(transformedValue); + stats.size++; + return true; + } + handleCollisions(transformedValue); + return false; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean contains(Integer value) throws Exception { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + return rbm.contains(transformedValue); + } finally { + readLock.unlock(); + } + } + + @Override + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); + } + + @Override + public boolean remove(Integer value) throws Exception { + return false; + } + + + @Override + public void forceRemove(Integer value) throws Exception { + if (value == null) { + return; + } + writeLock.lock(); + stats.guaranteesNoFalseNegatives = false; + try { + int transformedValue = transform(value); + rbm.remove(transformedValue); + stats.size--; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean canHaveFalseNegatives() { + return !stats.guaranteesNoFalseNegatives; + } + + @Override + public int getSize() { + readLock.lock(); + try { + return stats.size; + } finally { + readLock.unlock(); + } + } + + @Override + public int getTotalAdds() { + return (int) 
stats.numAddAttempts.count(); + } + + @Override + public int getCollisions() { + return (int) stats.numCollisions.count(); + } + + + @Override + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } + return transform(value1) == transform(value2); + } + + @Override + public long getMemorySizeInBytes() { + return RBMSizeEstimator.getSizeInBytes(stats.size); + } + + @Override + public long getMemorySizeCapInBytes() { + return stats.memSizeCapInBytes; + } + + @Override + public boolean isFull() { + return stats.atCapacity; + } + + @Override + public void regenerateStore(Integer[] newValues) throws Exception { + rbm.clear(); + stats.size = 0; + stats.numAddAttempts = new CounterMetric(); + stats.numCollisions = new CounterMetric(); + stats.guaranteesNoFalseNegatives = true; + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } + } + } + + @Override + public void clear() throws Exception { + regenerateStore(new Integer[]{}); + } +} diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 602a722c64063..4d94cadce0b77 100644 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -37,7 +37,9 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.opensearch.common.metrics.CounterMetric; import org.roaringbitmap.RoaringBitmap; +import org.w3c.dom.css.Counter; /** * A store which dynamically switches its internal data structure from hash set to sorted int array @@ -51,19 +53,39 @@ public class HybridIntKeyLookupStore implements KeyLookupStore { /** * Used to keep track of which structure is 
being used to store values. */ - protected enum StructureTypes { + public enum StructureTypes { HASHSET, INTARR, RBM } + protected class KeyStoreStats { + protected int size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected int maxNumEntries; + protected boolean atCapacity; + protected CounterMetric numRemovalAttempts; // used in removable classes + protected CounterMetric numSuccessfulRemovals; + protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + this.size = 0; + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.guaranteesNoFalseNegatives = true; + this.memSizeCapInBytes = memSizeCapInBytes; + this.maxNumEntries = maxNumEntries; + this.atCapacity = false; + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } + } + + protected KeyStoreStats stats; protected StructureTypes currentStructure; protected final int modulo; - protected int size; - protected long memSizeCapInBytes; - protected int numAddAttempts; - protected int numCollisions; - protected boolean guaranteesNoFalseNegatives; + protected HashSet hashset; protected int[] intArr; @@ -72,23 +94,11 @@ protected enum StructureTypes { protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); - - protected int maxNumEntries; - protected boolean atCapacity; - public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { this.modulo = modulo; // A modulo of 0 means no modulo this.hashset = new HashSet(); this.currentStructure = StructureTypes.HASHSET; - this.size = 0; - this.numAddAttempts = 0; - this.numCollisions = 0; - this.guaranteesNoFalseNegatives = true; - this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap - // The effective modulo is halved compared to tests because of taking only negative values for 
the sorted int array - this.maxNumEntries = calculateMaxNumEntries(); - //intArr = null; - //rbm = null; + this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); } protected final int customAbs(int value) { @@ -142,14 +152,14 @@ protected final void switchHashsetToIntArr() throws IllegalStateException { writeLock.lock(); try { if (currentStructure == StructureTypes.HASHSET) { - size = 0; + stats.size = 0; intArr = new int[INTARR_SIZE]; currentStructure = StructureTypes.INTARR; for (int value : hashset) { - boolean alreadyContained = isInIntArr(value, size, true); + boolean alreadyContained = isInIntArr(value, stats.size, true); // should never be already contained, but just to be safe if (!alreadyContained) { - size++; + stats.size++; } } hashset = null; @@ -165,7 +175,7 @@ protected final void switchIntArrToRBM() { if (currentStructure == StructureTypes.INTARR) { currentStructure = StructureTypes.RBM; rbm = new RoaringBitmap(); - for (int i = 0; i < size; i++) { + for (int i = 0; i < stats.size; i++) { rbm.add(intArr[i]); } intArr = null; @@ -182,15 +192,15 @@ protected final void switchIntArrToRBM() { protected final void handleStructureSwitch() throws IllegalStateException { // write lock? 
writeLock.lock(); try { - if (size == HASHSET_TO_INTARR_THRESHOLD - 1) { - if (maxNumEntries <= HASHSET_TO_INTARR_THRESHOLD) { - atCapacity = true; + if (stats.size == HASHSET_TO_INTARR_THRESHOLD - 1) { + if (stats.maxNumEntries <= HASHSET_TO_INTARR_THRESHOLD) { + stats.atCapacity = true; return; } switchHashsetToIntArr(); - } else if (size == INTARR_TO_RBM_THRESHOLD - 1) { - if (maxNumEntries <= INTARR_TO_RBM_THRESHOLD) { - atCapacity = true; + } else if (stats.size == INTARR_TO_RBM_THRESHOLD - 1) { + if (stats.maxNumEntries <= INTARR_TO_RBM_THRESHOLD) { + stats.atCapacity = true; return; } switchIntArrToRBM(); @@ -204,11 +214,11 @@ protected final void removeFromIntArr(int value) throws IllegalStateException { writeLock.lock(); try { intArrChecks(value); - int index = Arrays.binarySearch(intArr, 0, size, value); + int index = Arrays.binarySearch(intArr, 0, stats.size, value); if (index >= 0) { - System.arraycopy(intArr, index + 1, intArr, index, size - index - 1); - intArr[size - 1] = 0; - size--; + System.arraycopy(intArr, index + 1, intArr, index, stats.size - index - 1); + intArr[stats.size - 1] = 0; + stats.size--; } } finally { writeLock.unlock(); @@ -216,7 +226,7 @@ protected final void removeFromIntArr(int value) throws IllegalStateException { } protected void handleCollisions(int transformedValue) { - numCollisions++; + stats.numCollisions.inc(); } @Override @@ -226,13 +236,13 @@ public boolean add(Integer value) throws IllegalStateException { } writeLock.lock(); try { - if (size == maxNumEntries) { - atCapacity = true; + if (stats.size == stats.maxNumEntries) { + stats.atCapacity = true; } handleStructureSwitch(); // also might set atCapacity - if (!atCapacity) { + if (!stats.atCapacity) { - numAddAttempts++; + stats.numAddAttempts.inc(); int transformedValue = transform(value); boolean alreadyContained; @@ -241,7 +251,7 @@ public boolean add(Integer value) throws IllegalStateException { alreadyContained = !(hashset.add(transformedValue)); break; case 
INTARR: - alreadyContained = isInIntArr(transformedValue, size, true); + alreadyContained = isInIntArr(transformedValue, stats.size, true); break; case RBM: alreadyContained = containsTransformed(transformedValue); @@ -256,7 +266,7 @@ public boolean add(Integer value) throws IllegalStateException { handleCollisions(transformedValue); return false; } - size++; + stats.size++; return true; } return false; @@ -272,7 +282,7 @@ protected boolean containsTransformed(int transformedValue) throws IllegalStateE case HASHSET: return hashset.contains(transformedValue); case INTARR: - return isInIntArr(transformedValue, size, false); + return isInIntArr(transformedValue, stats.size, false); case RBM: return rbm.contains(transformedValue); default: @@ -324,10 +334,6 @@ public boolean remove(Integer value) throws IllegalStateException { return false; } - @Override - public boolean supportsRemoval() { - return false; - } protected void removeHelperFunction(int transformedValue) throws IllegalStateException { // allows code to be reused in forceRemove() of this class and remove() of inheriting class @@ -335,14 +341,14 @@ protected void removeHelperFunction(int transformedValue) throws IllegalStateExc switch (currentStructure) { case HASHSET: hashset.remove(transformedValue); - size--; + stats.size--; return; case INTARR: removeFromIntArr(transformedValue); // size is decreased in this function already return; case RBM: rbm.remove(transformedValue); - size--; + stats.size--; } } @@ -352,7 +358,7 @@ public void forceRemove(Integer value) throws IllegalStateException { return; } writeLock.lock(); - guaranteesNoFalseNegatives = false; + stats.guaranteesNoFalseNegatives = false; try { int transformedValue = transform(value); boolean alreadyContained = contains(transformedValue); @@ -366,41 +372,32 @@ public void forceRemove(Integer value) throws IllegalStateException { @Override public boolean canHaveFalseNegatives() { - return !guaranteesNoFalseNegatives; + return 
!stats.guaranteesNoFalseNegatives; } @Override public int getSize() { readLock.lock(); // needed because size is changed during switchHashsetToIntarr() try { - return size; + return stats.size; } finally { readLock.unlock(); } } @Override - public int getNumAddAttempts() { - return numAddAttempts; + public int getTotalAdds() { + return (int) stats.numAddAttempts.count(); } @Override - public int getNumCollisions() { - return numCollisions; + public int getCollisions() { + return (int) stats.numCollisions.count(); } - @Override - public String getCurrentStructure() throws IllegalStateException { - switch (currentStructure) { - case HASHSET: - return "HashSet"; - case INTARR: - return "intArr"; - case RBM: - return "RBM"; - default: - throw new IllegalStateException("currentStructure is none of possible values"); - } + + public StructureTypes getCurrentStructure() throws IllegalStateException { + return currentStructure; } @Override @@ -411,7 +408,7 @@ public boolean isCollision(Integer value1, Integer value2) { return transform(value1) == transform(value2); } - protected int calculateMaxNumEntries() { + protected int calculateMaxNumEntries(long memSizeCapInBytes) { double maxHashsetMemSize = RBMSizeEstimator.getHashsetMemSizeInBytes(HASHSET_TO_INTARR_THRESHOLD - 1); double intArrMemSize = getIntArrMemSizeInBytes(); double minRBMMemSize = RBMSizeEstimator.getSizeInBytes(INTARR_TO_RBM_THRESHOLD); @@ -439,37 +436,39 @@ protected static long getIntArrMemSizeInBytes() { public long getMemorySizeInBytes() { switch (currentStructure) { case HASHSET: - return RBMSizeEstimator.getHashsetMemSizeInBytes(size); + return RBMSizeEstimator.getHashsetMemSizeInBytes(stats.size); case INTARR: return getIntArrMemSizeInBytes(); case RBM: - return RBMSizeEstimator.getSizeInBytes(size); + return RBMSizeEstimator.getSizeInBytes(stats.size); } return 0; } @Override public long getMemorySizeCapInBytes() { - return memSizeCapInBytes; + return stats.memSizeCapInBytes; } public int 
getMaxNumEntries() { - return maxNumEntries; + return stats.maxNumEntries; } @Override - public boolean isAtCapacity() { - return atCapacity; + public boolean isFull() { + return stats.atCapacity; } @Override public void regenerateStore(Integer[] newValues) throws IllegalStateException { intArr = null; rbm = null; - size = 0; - numCollisions = 0; - numAddAttempts = 0; - guaranteesNoFalseNegatives = true; + stats.size = 0; + stats.numCollisions = new CounterMetric(); + stats.numAddAttempts = new CounterMetric(); + stats.guaranteesNoFalseNegatives = true; + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); currentStructure = StructureTypes.HASHSET; hashset = new HashSet<>(); diff --git a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java index fa9c8dc38df9a..5d94bef417898 100644 --- a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java @@ -74,11 +74,6 @@ public interface KeyLookupStore { */ boolean remove(T value) throws Exception; - /** - * Check if the implementing class supports safe removals. If it doesn't, remove() will always return false. - * @return true if the class supports safe removals, false if it doesn't. - */ - boolean supportsRemoval(); /** * Remove the transformed version of this value from the store. Calling this function may cause @@ -104,19 +99,14 @@ public interface KeyLookupStore { * Returns the number of times add() has been run, including unsuccessful attempts. * @return The number of adding attempts. */ - int getNumAddAttempts(); + int getTotalAdds(); /** * Returns the number of times add() has returned false due to a collision. * @return The number of collisions. */ - int getNumCollisions(); + int getCollisions(); - /** - * Returns the current internal data structure. 
- * @return A string representing the currently used internal data structure. - */ - String getCurrentStructure() throws Exception; /** * Checks if two values would collide after being transformed by this store's transformation. @@ -139,9 +129,9 @@ public interface KeyLookupStore { long getMemorySizeCapInBytes(); /** - * Returns whether the store is at memory capacity + * Returns whether the store is at memory capacity and can't accept more entries */ - boolean isAtCapacity(); + boolean isFull(); /** * Deletes the internal data structure and regenerates it from the values passed in. diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index c452b82521265..2b40ce3ec4c0c 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -32,196 +32,83 @@ package org.opensearch.indices; -import org.roaringbitmap.RoaringBitmap; +import java.util.HashSet; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; +public class RBMIntKeyLookupStore extends BaseRBMIntKeyLookupStore implements KeyLookupStore { + // The code for this class is almost the same as RemovableHybridIntKeyLookupStore, + // just with different superclasses. + // I considered changing the separate Removable classes into a CollisionHandler object + // which could be reused as a field of the KeyLookupStore objects, but since we will ultimately + // only use one of these four possible classes after doing performance testing, + // I don't think it's worth it to make the logic more complex just to avoid reusing code that might be deleted. 
-public class RBMIntKeyLookupStore implements KeyLookupStore { - // This class shares a lot of the same fields with HybridIntKeyLookupStore, but basically none of the logic - // besides getters, so I decided against making it the superclass to HybridIntKeyLookupStore - protected final int modulo; - protected int size; - protected long memSizeCapInBytes; - protected int numAddAttempts; - protected int numCollisions; - protected boolean guaranteesNoFalseNegatives; - protected RoaringBitmap rbm; - protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - protected final Lock readLock = lock.readLock(); - protected final Lock writeLock = lock.writeLock(); - protected int maxNumEntries; - protected boolean atCapacity; + private HashSet collidedInts; RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - this.modulo = modulo; - this.size = 0; - this.numAddAttempts = 0; - this.numCollisions = 0; - this.guaranteesNoFalseNegatives = true; - this.memSizeCapInBytes = memSizeCapInBytes; // A cap of 0 means no cap - this.maxNumEntries = calculateMaxNumEntries(); - this.rbm = new RoaringBitmap(); - } - - protected int calculateMaxNumEntries() { - if (memSizeCapInBytes == 0) { - return Integer.MAX_VALUE; - } - return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); - } - - protected final int transform(int value) { - return modulo == 0 ? 
value : value % modulo; + super(modulo, memSizeCapInBytes); + collidedInts = new HashSet<>(); } + @Override protected void handleCollisions(int transformedValue) { - numCollisions++; + stats.numCollisions.inc(); + collidedInts.add(transformedValue); } - @Override - public boolean add(Integer value) throws Exception { - if (value == null) { - return false; - } - writeLock.lock(); - numAddAttempts++; - try { - if (size == maxNumEntries) { - atCapacity = true; - return false; - } - int transformedValue = transform(value); - boolean alreadyContained = contains(transformedValue); - if (!alreadyContained) { - rbm.add(transformedValue); - size++; - return true; - } - handleCollisions(transformedValue); - return false; - } finally { - writeLock.unlock(); - } - } + // Check if the value to remove has had a collision, and if not, remove it @Override - public boolean contains(Integer value) throws Exception { + public boolean remove(Integer value) throws Exception { if (value == null) { return false; } int transformedValue = transform(value); readLock.lock(); try { - return rbm.contains(transformedValue); + if (!contains(value)) { + return false; + } + stats.numRemovalAttempts.inc(); + if (collidedInts.contains(transformedValue)) { + return false; + } } finally { readLock.unlock(); } - } - - @Override - public Integer getInternalRepresentation(Integer value) { - if (value == null) { - return 0; - } - return Integer.valueOf(transform(value)); - } - - @Override - public boolean remove(Integer value) throws Exception { - return false; - } - - @Override - public boolean supportsRemoval() { - return false; - } - - @Override - public void forceRemove(Integer value) throws Exception { - if (value == null) { - return; - } writeLock.lock(); - guaranteesNoFalseNegatives = false; try { - int transformedValue = transform(value); rbm.remove(transformedValue); - size--; + stats.size--; + stats.numSuccessfulRemovals.inc(); + return true; } finally { writeLock.unlock(); } } @Override - 
public boolean canHaveFalseNegatives() { - return !guaranteesNoFalseNegatives; - } - - @Override - public int getSize() { - readLock.lock(); - try { - return size; - } finally { - readLock.unlock(); - } + public long getMemorySizeInBytes() { + return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); } @Override - public int getNumAddAttempts() { - return numAddAttempts; + public void regenerateStore(Integer[] newValues) throws Exception { + collidedInts = new HashSet<>(); + super.regenerateStore(newValues); } - @Override - public int getNumCollisions() { - return numCollisions; + public int getNumRemovalAttempts() { + return (int) stats.numRemovalAttempts.count(); } - @Override - public String getCurrentStructure() throws Exception { - return "RBM"; + public int getNumSuccessfulRemovals() { + return (int) stats.numSuccessfulRemovals.count(); } - @Override - public boolean isCollision(Integer value1, Integer value2) { - if (value1 == null || value2 == null) { + public boolean valueHasHadCollision(Integer value) { + if (value == null) { return false; } - return transform(value1) == transform(value2); - } - - @Override - public long getMemorySizeInBytes() { - return RBMSizeEstimator.getSizeInBytes(size); - } - - @Override - public long getMemorySizeCapInBytes() { - return memSizeCapInBytes; - } - - @Override - public boolean isAtCapacity() { - return atCapacity; - } - - @Override - public void regenerateStore(Integer[] newValues) throws Exception { - rbm.clear(); - size = 0; - this.numAddAttempts = 0; - this.numCollisions = 0; - this.guaranteesNoFalseNegatives = true; - for (int i = 0; i < newValues.length; i++) { - if (newValues[i] != null) { - add(newValues[i]); - } - } - } - - @Override - public void clear() throws Exception { - regenerateStore(new Integer[]{}); + return collidedInts.contains(transform(value)); } } diff --git a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java 
b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java index 363ceaddce74f..9b58ca0bf0498 100644 --- a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java @@ -39,26 +39,18 @@ */ public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore implements KeyLookupStore { private HashSet collidedInts; - private int numRemovalAttempts; - private int numSuccessfulRemovals; RemovableHybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { super(modulo, memSizeCapInBytes); collidedInts = new HashSet<>(); - numRemovalAttempts = 0; - numSuccessfulRemovals = 0; } @Override protected void handleCollisions(int transformedValue) { - numCollisions++; + stats.numCollisions.inc(); collidedInts.add(transformedValue); } - @Override - public boolean supportsRemoval() { - return true; - } // Check if the value to remove has had a collision, and if not, remove it @Override @@ -72,7 +64,7 @@ public boolean remove(Integer value) throws IllegalStateException { if (!contains(value)) { return false; } - numRemovalAttempts++; + stats.numRemovalAttempts.inc(); if (collidedInts.contains(transformedValue)) { return false; } @@ -82,7 +74,7 @@ public boolean remove(Integer value) throws IllegalStateException { writeLock.lock(); try { removeHelperFunction(transformedValue); - numSuccessfulRemovals++; + stats.numSuccessfulRemovals.inc(); return true; } finally { writeLock.unlock(); @@ -101,11 +93,11 @@ public void regenerateStore(Integer[] newValues) throws IllegalStateException { } public int getNumRemovalAttempts() { - return numRemovalAttempts; + return (int) stats.numRemovalAttempts.count(); } public int getNumSuccessfulRemovals() { - return numSuccessfulRemovals; + return (int) stats.numSuccessfulRemovals.count(); } public boolean valueHasHadCollision(Integer value) { diff --git 
a/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java deleted file mode 100644 index 1bbe021962335..0000000000000 --- a/server/src/main/java/org/opensearch/indices/RemovableRBMIntKeyLookupStore.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.indices; - -import org.roaringbitmap.RoaringBitmap; -import java.util.HashSet; - -public class RemovableRBMIntKeyLookupStore extends RBMIntKeyLookupStore implements KeyLookupStore { - // The code for this class is almost the same as RemovableHybridIntKeyLookupStore, - // just with different superclasses. 
- // I considered changing the separate Removable classes into a CollisionHandler object - // which could be reused as a field of the KeyLookupStore objects, but since we will ultimately - // only use one of these four possible classes after doing performance testing, - // I don't think it's worth it to make the logic more complex just to avoid reusing code that might be deleted. - - private HashSet collidedInts; - private int numRemovalAttempts; - private int numSuccessfulRemovals; - - RemovableRBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - super(modulo, memSizeCapInBytes); - collidedInts = new HashSet<>(); - numRemovalAttempts = 0; - numSuccessfulRemovals = 0; - } - - @Override - protected void handleCollisions(int transformedValue) { - numCollisions++; - collidedInts.add(transformedValue); - } - - @Override - public boolean supportsRemoval() { - return true; - } - - // Check if the value to remove has had a collision, and if not, remove it - @Override - public boolean remove(Integer value) throws Exception { - if (value == null) { - return false; - } - int transformedValue = transform(value); - readLock.lock(); - try { - if (!contains(value)) { - return false; - } - numRemovalAttempts++; - if (collidedInts.contains(transformedValue)) { - return false; - } - } finally { - readLock.unlock(); - } - writeLock.lock(); - try { - rbm.remove(transformedValue); - size--; - numSuccessfulRemovals++; - return true; - } finally { - writeLock.unlock(); - } - } - - @Override - public long getMemorySizeInBytes() { - return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); - } - - @Override - public void regenerateStore(Integer[] newValues) throws Exception { - collidedInts = new HashSet<>(); - super.regenerateStore(newValues); - } - - public int getNumRemovalAttempts() { - return numRemovalAttempts; - } - - public int getNumSuccessfulRemovals() { - return numSuccessfulRemovals; - } - - public boolean 
valueHasHadCollision(Integer value) { - if (value == null) { - return false; - } - return collidedInts.contains(transform(value)); - } -} diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java similarity index 77% rename from server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java rename to server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java index 27c0e069e4edb..7a26fe3b03965 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java @@ -40,22 +40,22 @@ import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; -public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { +public class BaseRBMIntKeyLookupStoreTests extends OpenSearchTestCase { // Tests mostly based on HybridIntKeyStoreTests.java public void testInit() { long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { assertEquals(0, kls.getSize()); assertEquals(memCap, kls.getMemorySizeCapInBytes()); } } public void testTransformationLogic() throws Exception { int modulo = (int) Math.pow(2, 29); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) 
Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { int offset = 3; for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design int posValue = i * modulo + offset; @@ -73,9 +73,9 @@ public void testTransformationLogic() throws Exception { } public void testContainsAndForceRemove() throws Exception { - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { for (int i = 0; i < 2000; i++) { kls.add(i); assertTrue(kls.contains(i)); @@ -91,20 +91,20 @@ public void testContainsAndForceRemove() throws Exception { public void testAddingStatsGetters() throws Exception { int modulo = (int) Math.pow(2, 15); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore(modulo, 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore(modulo, 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(modulo, 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { kls.add(15); kls.add(-15); - assertEquals(2, 
kls.getNumAddAttempts()); - assertEquals(0, kls.getNumCollisions()); + assertEquals(2, kls.getTotalAdds()); + assertEquals(0, kls.getCollisions()); int offset = 1; for (int i = 0; i < 10; i++) { kls.add(i * modulo + offset); } - assertEquals(12, kls.getNumAddAttempts()); - assertEquals(9, kls.getNumCollisions()); + assertEquals(12, kls.getTotalAdds()); + assertEquals(9, kls.getCollisions()); } } @@ -112,9 +112,9 @@ public void testAddingStatsGetters() throws Exception { public void testRegenerateStore() throws Exception { int numToAdd = 10000000; Random rand = Randomness.get(); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { for (int i = 0; i < numToAdd; i++) { kls.add(i); } @@ -133,9 +133,9 @@ public void testRegenerateStore() throws Exception { } public void testAddingDuplicates() throws Exception { - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { int numToAdd = 4820411; for (int i = 0; i < numToAdd; i++) { kls.add(i); @@ -152,9 +152,9 @@ public void testMemoryCapBlocksAdd() throws Exception { int 
modulo = (int) Math.pow(2, 29); for (int maxEntries: new int[]{2342000, 1000, 100000}) { long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { for (int j = 0; j < maxEntries + 1000; j++) { kls.add(j); } @@ -170,9 +170,9 @@ public void testConcurrency() throws Exception { int modulo = (int) Math.pow(2, 29); long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; for (int j = 0; j < 5; j++) { // test with different numbers of threads - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { int numThreads = rand.nextInt(50) + 1; ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); // In this test we want to add the first 200K numbers and check they're all correctly there. 
@@ -226,7 +226,7 @@ public void testConcurrency() throws Exception { } assertEquals(amountToAdd, originalAdds + duplicateAdds); assertEquals(amountToAdd, kls.getSize()); - assertEquals(amountToAdd / 1000, kls.getNumCollisions()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); executor.shutdown(); } } @@ -236,9 +236,8 @@ public void testRemoveNoCollisions() throws Exception { // only for RemovableRBMIntKeyLookupStore long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; int numToAdd = 195000; - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(0, memCap); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(0, memCap); // there should be no collisions for sequential positive numbers up to modulo - assertTrue(rkls.supportsRemoval()); for (int i = 0; i < numToAdd; i++) { rkls.add(i); } @@ -253,7 +252,7 @@ public void testRemoveNoCollisions() throws Exception { public void testRemoveWithCollisions() throws Exception { int modulo = (int) Math.pow(2, 26); long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore(modulo, memCap); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(modulo, memCap); for (int i = 0; i < 10; i++) { rkls.add(i); if (i % 2 == 1) { @@ -296,9 +295,9 @@ public void testRemoveWithCollisions() throws Exception { } public void testNullInputs() throws Exception { - RBMIntKeyLookupStore base_kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableRBMIntKeyLookupStore rkls = new RemovableRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (RBMIntKeyLookupStore kls : new RBMIntKeyLookupStore[] { base_kls, rkls }) { + BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { assertFalse(kls.add(null)); assertFalse(kls.contains(null)); 
assertEquals(0, (int) kls.getInternalRepresentation(null)); @@ -306,7 +305,7 @@ public void testNullInputs() throws Exception { kls.forceRemove(null); assertFalse(kls.canHaveFalseNegatives()); assertFalse(kls.isCollision(null, null)); - assertEquals(0, kls.getNumAddAttempts()); + assertEquals(0, kls.getTotalAdds()); Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; kls.regenerateStore(newVals); assertEquals(4, kls.getSize()); diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java index a0159cff6d807..5f66f8dd86401 100644 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java @@ -48,7 +48,7 @@ public void testInit() throws Exception { HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); assertEquals(0, kls.getSize()); } } @@ -60,12 +60,12 @@ public void testStructureTransitions() throws Exception { for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i++) { kls.add(i); } - assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD; i++) { kls.add(i); } - assertEquals("RBM", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.RBM, 
kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, kls.getSize()); } } @@ -140,13 +140,13 @@ public void testContainsAndForceRemove() throws Exception { assertEquals(1, kls.getSize() - lastSize); lastSize = kls.getSize(); } - assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); for (int i = kls.getSize(); i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { kls.add(i); assertTrue(kls.contains(i)); } - assertEquals("RBM", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.RBM, kls.getCurrentStructure()); assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000, kls.getSize()); for (int i = 5000; i < 10000; i++) { kls.forceRemove(i); @@ -163,15 +163,15 @@ public void testAddingStatsGetters() throws Exception { for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { kls.add(15); kls.add(-15); - assertEquals(2, kls.getNumAddAttempts()); - assertEquals(1, kls.getNumCollisions()); + assertEquals(2, kls.getTotalAdds()); + assertEquals(1, kls.getCollisions()); int offset = 1; for (int i = 0; i < 10; i++) { kls.add(i * modulo + offset); } - assertEquals(12, kls.getNumAddAttempts()); - assertEquals(10, kls.getNumCollisions()); + assertEquals(12, kls.getTotalAdds()); + assertEquals(10, kls.getCollisions()); } } @@ -199,7 +199,7 @@ public void testRegenerateStore() throws Exception { } // test clear() kls.clear(); - assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); assertEquals(0, kls.getSize()); } } @@ -255,23 +255,23 @@ public void testMemoryCapBlocksTransitions() throws Exception { assertTrue(didAdd); } // now try to add one more, which would cause a transition and push us past the memory cap 
- assertFalse(kls.isAtCapacity()); - assertEquals("HashSet", kls.getCurrentStructure()); + assertFalse(kls.isFull()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); boolean didAdd = kls.add(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); assertFalse(didAdd); - assertTrue(kls.isAtCapacity()); - assertEquals("HashSet", kls.getCurrentStructure()); + assertTrue(kls.isFull()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); kls = new HybridIntKeyLookupStore(modulo, minRBMMemSize); for (int j = 0; j < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; j++) { didAdd = kls.add(j); assertTrue(didAdd); } - assertFalse(kls.isAtCapacity()); + assertFalse(kls.isFull()); didAdd = kls.add(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD); assertFalse(didAdd); - assertTrue(kls.isAtCapacity()); - assertEquals("intArr", kls.getCurrentStructure()); + assertTrue(kls.isFull()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); } } @@ -287,7 +287,7 @@ public void testMemoryCapBlocksAdd() throws Exception { kls.add(j); } assertTrue(Math.abs(3000 - kls.getSize()) < 2); // double --> long conversion adds a bit of lossiness - assertEquals("HashSet", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); // test where max number of entries should be 999,999 (bounded at intArr size) memSizeCapInBytes = HybridIntKeyLookupStore.getIntArrMemSizeInBytes(); @@ -296,7 +296,7 @@ public void testMemoryCapBlocksAdd() throws Exception { kls.add(j); } assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, kls.getSize()); - assertEquals("intArr", kls.getCurrentStructure()); + assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); int maxEntries = 2342000; memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); @@ -369,7 +369,7 @@ public void 
testConcurrency() throws Exception { } assertEquals(amountToAdd, originalAdds + duplicateAdds); assertEquals(amountToAdd, kls.getSize()); - assertEquals(amountToAdd / 1000, kls.getNumCollisions()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); executor.shutdown(); } } @@ -386,7 +386,7 @@ public void testNullInputs() throws Exception { kls.forceRemove(null); assertFalse(kls.canHaveFalseNegatives()); assertFalse(kls.isCollision(null, null)); - assertEquals(0, kls.getNumAddAttempts()); + assertEquals(0, kls.getTotalAdds()); Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; kls.regenerateStore(newVals); assertEquals(4, kls.getSize()); diff --git a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java index 9bec52aa51f28..975daa107ef39 100644 --- a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java @@ -42,7 +42,6 @@ public void testRemoveNoCollisions() throws Exception { long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(0, memCap); // there should be no collisions for sequential positive numbers up to modulo - assertTrue(rkls.supportsRemoval()); for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { rkls.add(i); } From e57f35daf318a2db2cb42cc5019dd079d603d1a9 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 2 Oct 2023 13:49:16 -0700 Subject: [PATCH 14/17] Removed unneeded import --- .../java/org/opensearch/indices/HybridIntKeyLookupStore.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java index 4d94cadce0b77..4eff15d0984a0 100644 --- 
a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java @@ -39,7 +39,6 @@ import org.opensearch.common.metrics.CounterMetric; import org.roaringbitmap.RoaringBitmap; -import org.w3c.dom.css.Counter; /** * A store which dynamically switches its internal data structure from hash set to sorted int array From 230194a8f3fafb2a0458a9b3c577b87b0d662aec Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 2 Oct 2023 14:49:53 -0700 Subject: [PATCH 15/17] Removed all implementations except RemovableRBMIntKeyLookupStore, which is renamed as RBMIntKeyLookupStore --- .../indices/BaseRBMIntKeyLookupStore.java | 234 --------- .../indices/HybridIntKeyLookupStore.java | 485 ------------------ .../indices/RBMIntKeyLookupStore.java | 196 ++++++- .../opensearch/indices/RBMSizeEstimator.java | 12 +- .../RemovableHybridIntKeyLookupStore.java | 110 ---- .../BaseRBMIntKeyLookupStoreTests.java | 314 ------------ .../indices/HybridIntKeyLookupStoreTests.java | 395 -------------- .../indices/RBMIntKeyLookupStoreTests.java | 285 ++++++++++ ...RemovableHybridIntKeyLookupStoreTests.java | 128 ----- 9 files changed, 477 insertions(+), 1682 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java delete mode 100644 server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java delete mode 100644 server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java delete mode 100644 server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java delete mode 100644 server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java create mode 100644 server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java delete mode 100644 server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java diff --git 
a/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java deleted file mode 100644 index 24eb1a4736dfe..0000000000000 --- a/server/src/main/java/org/opensearch/indices/BaseRBMIntKeyLookupStore.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - -package org.opensearch.indices; - -import org.opensearch.common.metrics.CounterMetric; -import org.roaringbitmap.RoaringBitmap; - -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -public class BaseRBMIntKeyLookupStore implements KeyLookupStore { - // This class shares a lot of the same fields with HybridIntKeyLookupStore, but basically none of the logic - // besides getters, so I decided against making it the superclass to HybridIntKeyLookupStore - protected final int modulo; - protected class KeyStoreStats { - protected int size; - protected long memSizeCapInBytes; - protected CounterMetric numAddAttempts; - protected CounterMetric numCollisions; - protected boolean guaranteesNoFalseNegatives; - protected int maxNumEntries; - protected boolean atCapacity; - protected CounterMetric numRemovalAttempts; // used in removable classes - protected CounterMetric numSuccessfulRemovals; - protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { - this.size = 0; - this.numAddAttempts = new CounterMetric(); - this.numCollisions = new CounterMetric(); - this.guaranteesNoFalseNegatives = true; - this.memSizeCapInBytes = memSizeCapInBytes; - this.maxNumEntries = maxNumEntries; - this.atCapacity = false; - this.numRemovalAttempts = new CounterMetric(); - this.numSuccessfulRemovals = new CounterMetric(); - } - } - - protected KeyStoreStats stats; - protected RoaringBitmap rbm; - protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - protected final Lock readLock = lock.readLock(); - protected final Lock writeLock = lock.writeLock(); - - BaseRBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - this.modulo = modulo; - this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); - this.rbm = new RoaringBitmap(); - } - - protected int calculateMaxNumEntries(long memSizeCapInBytes) { - if (memSizeCapInBytes == 0) { - return Integer.MAX_VALUE; - } - 
return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); - } - - protected final int transform(int value) { - return modulo == 0 ? value : value % modulo; - } - - protected void handleCollisions(int transformedValue) { - stats.numCollisions.inc(); - } - - @Override - public boolean add(Integer value) throws Exception { - if (value == null) { - return false; - } - writeLock.lock(); - stats.numAddAttempts.inc(); - try { - if (stats.size == stats.maxNumEntries) { - stats.atCapacity = true; - return false; - } - int transformedValue = transform(value); - boolean alreadyContained = contains(value); - if (!alreadyContained) { - rbm.add(transformedValue); - stats.size++; - return true; - } - handleCollisions(transformedValue); - return false; - } finally { - writeLock.unlock(); - } - } - - @Override - public boolean contains(Integer value) throws Exception { - if (value == null) { - return false; - } - int transformedValue = transform(value); - readLock.lock(); - try { - return rbm.contains(transformedValue); - } finally { - readLock.unlock(); - } - } - - @Override - public Integer getInternalRepresentation(Integer value) { - if (value == null) { - return 0; - } - return Integer.valueOf(transform(value)); - } - - @Override - public boolean remove(Integer value) throws Exception { - return false; - } - - - @Override - public void forceRemove(Integer value) throws Exception { - if (value == null) { - return; - } - writeLock.lock(); - stats.guaranteesNoFalseNegatives = false; - try { - int transformedValue = transform(value); - rbm.remove(transformedValue); - stats.size--; - } finally { - writeLock.unlock(); - } - } - - @Override - public boolean canHaveFalseNegatives() { - return !stats.guaranteesNoFalseNegatives; - } - - @Override - public int getSize() { - readLock.lock(); - try { - return stats.size; - } finally { - readLock.unlock(); - } - } - - @Override - public int getTotalAdds() { - return (int) stats.numAddAttempts.count(); - } - - @Override - 
public int getCollisions() { - return (int) stats.numCollisions.count(); - } - - - @Override - public boolean isCollision(Integer value1, Integer value2) { - if (value1 == null || value2 == null) { - return false; - } - return transform(value1) == transform(value2); - } - - @Override - public long getMemorySizeInBytes() { - return RBMSizeEstimator.getSizeInBytes(stats.size); - } - - @Override - public long getMemorySizeCapInBytes() { - return stats.memSizeCapInBytes; - } - - @Override - public boolean isFull() { - return stats.atCapacity; - } - - @Override - public void regenerateStore(Integer[] newValues) throws Exception { - rbm.clear(); - stats.size = 0; - stats.numAddAttempts = new CounterMetric(); - stats.numCollisions = new CounterMetric(); - stats.guaranteesNoFalseNegatives = true; - stats.numRemovalAttempts = new CounterMetric(); - stats.numSuccessfulRemovals = new CounterMetric(); - for (int i = 0; i < newValues.length; i++) { - if (newValues[i] != null) { - add(newValues[i]); - } - } - } - - @Override - public void clear() throws Exception { - regenerateStore(new Integer[]{}); - } -} diff --git a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java deleted file mode 100644 index 4eff15d0984a0..0000000000000 --- a/server/src/main/java/org/opensearch/indices/HybridIntKeyLookupStore.java +++ /dev/null @@ -1,485 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.indices; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -import org.opensearch.common.metrics.CounterMetric; -import org.roaringbitmap.RoaringBitmap; - -/** - * A store which dynamically switches its internal data structure from hash set to sorted int array - * to roaring bitmap. - */ -public class HybridIntKeyLookupStore implements KeyLookupStore { - public static final int HASHSET_TO_INTARR_THRESHOLD = 5000; - public static final int INTARR_SIZE = 100000; - public static final int INTARR_TO_RBM_THRESHOLD = INTARR_SIZE; - - /** - * Used to keep track of which structure is being used to store values. 
- */ - public enum StructureTypes { - HASHSET, - INTARR, - RBM - } - - protected class KeyStoreStats { - protected int size; - protected long memSizeCapInBytes; - protected CounterMetric numAddAttempts; - protected CounterMetric numCollisions; - protected boolean guaranteesNoFalseNegatives; - protected int maxNumEntries; - protected boolean atCapacity; - protected CounterMetric numRemovalAttempts; // used in removable classes - protected CounterMetric numSuccessfulRemovals; - protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { - this.size = 0; - this.numAddAttempts = new CounterMetric(); - this.numCollisions = new CounterMetric(); - this.guaranteesNoFalseNegatives = true; - this.memSizeCapInBytes = memSizeCapInBytes; - this.maxNumEntries = maxNumEntries; - this.atCapacity = false; - this.numRemovalAttempts = new CounterMetric(); - this.numSuccessfulRemovals = new CounterMetric(); - } - } - - protected KeyStoreStats stats; - protected StructureTypes currentStructure; - protected final int modulo; - - - protected HashSet hashset; - protected int[] intArr; - protected RoaringBitmap rbm; - protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - protected final Lock readLock = lock.readLock(); - protected final Lock writeLock = lock.writeLock(); - - public HybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - this.modulo = modulo; // A modulo of 0 means no modulo - this.hashset = new HashSet(); - this.currentStructure = StructureTypes.HASHSET; - this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); - } - - protected final int customAbs(int value) { - if (value < 0 && value > Integer.MIN_VALUE) { - return -value; - } else if (value >= 0) { - return value; - } - return Integer.MAX_VALUE; - } - - protected final int transform(int value) { - // We only use negative numbers to simplify sorting the int array - return modulo == 0 ? 
-customAbs(value) : -customAbs(value % modulo); - } - - // Helper function for intArr operations - protected void intArrChecks(int value) throws IllegalStateException { - if (currentStructure != StructureTypes.INTARR) { - throw new IllegalStateException("Cannot run isInIntArr when currentStructure is not INTARR!!"); - } - if (value > 0) { - throw new IllegalStateException("Cannot use positive value " + Integer.toString(value) + " in isInIntArr"); - } - } - - /** Checks for presence of value in intArr. If doAdd is true and the value is not already there, adds it. - * Returns true if the value was already contained (and therefore not added again), false otherwise - */ - protected final boolean isInIntArr(int value, int arrSize, boolean doAdd) throws IllegalStateException { - Lock lock = doAdd ? writeLock : readLock; - lock.lock(); - try { - intArrChecks(value); - int index = Arrays.binarySearch(intArr, 0, arrSize, value); // only search in initialized part of array - if (index < 0) { - if (doAdd) { - int insertionPoint = -index - 1; - System.arraycopy(intArr, insertionPoint, intArr, insertionPoint + 1, arrSize - insertionPoint); - intArr[insertionPoint] = value; - } - return false; - } - return true; - } finally { - lock.unlock(); - } - } - - protected final void switchHashsetToIntArr() throws IllegalStateException { - writeLock.lock(); - try { - if (currentStructure == StructureTypes.HASHSET) { - stats.size = 0; - intArr = new int[INTARR_SIZE]; - currentStructure = StructureTypes.INTARR; - for (int value : hashset) { - boolean alreadyContained = isInIntArr(value, stats.size, true); - // should never be already contained, but just to be safe - if (!alreadyContained) { - stats.size++; - } - } - hashset = null; - } - } finally { - writeLock.unlock(); - } - } - - protected final void switchIntArrToRBM() { - writeLock.lock(); - try { - if (currentStructure == StructureTypes.INTARR) { - currentStructure = StructureTypes.RBM; - rbm = new RoaringBitmap(); - for (int i = 0; 
i < stats.size; i++) { - rbm.add(intArr[i]); - } - intArr = null; - } - } finally { - writeLock.unlock(); - } - } - - /** - * Checks if adding an additional value would require us to change data structures. - * If so, start that change. - */ - protected final void handleStructureSwitch() throws IllegalStateException { // write lock? - writeLock.lock(); - try { - if (stats.size == HASHSET_TO_INTARR_THRESHOLD - 1) { - if (stats.maxNumEntries <= HASHSET_TO_INTARR_THRESHOLD) { - stats.atCapacity = true; - return; - } - switchHashsetToIntArr(); - } else if (stats.size == INTARR_TO_RBM_THRESHOLD - 1) { - if (stats.maxNumEntries <= INTARR_TO_RBM_THRESHOLD) { - stats.atCapacity = true; - return; - } - switchIntArrToRBM(); - } - } finally { - writeLock.unlock(); - } - } - - protected final void removeFromIntArr(int value) throws IllegalStateException { - writeLock.lock(); - try { - intArrChecks(value); - int index = Arrays.binarySearch(intArr, 0, stats.size, value); - if (index >= 0) { - System.arraycopy(intArr, index + 1, intArr, index, stats.size - index - 1); - intArr[stats.size - 1] = 0; - stats.size--; - } - } finally { - writeLock.unlock(); - } - } - - protected void handleCollisions(int transformedValue) { - stats.numCollisions.inc(); - } - - @Override - public boolean add(Integer value) throws IllegalStateException { - if (value == null) { - return false; - } - writeLock.lock(); - try { - if (stats.size == stats.maxNumEntries) { - stats.atCapacity = true; - } - handleStructureSwitch(); // also might set atCapacity - if (!stats.atCapacity) { - - stats.numAddAttempts.inc(); - int transformedValue = transform(value); - boolean alreadyContained; - - switch (currentStructure) { - case HASHSET: - alreadyContained = !(hashset.add(transformedValue)); - break; - case INTARR: - alreadyContained = isInIntArr(transformedValue, stats.size, true); - break; - case RBM: - alreadyContained = containsTransformed(transformedValue); - if (!alreadyContained) { - 
rbm.add(transformedValue); - } - break; - default: - throw new IllegalStateException("currentStructure is none of possible values"); - } - if (alreadyContained) { - handleCollisions(transformedValue); - return false; - } - stats.size++; - return true; - } - return false; - } finally { - writeLock.unlock(); - } - } - - protected boolean containsTransformed(int transformedValue) throws IllegalStateException { - readLock.lock(); - try { - switch (currentStructure) { - case HASHSET: - return hashset.contains(transformedValue); - case INTARR: - return isInIntArr(transformedValue, stats.size, false); - case RBM: - return rbm.contains(transformedValue); - default: - throw new IllegalStateException("currentStructure is none of possible values"); - } - } finally { - readLock.unlock(); - } - } - - // Check the array is sorted with no duplicate elements (except 0) - protected boolean arrayCorrectlySorted() { - readLock.lock(); - try { - if (currentStructure == StructureTypes.INTARR) { - for (int j = 0; j < intArr.length - 1; j++) { - if (!((intArr[j] < intArr[j + 1]) || (intArr[j] == intArr[j + 1] && intArr[j + 1] == 0))) { - // left clause: check that array is sorted, right clause: check that values are unique unless they're zero - // (uninitialized) - return false; - } - } - } - return true; - } finally { - readLock.unlock(); - } - } - - @Override - public boolean contains(Integer value) throws IllegalStateException { - if (value == null) { - return false; - } - int transformedValue = transform(value); - return containsTransformed(transformedValue); - } - - @Override - public Integer getInternalRepresentation(Integer value) { - if (value == null) { - return 0; - } - return Integer.valueOf(transform(value)); - } - - @Override - public boolean remove(Integer value) throws IllegalStateException { - return false; - } - - - protected void removeHelperFunction(int transformedValue) throws IllegalStateException { - // allows code to be reused in forceRemove() of this class and 
remove() of inheriting class - // shouldn't be called on its own, or on a value that's not already inside the structure - switch (currentStructure) { - case HASHSET: - hashset.remove(transformedValue); - stats.size--; - return; - case INTARR: - removeFromIntArr(transformedValue); // size is decreased in this function already - return; - case RBM: - rbm.remove(transformedValue); - stats.size--; - } - } - - @Override - public void forceRemove(Integer value) throws IllegalStateException { - if (value == null) { - return; - } - writeLock.lock(); - stats.guaranteesNoFalseNegatives = false; - try { - int transformedValue = transform(value); - boolean alreadyContained = contains(transformedValue); - if (alreadyContained) { - removeHelperFunction(transformedValue); - } - } finally { - writeLock.unlock(); - } - } - - @Override - public boolean canHaveFalseNegatives() { - return !stats.guaranteesNoFalseNegatives; - } - - @Override - public int getSize() { - readLock.lock(); // needed because size is changed during switchHashsetToIntarr() - try { - return stats.size; - } finally { - readLock.unlock(); - } - } - - @Override - public int getTotalAdds() { - return (int) stats.numAddAttempts.count(); - } - - @Override - public int getCollisions() { - return (int) stats.numCollisions.count(); - } - - - public StructureTypes getCurrentStructure() throws IllegalStateException { - return currentStructure; - } - - @Override - public boolean isCollision(Integer value1, Integer value2) { - if (value1 == null || value2 == null) { - return false; - } - return transform(value1) == transform(value2); - } - - protected int calculateMaxNumEntries(long memSizeCapInBytes) { - double maxHashsetMemSize = RBMSizeEstimator.getHashsetMemSizeInBytes(HASHSET_TO_INTARR_THRESHOLD - 1); - double intArrMemSize = getIntArrMemSizeInBytes(); - double minRBMMemSize = RBMSizeEstimator.getSizeInBytes(INTARR_TO_RBM_THRESHOLD); - - if (memSizeCapInBytes == 0) { - return Integer.MAX_VALUE; - } - if 
(memSizeCapInBytes >= minRBMMemSize) { - // max number of elements will be when we have an RBM - return Math.max(RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes), INTARR_TO_RBM_THRESHOLD); // there's some floating point weirdness, so we need the min to ensure we dont get values slightly below 100k - } - if (memSizeCapInBytes < intArrMemSize) { - // max number of elements will be when we have a hash set - return Math.min((int) (RBMSizeEstimator.convertBytesToMB(memSizeCapInBytes) / RBMSizeEstimator.HASHSET_MEM_SLOPE), HASHSET_TO_INTARR_THRESHOLD - 1); - } - // max number of elements will be when we have an intArr - return INTARR_TO_RBM_THRESHOLD - 1; - } - - protected static long getIntArrMemSizeInBytes() { - return (long) (4 * INTARR_SIZE + 24); - } - - @Override - public long getMemorySizeInBytes() { - switch (currentStructure) { - case HASHSET: - return RBMSizeEstimator.getHashsetMemSizeInBytes(stats.size); - case INTARR: - return getIntArrMemSizeInBytes(); - case RBM: - return RBMSizeEstimator.getSizeInBytes(stats.size); - } - return 0; - } - - @Override - public long getMemorySizeCapInBytes() { - return stats.memSizeCapInBytes; - } - - public int getMaxNumEntries() { - return stats.maxNumEntries; - } - - @Override - public boolean isFull() { - return stats.atCapacity; - } - - @Override - public void regenerateStore(Integer[] newValues) throws IllegalStateException { - intArr = null; - rbm = null; - stats.size = 0; - stats.numCollisions = new CounterMetric(); - stats.numAddAttempts = new CounterMetric(); - stats.guaranteesNoFalseNegatives = true; - stats.numRemovalAttempts = new CounterMetric(); - stats.numSuccessfulRemovals = new CounterMetric(); - currentStructure = StructureTypes.HASHSET; - hashset = new HashSet<>(); - - for (int i = 0; i < newValues.length; i++) { - if (newValues[i] != null) { - add(newValues[i]); - } - } - } - - @Override - public void clear() throws Exception { - regenerateStore(new Integer[]{}); - } -} diff --git 
a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index 2b40ce3ec4c0c..00910b7fe6bcc 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -32,31 +32,125 @@ package org.opensearch.indices; +import org.opensearch.common.metrics.CounterMetric; +import org.roaringbitmap.RoaringBitmap; + import java.util.HashSet; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; -public class RBMIntKeyLookupStore extends BaseRBMIntKeyLookupStore implements KeyLookupStore { - // The code for this class is almost the same as RemovableHybridIntKeyLookupStore, - // just with different superclasses. - // I considered changing the separate Removable classes into a CollisionHandler object - // which could be reused as a field of the KeyLookupStore objects, but since we will ultimately - // only use one of these four possible classes after doing performance testing, - // I don't think it's worth it to make the logic more complex just to avoid reusing code that might be deleted. +/** + * This class implements KeyLookupStore using a roaring bitmap with a modulo applied to values. + * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^29. + * It also maintains a hash set of values which have had collisions. Values which haven't had collisions can be + * safely removed from the store. The fraction of collided values should be low, + * about 0.3% for a store with 10^7 values and a modulo of 2^29. + * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap. 
+ */ +public class RBMIntKeyLookupStore implements KeyLookupStore { + protected final int modulo; + protected class KeyStoreStats { + protected int size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected int maxNumEntries; + protected boolean atCapacity; + protected CounterMetric numRemovalAttempts; // used in removable classes + protected CounterMetric numSuccessfulRemovals; + protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + this.size = 0; + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.guaranteesNoFalseNegatives = true; + this.memSizeCapInBytes = memSizeCapInBytes; + this.maxNumEntries = maxNumEntries; + this.atCapacity = false; + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } + } + protected KeyStoreStats stats; + protected RoaringBitmap rbm; private HashSet collidedInts; + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + protected final Lock readLock = lock.readLock(); + protected final Lock writeLock = lock.writeLock(); RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - super(modulo, memSizeCapInBytes); + this.modulo = modulo; + this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); + this.rbm = new RoaringBitmap(); collidedInts = new HashSet<>(); + + } + + protected int calculateMaxNumEntries(long memSizeCapInBytes) { + if (memSizeCapInBytes == 0) { + return Integer.MAX_VALUE; + } + return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); + } + + protected final int transform(int value) { + return modulo == 0 ? 
value : value % modulo; } - @Override protected void handleCollisions(int transformedValue) { stats.numCollisions.inc(); collidedInts.add(transformedValue); } + @Override + public boolean add(Integer value) throws Exception { + if (value == null) { + return false; + } + writeLock.lock(); + stats.numAddAttempts.inc(); + try { + if (stats.size == stats.maxNumEntries) { + stats.atCapacity = true; + return false; + } + int transformedValue = transform(value); + boolean alreadyContained = contains(value); + if (!alreadyContained) { + rbm.add(transformedValue); + stats.size++; + return true; + } + handleCollisions(transformedValue); + return false; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean contains(Integer value) throws Exception { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + return rbm.contains(transformedValue); + } finally { + readLock.unlock(); + } + } + + @Override + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); + } - // Check if the value to remove has had a collision, and if not, remove it @Override public boolean remove(Integer value) throws Exception { if (value == null) { @@ -86,17 +180,95 @@ public boolean remove(Integer value) throws Exception { } } + + @Override + public void forceRemove(Integer value) throws Exception { + if (value == null) { + return; + } + writeLock.lock(); + stats.guaranteesNoFalseNegatives = false; + try { + int transformedValue = transform(value); + rbm.remove(transformedValue); + stats.size--; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean canHaveFalseNegatives() { + return !stats.guaranteesNoFalseNegatives; + } + + @Override + public int getSize() { + readLock.lock(); + try { + return stats.size; + } finally { + readLock.unlock(); + } + } + + @Override + public int getTotalAdds() { + return (int) 
stats.numAddAttempts.count(); + } + + @Override + public int getCollisions() { + return (int) stats.numCollisions.count(); + } + + + @Override + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } + return transform(value1) == transform(value2); + } + @Override public long getMemorySizeInBytes() { - return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + return RBMSizeEstimator.getSizeInBytes(stats.size) + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + } + + @Override + public long getMemorySizeCapInBytes() { + return stats.memSizeCapInBytes; + } + + @Override + public boolean isFull() { + return stats.atCapacity; } @Override public void regenerateStore(Integer[] newValues) throws Exception { + rbm.clear(); collidedInts = new HashSet<>(); - super.regenerateStore(newValues); + stats.size = 0; + stats.numAddAttempts = new CounterMetric(); + stats.numCollisions = new CounterMetric(); + stats.guaranteesNoFalseNegatives = true; + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } + } } + + + @Override + public void clear() throws Exception { + regenerateStore(new Integer[]{}); + } public int getNumRemovalAttempts() { return (int) stats.numRemovalAttempts.count(); } diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java index 08c9143f395b1..9c62a83bb8b76 100644 --- a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java +++ b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java @@ -34,8 +34,9 @@ /** * A class used to estimate roaring bitmap memory sizes (and hash set sizes). - * An instance is made with a particular modulo to avoid recomputing - * values. 
+ * Values based on experiments with adding randomly distributed integers, which matches the use case for KeyLookupStore. + * In this use case, true values are much higher than an RBM's self-reported size, especially for small RBMs: see + * https://github.com/RoaringBitmap/RoaringBitmap/issues/257 */ public class RBMSizeEstimator { public static final int BYTES_IN_MB = 1048576; @@ -44,15 +45,18 @@ public class RBMSizeEstimator { public static final double bufferMultiplier = 1.5; public static final double intercept = 2.9; - RBMSizeEstimator() {} public static long getSizeInBytes(int numEntries) { + // Based on a linear fit in log-log space, so that we minimize the error as a proportion rather than as + // an absolute value. Should be within ~50% of the true value at worst, and should overestimate rather + // than underestimate the memory usage return (long) ((long) Math.pow(numEntries, slope) * (long) Math.pow(10, intercept) * bufferMultiplier); } public static int getNumEntriesFromSizeInBytes(long sizeInBytes) { - // This function has some precision issues especially when composed with its inverse: numEntries = getNumEntriesFromSizeInBytes(getSizeInBytes(numEntries)) + // This function has some precision issues especially when composed with its inverse: + // numEntries = getNumEntriesFromSizeInBytes(getSizeInBytes(numEntries)) // In this case the result can be off by up to a couple percent // However, this shouldn't really matter as both functions are based on memory estimates with higher errors than a couple percent // and this composition won't happen outside of tests diff --git a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java deleted file mode 100644 index 9b58ca0bf0498..0000000000000 --- a/server/src/main/java/org/opensearch/indices/RemovableHybridIntKeyLookupStore.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * SPDX-License-Identifier: 
Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.indices; - -import java.util.HashSet; - -/** - * A store which supports safe removal of keys by maintaining a hashset of values that have had collisions. 
- */ -public class RemovableHybridIntKeyLookupStore extends HybridIntKeyLookupStore implements KeyLookupStore { - private HashSet collidedInts; - - RemovableHybridIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - super(modulo, memSizeCapInBytes); - collidedInts = new HashSet<>(); - } - - @Override - protected void handleCollisions(int transformedValue) { - stats.numCollisions.inc(); - collidedInts.add(transformedValue); - } - - - // Check if the value to remove has had a collision, and if not, remove it - @Override - public boolean remove(Integer value) throws IllegalStateException { - if (value == null) { - return false; - } - int transformedValue = transform(value); - readLock.lock(); - try { - if (!contains(value)) { - return false; - } - stats.numRemovalAttempts.inc(); - if (collidedInts.contains(transformedValue)) { - return false; - } - } finally { - readLock.unlock(); - } - writeLock.lock(); - try { - removeHelperFunction(transformedValue); - stats.numSuccessfulRemovals.inc(); - return true; - } finally { - writeLock.unlock(); - } - } - - @Override - public long getMemorySizeInBytes() { - return super.getMemorySizeInBytes() + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); - } - - @Override - public void regenerateStore(Integer[] newValues) throws IllegalStateException { - collidedInts = new HashSet<>(); - super.regenerateStore(newValues); - } - - public int getNumRemovalAttempts() { - return (int) stats.numRemovalAttempts.count(); - } - - public int getNumSuccessfulRemovals() { - return (int) stats.numSuccessfulRemovals.count(); - } - - public boolean valueHasHadCollision(Integer value) { - if (value == null) { - return false; - } - return collidedInts.contains(transform(value)); - } - -} diff --git a/server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java deleted file mode 100644 index 7a26fe3b03965..0000000000000 --- 
a/server/src/test/java/org/opensearch/indices/BaseRBMIntKeyLookupStoreTests.java +++ /dev/null @@ -1,314 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - -package org.opensearch.indices; - -import org.opensearch.common.Randomness; -import org.opensearch.test.OpenSearchTestCase; - -import java.util.ArrayList; -import java.util.Random; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadPoolExecutor; - -public class BaseRBMIntKeyLookupStoreTests extends OpenSearchTestCase { - // Tests mostly based on HybridIntKeyStoreTests.java - public void testInit() { - long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - assertEquals(0, kls.getSize()); - assertEquals(memCap, kls.getMemorySizeCapInBytes()); - } - } - public void testTransformationLogic() throws Exception { - int modulo = (int) Math.pow(2, 29); - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - int offset = 3; - for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design - int posValue = i * modulo + offset; - kls.add(posValue); - int negValue = -(i * modulo + offset); - kls.add(negValue); - } - assertEquals(2, kls.getSize()); - int[] testVals = new int[]{0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE}; - for (int value : testVals) { - assertTrue(kls.getInternalRepresentation(value) < modulo); - assertTrue(kls.getInternalRepresentation(value) > -modulo); - } - } - } - - public void testContainsAndForceRemove() throws Exception { - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - 
RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - for (int i = 0; i < 2000; i++) { - kls.add(i); - assertTrue(kls.contains(i)); - } - assertFalse(kls.canHaveFalseNegatives()); - for (int i = 1900; i < 2000; i++) { - kls.forceRemove(i); - assertFalse(kls.contains(i)); - } - assertEquals(1900, kls.getSize()); - } - } - - public void testAddingStatsGetters() throws Exception { - int modulo = (int) Math.pow(2, 15); - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore(modulo, 0L); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(modulo, 0L); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - kls.add(15); - kls.add(-15); - assertEquals(2, kls.getTotalAdds()); - assertEquals(0, kls.getCollisions()); - - int offset = 1; - for (int i = 0; i < 10; i++) { - kls.add(i * modulo + offset); - } - assertEquals(12, kls.getTotalAdds()); - assertEquals(9, kls.getCollisions()); - } - - } - - public void testRegenerateStore() throws Exception { - int numToAdd = 10000000; - Random rand = Randomness.get(); - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - for (int i = 0; i < numToAdd; i++) { - kls.add(i); - } - assertEquals(numToAdd, kls.getSize()); - Integer[] newVals = new Integer[1000]; // margin accounts for collisions - for (int j = 0; j < newVals.length; j++) { - newVals[j] = rand.nextInt(); - } - kls.regenerateStore(newVals); - assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions - - // test clear() - kls.clear(); - assertEquals(0, kls.getSize()); - } - } - - public void testAddingDuplicates() throws Exception { - BaseRBMIntKeyLookupStore base_kls 
= new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - int numToAdd = 4820411; - for (int i = 0; i < numToAdd; i++) { - kls.add(i); - kls.add(i); - } - for (int j = 0; j < 1000; j++) { - kls.add(577); - } - assertEquals(numToAdd, kls.getSize()); - } - } - - public void testMemoryCapBlocksAdd() throws Exception { - int modulo = (int) Math.pow(2, 29); - for (int maxEntries: new int[]{2342000, 1000, 100000}) { - long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - for (int j = 0; j < maxEntries + 1000; j++) { - kls.add(j); - } - assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25); - // exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations - // precision gets much worse when we compose the two functions, as we do here, but this wouldn't happen in an actual use case - } - } - } - - public void testConcurrency() throws Exception { - Random rand = Randomness.get(); - int modulo = (int) Math.pow(2, 29); - long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; - for (int j = 0; j < 5; j++) { // test with different numbers of threads - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - int numThreads = rand.nextInt(50) + 1; - ThreadPoolExecutor executor = (ThreadPoolExecutor) 
Executors.newFixedThreadPool(numThreads); - // In this test we want to add the first 200K numbers and check they're all correctly there. - // We do some duplicates too to ensure those aren't incorrectly added. - int amountToAdd = 200000; - ArrayList> wasAdded = new ArrayList<>(amountToAdd); - ArrayList> duplicatesWasAdded = new ArrayList<>(); - for (int i = 0; i < amountToAdd; i++) { - wasAdded.add(null); - } - for (int i = 0; i < amountToAdd; i++) { - final int val = i; - Future fut = executor.submit(() -> { - boolean didAdd; - try { - didAdd = kls.add(val); - } catch (Exception e) { - throw new RuntimeException(e); - } - return didAdd; - }); - wasAdded.set(val, fut); - if (val % 1000 == 0) { - // do a duplicate add - Future duplicateFut = executor.submit(() -> { - boolean didAdd; - try { - didAdd = kls.add(val); - } catch (Exception e) { - throw new RuntimeException(e); - } - return didAdd; - }); - duplicatesWasAdded.add(duplicateFut); - } - } - int originalAdds = 0; - int duplicateAdds = 0; - for (Future fut : wasAdded) { - if (fut.get()) { - originalAdds++; - } - } - for (Future duplicateFut : duplicatesWasAdded) { - if (duplicateFut.get()) { - duplicateAdds++; - } - } - for (int i = 0; i < amountToAdd; i++) { - assertTrue(kls.contains(i)); - } - assertEquals(amountToAdd, originalAdds + duplicateAdds); - assertEquals(amountToAdd, kls.getSize()); - assertEquals(amountToAdd / 1000, kls.getCollisions()); - executor.shutdown(); - } - } - } - - public void testRemoveNoCollisions() throws Exception { - // only for RemovableRBMIntKeyLookupStore - long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; - int numToAdd = 195000; - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(0, memCap); - // there should be no collisions for sequential positive numbers up to modulo - for (int i = 0; i < numToAdd; i++) { - rkls.add(i); - } - for (int i = 0; i < 1000; i++) { - assertTrue(rkls.remove(i)); - assertFalse(rkls.contains(i)); - assertFalse(rkls.valueHasHadCollision(i)); - } - 
assertEquals(numToAdd - 1000, rkls.getSize()); - } - - public void testRemoveWithCollisions() throws Exception { - int modulo = (int) Math.pow(2, 26); - long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore(modulo, memCap); - for (int i = 0; i < 10; i++) { - rkls.add(i); - if (i % 2 == 1) { - rkls.add(-i); - assertFalse(rkls.valueHasHadCollision(i)); - rkls.add(i + modulo); - assertTrue(rkls.valueHasHadCollision(i)); - } else { - assertFalse(rkls.valueHasHadCollision(i)); - } - } - assertEquals(15, rkls.getSize()); - for (int i = 0; i < 10; i++) { - boolean didRemove = rkls.remove(i); - if (i % 2 == 1) { - // we expect a collision with i + modulo, so we can't remove - assertFalse(didRemove); - assertTrue(rkls.contains(i)); - // but we should be able to remove -i - boolean didRemoveNegative = rkls.remove(-i); - assertTrue(didRemoveNegative); - assertFalse(rkls.contains(-i)); - } else { - // we expect no collision - assertTrue(didRemove); - assertFalse(rkls.contains(i)); - assertFalse(rkls.valueHasHadCollision(i)); - } - } - assertEquals(5, rkls.getSize()); - int offset = 12; - rkls.add(offset); - for (int j = 1; j < 5; j++) { - rkls.add(offset + j * modulo); - } - assertEquals(6, rkls.getSize()); - assertFalse(rkls.remove(offset + modulo)); - assertTrue(rkls.valueHasHadCollision(offset + 15 * modulo)); - assertTrue(rkls.contains(offset + 17 * modulo)); - } - - public void testNullInputs() throws Exception { - BaseRBMIntKeyLookupStore base_kls = new BaseRBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RBMIntKeyLookupStore rkls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (BaseRBMIntKeyLookupStore kls : new BaseRBMIntKeyLookupStore[] { base_kls, rkls }) { - assertFalse(kls.add(null)); - assertFalse(kls.contains(null)); - assertEquals(0, (int) kls.getInternalRepresentation(null)); - assertFalse(kls.remove(null)); - kls.forceRemove(null); - assertFalse(kls.canHaveFalseNegatives()); - 
assertFalse(kls.isCollision(null, null)); - assertEquals(0, kls.getTotalAdds()); - Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; - kls.regenerateStore(newVals); - assertEquals(4, kls.getSize()); - } - } -} diff --git a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java deleted file mode 100644 index 5f66f8dd86401..0000000000000 --- a/server/src/test/java/org/opensearch/indices/HybridIntKeyLookupStoreTests.java +++ /dev/null @@ -1,395 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - -package org.opensearch.indices; - -import org.opensearch.common.Randomness; -import org.opensearch.test.OpenSearchTestCase; - -import java.util.ArrayList; -import java.util.Random; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadPoolExecutor; - -// Tests base functionality of HybridIntKeyLookupStore for both that class and the inheriting -// RemovableHybridIntKeyLookupStore. - -public class HybridIntKeyLookupStoreTests extends OpenSearchTestCase { - public void testInit() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); - assertEquals(0, kls.getSize()); - } - } - - public void testStructureTransitions() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i++) { - kls.add(i); - } - assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); - assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); - for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD; i++) { - kls.add(i); - } - assertEquals(HybridIntKeyLookupStore.StructureTypes.RBM, kls.getCurrentStructure()); - assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, kls.getSize()); - } - } - - public void testArrayLogic() throws Exception { - 
HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - Random rand = Randomness.get(); - int numToAdd = 50000; - int[] addedValues = new int[numToAdd]; - for (int i = 0; i < numToAdd; i++) { - int val = rand.nextInt(); - kls.add(val); - addedValues[i] = val; - } - assertTrue(kls.arrayCorrectlySorted()); // Not sure if this is really good as a public method - but idk how else to do it? - assertTrue(numToAdd - kls.getSize() < 20); // size should not be too different from numToAdd - exact number varies due to - // collisions - int numToRemove = 20000; - for (int j = 0; j < numToRemove; j++) { - kls.forceRemove(addedValues[j]); - } - assertTrue(numToAdd - numToRemove - kls.getSize() < 20); - assertTrue(kls.arrayCorrectlySorted()); - assertTrue(kls.canHaveFalseNegatives()); - } - } - - public void testTransformationLogic() throws Exception { - int modulo = (int) Math.pow(2, 29); - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - int offset = 3; - for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design - int posValue = i * modulo + offset; - kls.add(posValue); - int negValue = -(i * modulo + offset); - kls.add(negValue); - } - assertEquals(1, kls.getSize()); - - // test output is always in expected range - int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; - for (int value : testVals) { - assertTrue(kls.getInternalRepresentation(value) <= 0); - assertTrue(kls.getInternalRepresentation(value) > -modulo); - } - } - } - - 
public void testContainsAndForceRemove() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - for (int i = 0; i < 2000; i++) { - kls.add(i); - assertTrue(kls.contains(i)); - } - assertFalse(kls.canHaveFalseNegatives()); - for (int i = 1900; i < 2000; i++) { - kls.forceRemove(i); - assertFalse(kls.contains(i)); - } - assertEquals(1900, kls.getSize()); - int lastSize = kls.getSize(); - for (int i = kls.getSize(); i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD; i++) { - assertFalse(kls.contains(i)); - kls.add(i); - assertTrue(kls.contains(i)); // intArr removal logic already tested in testArrayLogic() - assertEquals(1, kls.getSize() - lastSize); - lastSize = kls.getSize(); - } - assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); - assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, kls.getSize()); - for (int i = kls.getSize(); i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { - kls.add(i); - assertTrue(kls.contains(i)); - } - assertEquals(HybridIntKeyLookupStore.StructureTypes.RBM, kls.getCurrentStructure()); - assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000, kls.getSize()); - for (int i = 5000; i < 10000; i++) { - kls.forceRemove(i); - assertFalse(kls.contains(i)); - } - assertTrue(kls.canHaveFalseNegatives()); - } - } - - public void testAddingStatsGetters() throws Exception { - int modulo = (int) Math.pow(2, 15); - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore(modulo, 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - kls.add(15); - kls.add(-15); - assertEquals(2, 
kls.getTotalAdds()); - assertEquals(1, kls.getCollisions()); - - int offset = 1; - for (int i = 0; i < 10; i++) { - kls.add(i * modulo + offset); - } - assertEquals(12, kls.getTotalAdds()); - assertEquals(10, kls.getCollisions()); - } - } - - public void testRegenerateStore() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - Random rand = Randomness.get(); - int[] resetNumbers = new int[] { - HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD, - HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD, - HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 10000 }; - // test reset starting from each of the 3 internal structure types - for (int resetNum : resetNumbers) { - for (int i = 0; i < resetNum; i++) { - kls.add(i); - } - Integer[] newVals = new Integer[(int) (resetNum * 1.1)]; // margin accounts for collisions - for (int j = 0; j < newVals.length; j++) { - newVals[j] = rand.nextInt(); - } - kls.regenerateStore(newVals); - assertTrue(kls.getSize() >= resetNum); - assertTrue(kls.getSize() <= newVals.length); - } - // test clear() - kls.clear(); - assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); - assertEquals(0, kls.getSize()); - } - } - - public void testAddingDuplicates() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { - kls.add(i); - kls.add(i); - } - for (int j = 0; j < 1000; j++) { - kls.add(577); - } - 
assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1, kls.getSize()); - for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - - 1; i++) { - kls.add(i); - kls.add(i); - } - for (int j = 0; j < 1000; j++) { - kls.add(12342); - } - assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, kls.getSize()); - for (int i = HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - + 5000; i++) { - kls.add(i); - kls.add(i); - } - for (int j = 0; j < 1000; j++) { - kls.add(-10004); - } - assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 5000, kls.getSize()); - } - } - - public void testMemoryCapBlocksTransitions() throws Exception { - double[] testModulos = new double[] { 0, Math.pow(2, 31), Math.pow(2, 29), Math.pow(2, 28), Math.pow(2, 26) }; - for (int i = 0; i < testModulos.length; i++) { - int modulo = (int) testModulos[i]; - long maxHashsetMemSize = RBMSizeEstimator.getHashsetMemSizeInBytes(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); - long intArrMemSize = HybridIntKeyLookupStore.getIntArrMemSizeInBytes(); - long minRBMMemSize = RBMSizeEstimator.getSizeInBytes(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD); - - // test that transitions in data structure do indeed monotonically increase predicted memory size - assertTrue(maxHashsetMemSize < intArrMemSize); - assertTrue(intArrMemSize < minRBMMemSize); - - HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, intArrMemSize - 1000); - for (int j = 0; j < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; j++) { - boolean didAdd = kls.add(j); - assertTrue(didAdd); - } - // now try to add one more, which would cause a transition and push us past the memory cap - assertFalse(kls.isFull()); - assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); - boolean didAdd = 
kls.add(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1); - assertFalse(didAdd); - assertTrue(kls.isFull()); - assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); - - kls = new HybridIntKeyLookupStore(modulo, minRBMMemSize); - for (int j = 0; j < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; j++) { - didAdd = kls.add(j); - assertTrue(didAdd); - } - assertFalse(kls.isFull()); - didAdd = kls.add(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD); - assertFalse(didAdd); - assertTrue(kls.isFull()); - assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); - } - } - - public void testMemoryCapBlocksAdd() throws Exception { - double[] testModulos = new double[] { 0, Math.pow(2, 31), Math.pow(2, 29), Math.pow(2, 28), Math.pow(2, 26) }; - for (int i = 0; i < testModulos.length; i++) { - int modulo = (int) testModulos[i]; - - // test where max number of entries should be 3000 - long memSizeCapInBytes = (long) (RBMSizeEstimator.HASHSET_MEM_SLOPE * 3000 * RBMSizeEstimator.BYTES_IN_MB); - HybridIntKeyLookupStore kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); - for (int j = 0; j < 3500; j++) { - kls.add(j); - } - assertTrue(Math.abs(3000 - kls.getSize()) < 2); // double --> long conversion adds a bit of lossiness - assertEquals(HybridIntKeyLookupStore.StructureTypes.HASHSET, kls.getCurrentStructure()); - - // test where max number of entries should be 999,999 (bounded at intArr size) - memSizeCapInBytes = HybridIntKeyLookupStore.getIntArrMemSizeInBytes(); - kls = new HybridIntKeyLookupStore(modulo, memSizeCapInBytes); - for (int j = 0; j < 105000; j++) { - kls.add(j); - } - assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, kls.getSize()); - assertEquals(HybridIntKeyLookupStore.StructureTypes.INTARR, kls.getCurrentStructure()); - - int maxEntries = 2342000; - memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); - kls = new HybridIntKeyLookupStore(modulo, 
memSizeCapInBytes); - for (int j = 0; j < maxEntries + 1000; j++) { - kls.add(j); - } - assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25); - // exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations - // precision gets much worse when we compose the two functions, as we do here, but this wouldn't happen in an actual use case - } - } - - public void testConcurrency() throws Exception { - Random rand = Randomness.get(); - for (int j = 0; j < 5; j++) { // test with different numbers of threads - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - int numThreads = rand.nextInt(50) + 1; - ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); - // In this test we want to add the first 200K numbers and check they're all correctly there. - // We do some duplicates too to ensure those aren't incorrectly added. - int amountToAdd = 200000; - ArrayList> wasAdded = new ArrayList<>(amountToAdd); // idk why i cant make an array??? 
- ArrayList> duplicatesWasAdded = new ArrayList<>(); - for (int i = 0; i < amountToAdd; i++) { - wasAdded.add(null); - } - for (int i = 0; i < amountToAdd; i++) { - final int val = i; - Future fut = executor.submit(() -> { - boolean didAdd; - try { - didAdd = kls.add(val); - } catch (Exception e) { - throw new RuntimeException(e); - } - return didAdd; - }); - wasAdded.set(val, fut); - if (val % 1000 == 0) { - // do a duplicate add - Future duplicateFut = executor.submit(() -> { - boolean didAdd; - try { - didAdd = kls.add(val); - } catch (Exception e) { - throw new RuntimeException(e); - } - return didAdd; - }); - duplicatesWasAdded.add(duplicateFut); - } - } - int originalAdds = 0; - int duplicateAdds = 0; - for (Future fut : wasAdded) { - if (fut.get()) { - originalAdds++; - } - } - for (Future duplicateFut : duplicatesWasAdded) { - if (duplicateFut.get()) { - duplicateAdds++; - } - } - for (int i = 0; i < amountToAdd; i++) { - assertTrue(kls.contains(i)); - } - assertEquals(amountToAdd, originalAdds + duplicateAdds); - assertEquals(amountToAdd, kls.getSize()); - assertEquals(amountToAdd / 1000, kls.getCollisions()); - executor.shutdown(); - } - } - } - - public void testNullInputs() throws Exception { - HybridIntKeyLookupStore base_kls = new HybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore((int) Math.pow(2, 29), 0L); - for (HybridIntKeyLookupStore kls : new HybridIntKeyLookupStore[] { base_kls, rkls }) { - assertFalse(kls.add(null)); - assertFalse(kls.contains(null)); - assertEquals(0, (int) kls.getInternalRepresentation(null)); - assertFalse(kls.remove(null)); - kls.forceRemove(null); - assertFalse(kls.canHaveFalseNegatives()); - assertFalse(kls.isCollision(null, null)); - assertEquals(0, kls.getTotalAdds()); - Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; - kls.regenerateStore(newVals); - assertEquals(4, kls.getSize()); - } - } -} diff --git 
a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java new file mode 100644 index 0000000000000..7993b73d68992 --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -0,0 +1,285 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.indices; + +import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; + +public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { + // Tests mostly based on HybridIntKeyStoreTests.java + public void testInit() { + long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + assertEquals(0, kls.getSize()); + assertEquals(memCap, kls.getMemorySizeCapInBytes()); + } + public void testTransformationLogic() throws Exception { + int modulo = (int) Math.pow(2, 29); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + int offset = 3; + for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design + int posValue = i * modulo + offset; + kls.add(posValue); + int negValue = -(i * modulo + offset); + kls.add(negValue); + } + assertEquals(2, kls.getSize()); + int[] testVals = new int[]{0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE}; + for (int value : testVals) { + assertTrue(kls.getInternalRepresentation(value) < modulo); + assertTrue(kls.getInternalRepresentation(value) > -modulo); + } + } + + public void testContainsAndForceRemove() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (int i = 0; i < 2000; i++) { + kls.add(i); + assertTrue(kls.contains(i)); + } + assertFalse(kls.canHaveFalseNegatives()); + for (int i = 1900; i < 2000; i++) { + kls.forceRemove(i); + assertFalse(kls.contains(i)); + } + assertEquals(1900, kls.getSize()); + } + + public void testAddingStatsGetters() throws Exception { + int modulo = (int) Math.pow(2, 15); + RBMIntKeyLookupStore kls = new 
RBMIntKeyLookupStore(modulo, 0L); + kls.add(15); + kls.add(-15); + assertEquals(2, kls.getTotalAdds()); + assertEquals(0, kls.getCollisions()); + + int offset = 1; + for (int i = 0; i < 10; i++) { + kls.add(i * modulo + offset); + } + assertEquals(12, kls.getTotalAdds()); + assertEquals(9, kls.getCollisions()); + } + + public void testRegenerateStore() throws Exception { + int numToAdd = 10000000; + Random rand = Randomness.get(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + assertEquals(numToAdd, kls.getSize()); + Integer[] newVals = new Integer[1000]; // margin accounts for collisions + for (int j = 0; j < newVals.length; j++) { + newVals[j] = rand.nextInt(); + } + kls.regenerateStore(newVals); + assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions + + // test clear() + kls.clear(); + assertEquals(0, kls.getSize()); + } + + public void testAddingDuplicates() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + int numToAdd = 4820411; + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(577); + } + assertEquals(numToAdd, kls.getSize()); + } + + public void testMemoryCapBlocksAdd() throws Exception { + int modulo = (int) Math.pow(2, 29); + for (int maxEntries: new int[]{2342000, 1000, 100000}) { + long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + for (int j = 0; j < maxEntries + 1000; j++) { + kls.add(j); + } + assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25); + // exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations + // precision gets much worse when we compose the two functions, as we do here, but this wouldn't happen in 
an actual use case + } + } + + public void testConcurrency() throws Exception { + Random rand = Randomness.get(); + int modulo = (int) Math.pow(2, 29); + long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; + for (int j = 0; j < 5; j++) { // test with different numbers of threads + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + int numThreads = rand.nextInt(50) + 1; + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + // In this test we want to add the first 200K numbers and check they're all correctly there. + // We do some duplicates too to ensure those aren't incorrectly added. + int amountToAdd = 200000; + ArrayList> wasAdded = new ArrayList<>(amountToAdd); + ArrayList> duplicatesWasAdded = new ArrayList<>(); + for (int i = 0; i < amountToAdd; i++) { + wasAdded.add(null); + } + for (int i = 0; i < amountToAdd; i++) { + final int val = i; + Future fut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + wasAdded.set(val, fut); + if (val % 1000 == 0) { + // do a duplicate add + Future duplicateFut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + duplicatesWasAdded.add(duplicateFut); + } + } + int originalAdds = 0; + int duplicateAdds = 0; + for (Future fut : wasAdded) { + if (fut.get()) { + originalAdds++; + } + } + for (Future duplicateFut : duplicatesWasAdded) { + if (duplicateFut.get()) { + duplicateAdds++; + } + } + for (int i = 0; i < amountToAdd; i++) { + assertTrue(kls.contains(i)); + } + assertEquals(amountToAdd, originalAdds + duplicateAdds); + assertEquals(amountToAdd, kls.getSize()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); + executor.shutdown(); + } + } + + public void testRemoveNoCollisions() throws Exception { + long memCap = 100L * 
RBMSizeEstimator.BYTES_IN_MB; + int numToAdd = 195000; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0, memCap); + // there should be no collisions for sequential positive numbers up to modulo + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + for (int i = 0; i < 1000; i++) { + assertTrue(kls.remove(i)); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + assertEquals(numToAdd - 1000, kls.getSize()); + } + + public void testRemoveWithCollisions() throws Exception { + int modulo = (int) Math.pow(2, 26); + long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memCap); + for (int i = 0; i < 10; i++) { + kls.add(i); + if (i % 2 == 1) { + kls.add(-i); + assertFalse(kls.valueHasHadCollision(i)); + kls.add(i + modulo); + assertTrue(kls.valueHasHadCollision(i)); + } else { + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(15, kls.getSize()); + for (int i = 0; i < 10; i++) { + boolean didRemove = kls.remove(i); + if (i % 2 == 1) { + // we expect a collision with i + modulo, so we can't remove + assertFalse(didRemove); + assertTrue(kls.contains(i)); + // but we should be able to remove -i + boolean didRemoveNegative = kls.remove(-i); + assertTrue(didRemoveNegative); + assertFalse(kls.contains(-i)); + } else { + // we expect no collision + assertTrue(didRemove); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(5, kls.getSize()); + int offset = 12; + kls.add(offset); + for (int j = 1; j < 5; j++) { + kls.add(offset + j * modulo); + } + assertEquals(6, kls.getSize()); + assertFalse(kls.remove(offset + modulo)); + assertTrue(kls.valueHasHadCollision(offset + 15 * modulo)); + assertTrue(kls.contains(offset + 17 * modulo)); + } + + public void testNullInputs() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + assertFalse(kls.add(null)); + 
assertFalse(kls.contains(null)); + assertEquals(0, (int) kls.getInternalRepresentation(null)); + assertFalse(kls.remove(null)); + kls.forceRemove(null); + assertFalse(kls.canHaveFalseNegatives()); + assertFalse(kls.isCollision(null, null)); + assertEquals(0, kls.getTotalAdds()); + Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; + kls.regenerateStore(newVals); + assertEquals(4, kls.getSize()); + } +} diff --git a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java deleted file mode 100644 index 975daa107ef39..0000000000000 --- a/server/src/test/java/org/opensearch/indices/RemovableHybridIntKeyLookupStoreTests.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - -package org.opensearch.indices; - -import org.opensearch.test.OpenSearchTestCase; - -// NOTE: Only new functionality is tested here. -// Inherited functionality is tested for both this class and the superclass in HybridIntKeyLookupStoreTests.java. - -public class RemovableHybridIntKeyLookupStoreTests extends OpenSearchTestCase { - public void testRemoveNoCollisions() throws Exception { - long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(0, memCap); - // there should be no collisions for sequential positive numbers up to modulo - for (int i = 0; i < HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i++) { - rkls.add(i); - } - assertEquals("HashSet", rkls.getCurrentStructure()); - for (int i = 0; i < 1000; i++) { - assertTrue(rkls.remove(i)); - assertFalse(rkls.contains(i)); - assertFalse(rkls.valueHasHadCollision(i)); - } - assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1001, rkls.getSize()); - for (int i = 0; i < 1000; i++) { - rkls.add(i); - assertFalse(rkls.valueHasHadCollision(i)); - } - - assertEquals(HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1, rkls.getSize()); - for (int i = HybridIntKeyLookupStore.HASHSET_TO_INTARR_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - - 1; i++) { - rkls.add(i); - } - assertEquals("intArr", rkls.getCurrentStructure()); - assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1, rkls.getSize()); - for (int i = 0; i < 1000; i++) { - assertTrue(rkls.remove(i)); - assertFalse(rkls.contains(i)); - assertFalse(rkls.valueHasHadCollision(i)); - } - for (int i = 0; i < 1000; i++) { - rkls.add(i); - assertFalse(rkls.valueHasHadCollision(i)); - } - - for (int i = HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD - 1; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { - rkls.add(i); - } - assertEquals("RBM", rkls.getCurrentStructure()); - 
assertEquals(HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000, rkls.getSize()); - for (int i = 0; i < HybridIntKeyLookupStore.INTARR_TO_RBM_THRESHOLD + 1000; i++) { - assertTrue(rkls.remove(i)); - assertFalse(rkls.contains(i)); - assertFalse(rkls.valueHasHadCollision(i)); - } - assertEquals("RBM", rkls.getCurrentStructure()); - assertEquals(0, rkls.getSize()); - } - - public void testRemoveWithCollisions() throws Exception { - int modulo = (int) Math.pow(2, 26); - long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; - RemovableHybridIntKeyLookupStore rkls = new RemovableHybridIntKeyLookupStore(modulo, memCap); - for (int i = 0; i < 10; i++) { - rkls.add(i); - if (i % 2 == 0) { - rkls.add(-i); - assertTrue(rkls.valueHasHadCollision(i)); - } else { - assertFalse(rkls.valueHasHadCollision(i)); - } - } - assertEquals(10, rkls.getSize()); - for (int i = 0; i < 10; i++) { - boolean didRemove = rkls.remove(i); - if (i % 2 == 0) { - // we expect a collision with -i, so we can't remove - assertFalse(didRemove); - assertTrue(rkls.contains(i)); - } else { - // we expect no collision - assertTrue(didRemove); - assertFalse(rkls.contains(i)); - assertFalse(rkls.valueHasHadCollision(i)); - } - } - assertEquals(5, rkls.getSize()); - rkls.add(1); - for (int j = 1; j < 5; j++) { - rkls.add(1 + j * modulo); - } - assertEquals(6, rkls.getSize()); - assertFalse(rkls.remove(1 + modulo)); - assertTrue(rkls.valueHasHadCollision(1 + 15 * modulo)); - assertTrue(rkls.contains(1 + 17 * modulo)); - } -} From 75be1979192ede1a1c245fb58544b4ae26c4a972 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 4 Oct 2023 10:10:55 -0700 Subject: [PATCH 16/17] tweaked memory size estimator again --- .../indices/RBMIntKeyLookupStore.java | 11 ++-- .../opensearch/indices/RBMSizeEstimator.java | 65 +++++++++++++++++-- .../indices/RBMIntKeyLookupStoreTests.java | 27 +++++++- 3 files changed, 90 insertions(+), 13 deletions(-) diff --git 
a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index 00910b7fe6bcc..88fcf133f3b00 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -41,10 +41,10 @@ /** * This class implements KeyLookupStore using a roaring bitmap with a modulo applied to values. - * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^29. + * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^28. * It also maintains a hash set of values which have had collisions. Values which haven't had collisions can be * safely removed from the store. The fraction of collided values should be low, - * about 0.3% for a store with 10^7 values and a modulo of 2^29. + * about 0.5% for a store with 10^7 values and a modulo of 2^28. * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap. 
*/ public class RBMIntKeyLookupStore implements KeyLookupStore { @@ -75,23 +75,24 @@ protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { protected KeyStoreStats stats; protected RoaringBitmap rbm; private HashSet collidedInts; + protected RBMSizeEstimator sizeEstimator; protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { this.modulo = modulo; + sizeEstimator = new RBMSizeEstimator(modulo); this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); this.rbm = new RoaringBitmap(); collidedInts = new HashSet<>(); - } protected int calculateMaxNumEntries(long memSizeCapInBytes) { if (memSizeCapInBytes == 0) { return Integer.MAX_VALUE; } - return RBMSizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); + return sizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); } protected final int transform(int value) { @@ -233,7 +234,7 @@ public boolean isCollision(Integer value1, Integer value2) { @Override public long getMemorySizeInBytes() { - return RBMSizeEstimator.getSizeInBytes(stats.size) + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + return sizeEstimator.getSizeInBytes(stats.size) + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); } @Override diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java index 9c62a83bb8b76..6e3b8e581ba9f 100644 --- a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java +++ b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java @@ -41,20 +41,62 @@ public class RBMSizeEstimator { public static final int BYTES_IN_MB = 1048576; public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -5); - public static final double slope = 0.62; - public 
static final double bufferMultiplier = 1.5; - public static final double intercept = 2.9; + protected final double slope; + protected final double bufferMultiplier; + protected final double intercept; - RBMSizeEstimator() {} + RBMSizeEstimator(int modulo) { + double[] memValues = calculateMemoryCoefficients(modulo); + this.bufferMultiplier = memValues[0]; + this.slope = memValues[1]; + this.intercept = memValues[2]; + } + + public static double[] calculateMemoryCoefficients(int modulo) { + // Sets up values to help estimate RBM size given a modulo + // Returns an array of {bufferMultiplier, slope, intercept} - public static long getSizeInBytes(int numEntries) { + double modifiedModulo; + if (modulo == 0) { + modifiedModulo = 32.0; + } else { + modifiedModulo = Math.log(modulo) / Math.log(2); + } + // we "round up" the modulo to the nearest tested value + double highCutoff = 29.001; // Floating point makes 29 not work + double mediumCutoff = 28.0; + double lowCutoff = 26.0; + double bufferMultiplier = 1.0; + double slope; + double intercept; + if (modifiedModulo > highCutoff) { + // modulo > 2^29 + bufferMultiplier = 1.2; + slope = 0.637; + intercept = 3.091; + } else if (modifiedModulo > mediumCutoff) { + // 2^29 >= modulo > 2^28 + slope = 0.619; + intercept = 2.993; + } else if (modifiedModulo > lowCutoff) { + // 2^28 >= modulo > 2^26 + slope = 0.614; + intercept = 2.905; + } else { + slope = 0.628; + intercept = 2.603; + } + return new double[] { bufferMultiplier, slope, intercept }; + } + + public long getSizeInBytes(int numEntries) { // Based on a linear fit in log-log space, so that we minimize the error as a proportion rather than as // an absolute value. 
Should be within ~50% of the true value at worst, and should overestimate rather // than underestimate the memory usage return (long) ((long) Math.pow(numEntries, slope) * (long) Math.pow(10, intercept) * bufferMultiplier); } - public static int getNumEntriesFromSizeInBytes(long sizeInBytes) { + public int getNumEntriesFromSizeInBytes(long sizeInBytes) { // This function has some precision issues especially when composed with its inverse: // numEntries = getNumEntriesFromSizeInBytes(getSizeInBytes(numEntries)) // In this case the result can be off by up to a couple percent @@ -64,6 +106,17 @@ public static int getNumEntriesFromSizeInBytes(long sizeInBytes) { } + public static long getSizeInBytesWithModulo(int numEntries, int modulo) { + double[] memValues = calculateMemoryCoefficients(modulo); + return (long) ((long) Math.pow(numEntries, memValues[1]) * (long) Math.pow(10, memValues[2]) * memValues[0]); + } + + public static int getNumEntriesFromSizeInBytesWithModulo(long sizeInBytes, int modulo) { + double[] memValues = calculateMemoryCoefficients(modulo); + return (int) Math.pow(sizeInBytes / (memValues[0] * Math.pow(10, memValues[2])), 1 / memValues[1]); + } + + protected static long convertMBToBytes(double valMB) { return (long) (valMB * BYTES_IN_MB); } diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java index 7993b73d68992..ea941a11ca7b3 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -132,8 +132,8 @@ public void testAddingDuplicates() throws Exception { public void testMemoryCapBlocksAdd() throws Exception { int modulo = (int) Math.pow(2, 29); for (int maxEntries: new int[]{2342000, 1000, 100000}) { - long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries); - RBMIntKeyLookupStore kls = new 
RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes); + long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModulo(maxEntries, modulo); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); for (int j = 0; j < maxEntries + 1000; j++) { kls.add(j); } @@ -282,4 +282,27 @@ public void testNullInputs() throws Exception { kls.regenerateStore(newVals); assertEquals(4, kls.getSize()); } + + public void testMemoryCapValueInitialization() { + double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; + double[] expectedMultipliers = new double[] { 1.2, 1.2, 1.2, 1, 1, 1 }; + double[] expectedSlopes = new double[] { 0.637, 0.637, 0.637, 0.619, 0.614, 0.629 }; + double[] expectedIntercepts = new double[] { 3.091, 3.091, 3.091, 2.993, 2.905, 2.603 }; + long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; + double delta = 0.01; + for (int i = 0; i < logModulos.length; i++) { + int modulo = 0; + if (logModulos[i] != 0) { + modulo = (int) Math.pow(2, logModulos[i]); + } + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + assertEquals(kls.stats.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); + assertEquals(expectedMultipliers[i], kls.sizeEstimator.bufferMultiplier, delta); + assertEquals(expectedSlopes[i], kls.sizeEstimator.slope, delta); + assertEquals(expectedIntercepts[i], kls.sizeEstimator.intercept, delta); + System.out.println("log modulo: " + logModulos[i]); + System.out.println("Estimated size at 10^6: " + kls.sizeEstimator.getSizeInBytes(1000000)); + } + + } } From 4abf6ea666b29f760a2f107ae1303eb1d477183d Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 23 Oct 2023 15:50:18 -0700 Subject: [PATCH 17/17] Removed forceRemove and guaranteesFalseNegatives --- .../opensearch/indices/KeyLookupStore.java | 14 ----------- .../indices/RBMIntKeyLookupStore.java | 25 +------------------ .../indices/RBMIntKeyLookupStoreTests.java | 15 +---------- 3 files changed, 2 
insertions(+), 52 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java index 5d94bef417898..60e1386a460ec 100644 --- a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java @@ -74,20 +74,6 @@ public interface KeyLookupStore { */ boolean remove(T value) throws Exception; - - /** - * Remove the transformed version of this value from the store. Calling this function may cause - * contains() to return false negatives for future values. - * @param value The value to forcibly remove. - */ - void forceRemove(T value) throws Exception; - - /** - * Check if the object currently guarantees having no false negatives when running contains(). - * @return false if there will not be false negatives, true if there could be false negatives. - */ - boolean canHaveFalseNegatives(); - /** * Returns the number of distinct values stored in the internal data structure. * Does not count values which weren't successfully added due to collisions. 
diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java index 88fcf133f3b00..3789989b5eaf1 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -57,13 +57,12 @@ protected class KeyStoreStats { protected boolean guaranteesNoFalseNegatives; protected int maxNumEntries; protected boolean atCapacity; - protected CounterMetric numRemovalAttempts; // used in removable classes + protected CounterMetric numRemovalAttempts; protected CounterMetric numSuccessfulRemovals; protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { this.size = 0; this.numAddAttempts = new CounterMetric(); this.numCollisions = new CounterMetric(); - this.guaranteesNoFalseNegatives = true; this.memSizeCapInBytes = memSizeCapInBytes; this.maxNumEntries = maxNumEntries; this.atCapacity = false; @@ -181,28 +180,6 @@ public boolean remove(Integer value) throws Exception { } } - - @Override - public void forceRemove(Integer value) throws Exception { - if (value == null) { - return; - } - writeLock.lock(); - stats.guaranteesNoFalseNegatives = false; - try { - int transformedValue = transform(value); - rbm.remove(transformedValue); - stats.size--; - } finally { - writeLock.unlock(); - } - } - - @Override - public boolean canHaveFalseNegatives() { - return !stats.guaranteesNoFalseNegatives; - } - @Override public int getSize() { readLock.lock(); diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java index ea941a11ca7b3..c857c1ecab768 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -41,7 +41,6 @@ import java.util.concurrent.ThreadPoolExecutor; public class 
RBMIntKeyLookupStoreTests extends OpenSearchTestCase { - // Tests mostly based on HybridIntKeyStoreTests.java public void testInit() { long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); @@ -66,18 +65,12 @@ public void testTransformationLogic() throws Exception { } } - public void testContainsAndForceRemove() throws Exception { + public void testContains() throws Exception { RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); for (int i = 0; i < 2000; i++) { kls.add(i); assertTrue(kls.contains(i)); } - assertFalse(kls.canHaveFalseNegatives()); - for (int i = 1900; i < 2000; i++) { - kls.forceRemove(i); - assertFalse(kls.contains(i)); - } - assertEquals(1900, kls.getSize()); } public void testAddingStatsGetters() throws Exception { @@ -145,8 +138,6 @@ public void testMemoryCapBlocksAdd() throws Exception { public void testConcurrency() throws Exception { Random rand = Randomness.get(); - int modulo = (int) Math.pow(2, 29); - long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; for (int j = 0; j < 5; j++) { // test with different numbers of threads RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); int numThreads = rand.nextInt(50) + 1; @@ -274,8 +265,6 @@ public void testNullInputs() throws Exception { assertFalse(kls.contains(null)); assertEquals(0, (int) kls.getInternalRepresentation(null)); assertFalse(kls.remove(null)); - kls.forceRemove(null); - assertFalse(kls.canHaveFalseNegatives()); assertFalse(kls.isCollision(null, null)); assertEquals(0, kls.getTotalAdds()); Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; @@ -300,8 +289,6 @@ public void testMemoryCapValueInitialization() { assertEquals(expectedMultipliers[i], kls.sizeEstimator.bufferMultiplier, delta); assertEquals(expectedSlopes[i], kls.sizeEstimator.slope, delta); assertEquals(expectedIntercepts[i], kls.sizeEstimator.intercept, delta); - 
System.out.println("log modulo: " + logModulos[i]); - System.out.println("Estimated size at 10^6: " + kls.sizeEstimator.getSizeInBytes(1000000)); } }