From 806d719dcb0f795abaf5745c5b2cfede72890078 Mon Sep 17 00:00:00 2001 From: Andreas Steigmiller Date: Wed, 3 Jan 2024 19:29:15 +0000 Subject: [PATCH] Improved contains check for bulkset for elements via a transient attribute that represents whether all elements are of same type/class --- .../gremlin/process/traversal/Contains.java | 19 +++++- .../process/traversal/step/util/BulkSet.java | 63 ++++++++++++++++++ .../traversal/ContainsBulkSetTest.java | 65 +++++++++++++++++++ .../traversal/step/util/BulkSetTest.java | 53 ++++++++++++++- 4 files changed, 197 insertions(+), 3 deletions(-) create mode 100644 gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/ContainsBulkSetTest.java diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Contains.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Contains.java index 5a77811af08..72841a326d0 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Contains.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Contains.java @@ -18,10 +18,10 @@ */ package org.apache.tinkerpop.gremlin.process.traversal; -import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; +import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet; +import org.apache.tinkerpop.gremlin.structure.Element; import java.util.Collection; -import java.util.Objects; import java.util.function.BiPredicate; /** @@ -52,6 +52,21 @@ public enum Contains implements BiPredicate { within { @Override public boolean test(final Object first, final Collection second) { + if (first instanceof Element && + second instanceof BulkSet && + ((BulkSet)second).allContainedElementsSameClass() && + ((BulkSet)second).getAllContainedElementsClass() != null && + Element.class.isAssignableFrom(((BulkSet)second).getAllContainedElementsClass()) && + first.getClass() == ((BulkSet)second).getAllContainedElementsClass()) { + /* + * For elements (i.e., vertices, edges, vertex properties) it is safe to use the contains check + * since the hash code computation and equals comparison give the same result as the Gremlin equality comparison + * (using GremlinValueComparator.COMPARABILITY.equals) based on the Gremlin comparison semantics + * (cf. ...). + * In both cases, we just compare the ids of the elements. Therefore, it is safe to use the contains check. + */ + return second.contains(first); + } GremlinTypeErrorException typeError = null; for (final Object o : second) { try { diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSet.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSet.java index 1368624406c..6c8dbafe61d 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSet.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSet.java @@ -42,6 +42,68 @@ public final class BulkSet extends AbstractSet implements Set, Serializable { private final Map map = new LinkedHashMap<>(); + + /** + * Represents the class/type of all added elements/objects in the bulk set if they are of the same class/type. + * If elements/objects of different types/classes are added, then the class is set to null (within the add) method. + * Note that it is not guaranteed that there some elements/objects are of different types/classes if the value is null. + * + * This is mainly used as an optimization in some cases. In fact, the contains within check can use this to improve + * the lookup whether vertices or edges are contained in the bulk set (see + * {@link org.apache.tinkerpop.gremlin.process.traversal.Contains#within Contains.within}). + * This works for elements (i.e., vertices, edges, vertex properties) since the + * hash code computation and equals comparison give the same result as the Gremlin equality comparison (using + * GremlinValueComparator.COMPARABILITY.equals) based on the Gremlin comparison semantics + * (cf. ...). + * For other types of objects, it is not completely clear, whether this would also result in the same results. + * + * The field is marked as transient such that it is not considered for serialization. + */ + private transient Class allContainedElementsClass = null; + private transient boolean allContainedElementsClassChecked = true; + + /** + * @return the class of all contained elements/objects if it is guaranteed that all are of the same type/class (but not + * necessarily, i.e., they can have the same type/class, but we may return null here if it was not analysed/identified). + * If no common class was identified, then null is returned. + */ + public Class getAllContainedElementsClass() { + if (allContainedElementsSameClass()) { + return allContainedElementsClass; + } else { + return null; + } + } + + /** + * @return true if it is guaranteed that all contained elements/objects are of the same type/class and not null (but not + * necessarily, i.e., they can have the same type/class, but we may return false here if it was not analysed/identified) + */ + public boolean allContainedElementsSameClass() { + if (!allContainedElementsClassChecked) { + allContainedElementsClass = null; + allContainedElementsClassChecked = true; + boolean hadNull = false; + for (final S key : this.map.keySet()) { + if ((key == null || key.getClass() == null)) { + if (allContainedElementsClass != null) { + allContainedElementsClass = null; + break; + } + hadNull = true; + } else if (hadNull) { + break; + } else if (allContainedElementsClass != null && !key.getClass().equals(allContainedElementsClass)) { + allContainedElementsClass = null; + break; + } else if (allContainedElementsClass == null) { + allContainedElementsClass = key.getClass(); + } + } + } + return allContainedElementsClass != null; + } + @Override public int size() { return (int) this.longSize(); @@ -89,6 +151,7 @@ public Map asBulk() { } public boolean add(final S s, final long bulk) { + allContainedElementsClassChecked = false; final Long current = this.map.get(s); if (current != null) { this.map.put(s, current + bulk); diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/ContainsBulkSetTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/ContainsBulkSetTest.java new file mode 100644 index 00000000000..85209ace6c6 --- /dev/null +++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/ContainsBulkSetTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.tinkerpop.gremlin.process.traversal; + +import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet; +import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertex; +import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertexProperty; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.anyObject; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +@RunWith(Enclosed.class) +public class ContainsBulkSetTest { + + @Rule + public ExpectedException exceptionRule = ExpectedException.none(); + + @Test + public void bulkSetElements() { + final BulkSet bulkSet = Mockito.spy(new BulkSet<>()); + bulkSet.add(new ReferenceVertex("1"), 1); + bulkSet.add(new ReferenceVertex("2"), 1); + bulkSet.add(new ReferenceVertex("3"), 1); + + // If bulkset contains only one type of element, we should use contains + assertEquals(true, Contains.within.test(new ReferenceVertex("3"), bulkSet)); + assertEquals(false, Contains.within.test(new ReferenceVertex("4"), bulkSet)); + verify(bulkSet, times(2)).contains(anyObject()); + // we should also not use contains if we test for different type + assertEquals(false, Contains.within.test(new ReferenceVertexProperty<>("2", "label", "value"), bulkSet)); + verify(bulkSet, times(2)).contains(anyObject()); + + // If bulkset contains different types of elements, we can no longer use contains + bulkSet.add("test string", 1); + assertEquals(true, Contains.within.test(new ReferenceVertex("3"), bulkSet)); + assertEquals(false, Contains.within.test(new ReferenceVertex("4"), bulkSet)); + assertEquals(false, Contains.within.test(new ReferenceVertexProperty<>("2", "label", "value"), bulkSet)); + verify(bulkSet, times(2)).contains(anyObject()); + } +} diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSetTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSetTest.java index 9b4980ff089..43b13ae6815 100644 --- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSetTest.java +++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/BulkSetTest.java @@ -18,7 +18,7 @@ */ package org.apache.tinkerpop.gremlin.process.traversal.step.util; -import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet; +import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertex; import org.junit.Test; import java.util.Iterator; @@ -28,6 +28,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; /** @@ -67,6 +68,56 @@ public void shouldHaveProperCountAndNotOutOfMemoryException() { assertEquals(10000000, list.size()); } + @Test + public void shouldHaveSameClassAsLongAsSameTypesAreAdded() { + final BulkSet bulkSet = new BulkSet<>(); + bulkSet.add(new ReferenceVertex("1"), 1); + bulkSet.add(new ReferenceVertex("2"), 1); + bulkSet.add(new ReferenceVertex("3"), 3); + assertTrue(bulkSet.allContainedElementsSameClass()); + assertEquals(bulkSet.getAllContainedElementsClass(), ReferenceVertex.class); + bulkSet.add(new ReferenceVertex("4"), 5); + assertEquals(bulkSet.getAllContainedElementsClass(), ReferenceVertex.class); + bulkSet.add(new Long(2), 5); + assertFalse(bulkSet.allContainedElementsSameClass()); + assertNull(bulkSet.getAllContainedElementsClass()); + } + + @Test + public void shouldNotHaveSameClassForDifferentTypes() { + final BulkSet bulkSet = new BulkSet<>(); + bulkSet.add(new ReferenceVertex("1"), 1); + bulkSet.add(new Long(2), 5); + bulkSet.add(new ReferenceVertex("3"), 3); + assertNull(bulkSet.getAllContainedElementsClass()); + assertFalse(bulkSet.allContainedElementsSameClass()); + bulkSet.add(new ReferenceVertex("4"), 5); + assertNull(bulkSet.getAllContainedElementsClass()); + assertFalse(bulkSet.allContainedElementsSameClass()); + } + + @Test + public void shouldNotHaveSameClassWhenEmpty() { + final BulkSet bulkSet = new BulkSet<>(); + assertNull(bulkSet.getAllContainedElementsClass()); + assertFalse(bulkSet.allContainedElementsSameClass()); + } + + @Test + public void shouldNotHaveSameClassForNull() { + final BulkSet bulkSet = new BulkSet<>(); + bulkSet.add(null, 5); + assertNull(bulkSet.getAllContainedElementsClass()); + assertFalse(bulkSet.allContainedElementsSameClass()); + assertFalse(bulkSet.isEmpty()); + bulkSet.add(new Long(2), 5); + assertNull(bulkSet.getAllContainedElementsClass()); + assertFalse(bulkSet.allContainedElementsSameClass()); + bulkSet.add(null, 3); + assertNull(bulkSet.getAllContainedElementsClass()); + assertFalse(bulkSet.allContainedElementsSameClass()); + } + @Test public void shouldHaveCorrectBulkCounts() { final BulkSet set = new BulkSet<>();