Skip to content

Commit

Permalink
Improved contains check for bulkset for elements via a transient attr…
Browse files Browse the repository at this point in the history
…ibute that represents whether all elements are of same type/class
  • Loading branch information
steigma committed Jan 4, 2024
1 parent 6486ef8 commit 806d719
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
*/
package org.apache.tinkerpop.gremlin.process.traversal;

import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet;
import org.apache.tinkerpop.gremlin.structure.Element;

import java.util.Collection;
import java.util.Objects;
import java.util.function.BiPredicate;

/**
Expand Down Expand Up @@ -52,6 +52,21 @@ public enum Contains implements BiPredicate<Object, Collection> {
within {
@Override
public boolean test(final Object first, final Collection second) {
if (first instanceof Element &&
second instanceof BulkSet<?> &&
((BulkSet<?>)second).allContainedElementsSameClass() &&
((BulkSet<?>)second).getAllContainedElementsClass() != null &&
Element.class.isAssignableFrom(((BulkSet<?>)second).getAllContainedElementsClass()) &&
first.getClass() == ((BulkSet<?>)second).getAllContainedElementsClass()) {
/*
* For elements (i.e., vertices, edges, vertex properties) it is safe to use the contains check
* since the hash code computation and equals comparison give the same result as the Gremlin equality comparison
* (using GremlinValueComparator.COMPARABILITY.equals) based on the Gremlin comparison semantics
* (cf. <a href="https://tinkerpop.apache.org/docs/3.7.0/dev/provider/#gremlin-semantics-concepts">...</a>).
* In both cases, we just compare the ids of the elements. Therefore, it is safe to use the contains check.
*/
return second.contains(first);
}
GremlinTypeErrorException typeError = null;
for (final Object o : second) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,68 @@
public final class BulkSet<S> extends AbstractSet<S> implements Set<S>, Serializable {
private final Map<S, Long> map = new LinkedHashMap<>();


/**
* Represents the class/type of all added elements/objects in the bulk set if they are of the same class/type.
* If elements/objects of different types/classes are added, then the class is set to null (within the add) method.
* Note that it is not guaranteed that there some elements/objects are of different types/classes if the value is null.
*
* This is mainly used as an optimization in some cases. In fact, the contains within check can use this to improve
* the lookup whether vertices or edges are contained in the bulk set (see
* {@link org.apache.tinkerpop.gremlin.process.traversal.Contains#within Contains.within}).
* This works for elements (i.e., vertices, edges, vertex properties) since the
* hash code computation and equals comparison give the same result as the Gremlin equality comparison (using
* GremlinValueComparator.COMPARABILITY.equals) based on the Gremlin comparison semantics
* (cf. <a href="https://tinkerpop.apache.org/docs/3.7.0/dev/provider/#gremlin-semantics-concepts">...</a>).
* For other types of objects, it is not completely clear, whether this would also result in the same results.
*
* The field is marked as transient such that it is not considered for serialization.
*/
private transient Class<?> allContainedElementsClass = null;
private transient boolean allContainedElementsClassChecked = true;

/**
* @return the class of all contained elements/objects if it is guaranteed that all are of the same type/class (but not
* necessarily, i.e., they can have the same type/class, but we may return null here if it was not analysed/identified).
* If no common class was identified, then null is returned.
*/
public Class<?> getAllContainedElementsClass() {
if (allContainedElementsSameClass()) {
return allContainedElementsClass;
} else {
return null;
}
}

/**
* @return true if it is guaranteed that all contained elements/objects are of the same type/class and not null (but not
* necessarily, i.e., they can have the same type/class, but we may return false here if it was not analysed/identified)
*/
public boolean allContainedElementsSameClass() {
if (!allContainedElementsClassChecked) {
allContainedElementsClass = null;
allContainedElementsClassChecked = true;
boolean hadNull = false;
for (final S key : this.map.keySet()) {
if ((key == null || key.getClass() == null)) {
if (allContainedElementsClass != null) {
allContainedElementsClass = null;
break;
}
hadNull = true;
} else if (hadNull) {
break;
} else if (allContainedElementsClass != null && !key.getClass().equals(allContainedElementsClass)) {
allContainedElementsClass = null;
break;
} else if (allContainedElementsClass == null) {
allContainedElementsClass = key.getClass();
}
}
}
return allContainedElementsClass != null;
}

@Override
public int size() {
return (int) this.longSize();
Expand Down Expand Up @@ -89,6 +151,7 @@ public Map<S, Long> asBulk() {
}

public boolean add(final S s, final long bulk) {
allContainedElementsClassChecked = false;
final Long current = this.map.get(s);
if (current != null) {
this.map.put(s, current + bulk);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.process.traversal;

import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet;
import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertex;
import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertexProperty;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.runners.Enclosed;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.mockito.Mockito;

import static org.junit.Assert.assertEquals;
import static org.mockito.ArgumentMatchers.anyObject;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

@RunWith(Enclosed.class)
public class ContainsBulkSetTest {

@Rule
public ExpectedException exceptionRule = ExpectedException.none();

@Test
public void bulkSetElements() {
final BulkSet<Object> bulkSet = Mockito.spy(new BulkSet<>());
bulkSet.add(new ReferenceVertex("1"), 1);
bulkSet.add(new ReferenceVertex("2"), 1);
bulkSet.add(new ReferenceVertex("3"), 1);

// If bulkset contains only one type of element, we should use contains
assertEquals(true, Contains.within.test(new ReferenceVertex("3"), bulkSet));
assertEquals(false, Contains.within.test(new ReferenceVertex("4"), bulkSet));
verify(bulkSet, times(2)).contains(anyObject());
// we should also not use contains if we test for different type
assertEquals(false, Contains.within.test(new ReferenceVertexProperty<>("2", "label", "value"), bulkSet));
verify(bulkSet, times(2)).contains(anyObject());

// If bulkset contains different types of elements, we can no longer use contains
bulkSet.add("test string", 1);
assertEquals(true, Contains.within.test(new ReferenceVertex("3"), bulkSet));
assertEquals(false, Contains.within.test(new ReferenceVertex("4"), bulkSet));
assertEquals(false, Contains.within.test(new ReferenceVertexProperty<>("2", "label", "value"), bulkSet));
verify(bulkSet, times(2)).contains(anyObject());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/
package org.apache.tinkerpop.gremlin.process.traversal.step.util;

import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet;
import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertex;
import org.junit.Test;

import java.util.Iterator;
Expand All @@ -28,6 +28,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

/**
Expand Down Expand Up @@ -67,6 +68,56 @@ public void shouldHaveProperCountAndNotOutOfMemoryException() {
assertEquals(10000000, list.size());
}

@Test
public void shouldHaveSameClassAsLongAsSameTypesAreAdded() {
final BulkSet<Object> bulkSet = new BulkSet<>();
bulkSet.add(new ReferenceVertex("1"), 1);
bulkSet.add(new ReferenceVertex("2"), 1);
bulkSet.add(new ReferenceVertex("3"), 3);
assertTrue(bulkSet.allContainedElementsSameClass());
assertEquals(bulkSet.getAllContainedElementsClass(), ReferenceVertex.class);
bulkSet.add(new ReferenceVertex("4"), 5);
assertEquals(bulkSet.getAllContainedElementsClass(), ReferenceVertex.class);
bulkSet.add(new Long(2), 5);
assertFalse(bulkSet.allContainedElementsSameClass());
assertNull(bulkSet.getAllContainedElementsClass());
}

@Test
public void shouldNotHaveSameClassForDifferentTypes() {
final BulkSet<Object> bulkSet = new BulkSet<>();
bulkSet.add(new ReferenceVertex("1"), 1);
bulkSet.add(new Long(2), 5);
bulkSet.add(new ReferenceVertex("3"), 3);
assertNull(bulkSet.getAllContainedElementsClass());
assertFalse(bulkSet.allContainedElementsSameClass());
bulkSet.add(new ReferenceVertex("4"), 5);
assertNull(bulkSet.getAllContainedElementsClass());
assertFalse(bulkSet.allContainedElementsSameClass());
}

@Test
public void shouldNotHaveSameClassWhenEmpty() {
final BulkSet<Object> bulkSet = new BulkSet<>();
assertNull(bulkSet.getAllContainedElementsClass());
assertFalse(bulkSet.allContainedElementsSameClass());
}

@Test
public void shouldNotHaveSameClassForNull() {
final BulkSet<Object> bulkSet = new BulkSet<>();
bulkSet.add(null, 5);
assertNull(bulkSet.getAllContainedElementsClass());
assertFalse(bulkSet.allContainedElementsSameClass());
assertFalse(bulkSet.isEmpty());
bulkSet.add(new Long(2), 5);
assertNull(bulkSet.getAllContainedElementsClass());
assertFalse(bulkSet.allContainedElementsSameClass());
bulkSet.add(null, 3);
assertNull(bulkSet.getAllContainedElementsClass());
assertFalse(bulkSet.allContainedElementsSameClass());
}

@Test
public void shouldHaveCorrectBulkCounts() {
final BulkSet<String> set = new BulkSet<>();
Expand Down

0 comments on commit 806d719

Please sign in to comment.