From b1ce6080776758de8daf4ab2abdc88e4754235b8 Mon Sep 17 00:00:00 2001 From: Grzegorz Piwowarek Date: Wed, 28 Aug 2024 17:46:06 +0200 Subject: [PATCH] Improve hashCode() implementation for Tuple2 (#2803) The current implementation of the hashCode() method for `Tuple2` has a significant flaw that can easily lead to hash collisions, particularly in data structures like `LinkedHashMap`. ```java LinkedHashMap m1 = LinkedHashMap.of("a", 1, "b", 2); LinkedHashMap m2 = LinkedHashMap.of("a", 2, "b", 1); System.out.println(m1.hashCode() == m2.hashCode()); // true ``` In this case, _m1_ and _m2_ should ideally produce different hash codes, but they don't. This is because the current implementation of `Tuple#hashCode()` simply sums the hash codes of all elements, which can result in identical hash codes for different tuples. A potential solution is to apply the XOR (^) operator to the hash codes of all elements. XOR is order-sensitive, so it would resolve the issue in the example above, where the order of elements differs between tuples. However, XOR has its limitations and is only a suitable solution for tuples with up to two elements. This is because XOR is a commutative and associative operation, meaning that the XOR of multiple elements can still result in rather bad collisions: ```java Tuple3 t1 = Tuple.of(1, 2, 3); Tuple3 t2 = Tuple.of(1, 3, 2); System.out.println(t1.hashCode() == t2.hashCode()); // true ``` The new implementation is not perfect as well: ```java HashMap hm1 = HashMap.of(0, 1, 1, 2); HashMap hm2 = HashMap.of(0, 2, 1, 3); System.out.println(hm1.hashCode() == hm2.hashCode()); // true ``` But it is imperfect in the same way as standard library: ```java java.util.HashMap jhm1 = new java.util.HashMap<>(); jhm1.put(0, 1); jhm1.put(1, 2); java.util.HashMap jhm2 = new java.util.HashMap<>(); jhm2.put(0, 2); jhm2.put(1, 3); System.out.println(jhm1.hashCode() == jhm2.hashCode()); // true ``` ---- Related: https://github.com/vavr-io/vavr/issues/2733 --- generator/Generator.sc | 4 ++-- src-gen/main/java/io/vavr/Tuple2.java | 2 +- src-gen/test/java/io/vavr/Tuple2Test.java | 4 ++-- src/test/java/io/vavr/TupleTest.java | 2 +- .../java/io/vavr/collection/AbstractMapTest.java | 7 +++++++ .../io/vavr/collection/AbstractMultimapTest.java | 9 +++++++++ .../java/io/vavr/collection/HashMultimapTest.java | 14 ++++++++++++++ .../io/vavr/collection/LinkedHashMultimapTest.java | 14 ++++++++++++++ .../java/io/vavr/collection/TreeMultimapTest.java | 14 ++++++++++++++ 9 files changed, 64 insertions(+), 6 deletions(-) diff --git a/generator/Generator.sc b/generator/Generator.sc index 5f42a1ad4b..fc0441cfb3 100644 --- a/generator/Generator.sc +++ b/generator/Generator.sc @@ -2236,7 +2236,7 @@ def generateMainClasses(): Unit = { @Override public int hashCode() { - return ${if (i == 0) "1" else if (i == 1) "Objects.hashCode(_1)" else s"""Objects.hash(${(1 to i).gen(j => s"_$j")(", ")})"""}; + return ${if (i == 0) "1" else if (i == 1) "Objects.hashCode(_1)" else if (i == 2) "Objects.hashCode(_1) ^ Objects.hashCode(_2)" else s"""Objects.hash(${(1 to i).gen(j => s"_$j")(", ")})"""}; } @Override @@ -3715,7 +3715,7 @@ def generateTestClasses(): Unit = { @$test public void shouldComputeCorrectHashCode() { final int actual = createTuple().hashCode(); - final int expected = ${im.getType("java.util.Objects")}.${if (i == 1) "hashCode" else "hash"}($nullArgs); + final int expected = ${if (i == 2) s"new ${im.getType("java.util.AbstractMap.SimpleEntry")}<>($nullArgs)" else im.getType("java.util.Objects")}.${if (i == 2) "hashCode()" else if (i == 1) s"hashCode($nullArgs)" else s"hash($nullArgs)"}; $assertThat(actual).isEqualTo(expected); } diff --git a/src-gen/main/java/io/vavr/Tuple2.java b/src-gen/main/java/io/vavr/Tuple2.java index 8258f6a527..314673b788 100644 --- a/src-gen/main/java/io/vavr/Tuple2.java +++ b/src-gen/main/java/io/vavr/Tuple2.java @@ -394,7 +394,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(_1, _2); + return Objects.hashCode(_1) ^ Objects.hashCode(_2); } @Override diff --git a/src-gen/test/java/io/vavr/Tuple2Test.java b/src-gen/test/java/io/vavr/Tuple2Test.java index 17200e6a77..160ca58cdc 100644 --- a/src-gen/test/java/io/vavr/Tuple2Test.java +++ b/src-gen/test/java/io/vavr/Tuple2Test.java @@ -36,9 +36,9 @@ import io.vavr.collection.Seq; import io.vavr.collection.Stream; import java.util.AbstractMap; +import java.util.AbstractMap.SimpleEntry; import java.util.Comparator; import java.util.Map; -import java.util.Objects; import org.junit.Test; public class Tuple2Test { @@ -237,7 +237,7 @@ public void shouldRecognizeNonEqualityPerComponent() { @Test public void shouldComputeCorrectHashCode() { final int actual = createTuple().hashCode(); - final int expected = Objects.hash(null, null); + final int expected = new SimpleEntry<>(null, null).hashCode(); assertThat(actual).isEqualTo(expected); } diff --git a/src/test/java/io/vavr/TupleTest.java b/src/test/java/io/vavr/TupleTest.java index 82fde37bbb..9ae3589fac 100644 --- a/src/test/java/io/vavr/TupleTest.java +++ b/src/test/java/io/vavr/TupleTest.java @@ -164,7 +164,7 @@ public void shouldCreateTuple2FromEntry() { @Test public void shouldHashTuple2() { final Tuple2 t = tuple2(); - assertThat(t.hashCode()).isEqualTo(Objects.hash(t._1, t._2)); + assertThat(t.hashCode()).isEqualTo(new AbstractMap.SimpleEntry<>(t._1, t._2).hashCode()); } @Test diff --git a/src/test/java/io/vavr/collection/AbstractMapTest.java b/src/test/java/io/vavr/collection/AbstractMapTest.java index ccd2245fbe..51272a351b 100644 --- a/src/test/java/io/vavr/collection/AbstractMapTest.java +++ b/src/test/java/io/vavr/collection/AbstractMapTest.java @@ -1441,4 +1441,11 @@ public void shouldNonNilGroupByIdentity() { assertThat(actual).isEqualTo(expected); } + // -- hashCode + + @Override + @Test + public void shouldCalculateDifferentHashCodesForDifferentTraversables() { + assertThat(mapOf('a', 2, 'b', 1).hashCode()).isNotEqualTo(mapOf('a', 1, 'b', 2).hashCode()); + } } diff --git a/src/test/java/io/vavr/collection/AbstractMultimapTest.java b/src/test/java/io/vavr/collection/AbstractMultimapTest.java index fd4a5788b3..b2abb2c7d8 100644 --- a/src/test/java/io/vavr/collection/AbstractMultimapTest.java +++ b/src/test/java/io/vavr/collection/AbstractMultimapTest.java @@ -183,6 +183,8 @@ protected , T2> Multimap emptyMap() { abstract protected , V> Multimap mapOfPairs(K k1, V v1, K k, V v2, K k3, V v3); + abstract protected , V> Multimap mapOf(K k1, V v1, K k2, V v2); + abstract protected , V> Multimap mapOf(K key, V value); abstract protected , V> Multimap mapOf(java.util.Map map); @@ -1198,4 +1200,11 @@ public void shouldCollectUsingMultimap() { // java.lang.ClassCastException: io.vavr.collection.List$Cons cannot be cast to java.lang.Comparable } + // -- hashCode + + @Override + @Test + public void shouldCalculateDifferentHashCodesForDifferentTraversables() { + assertThat(mapOf("a", 2, "b", 1).hashCode()).isNotEqualTo(mapOf("a", 1, "b", 2).hashCode()); + } } diff --git a/src/test/java/io/vavr/collection/HashMultimapTest.java b/src/test/java/io/vavr/collection/HashMultimapTest.java index 31cf940350..1a5e44a6d9 100644 --- a/src/test/java/io/vavr/collection/HashMultimapTest.java +++ b/src/test/java/io/vavr/collection/HashMultimapTest.java @@ -98,6 +98,20 @@ protected , V> HashMultimap mapOfPairs(K k1, V v1, } } + @Override + protected , V> Multimap mapOf(K k1, V v1, K k2, V v2) { + switch (containerType) { + case SEQ: + return HashMultimap.withSeq().of(k1, v1, k2, v2); + case SET: + return HashMultimap.withSet().of(k1, v1, k2, v2); + case SORTED_SET: + return HashMultimap.withSortedSet(Comparators.naturalComparator()).of(k1, v1, k2, v2); + default: + throw new RuntimeException(); + } + } + @Override protected , V> HashMultimap mapOf(K key, V value) { switch (containerType) { diff --git a/src/test/java/io/vavr/collection/LinkedHashMultimapTest.java b/src/test/java/io/vavr/collection/LinkedHashMultimapTest.java index d3aca69932..5212e5b724 100644 --- a/src/test/java/io/vavr/collection/LinkedHashMultimapTest.java +++ b/src/test/java/io/vavr/collection/LinkedHashMultimapTest.java @@ -98,6 +98,20 @@ protected , V> LinkedHashMultimap mapOfPairs(K k1, } } + @Override + protected , V> Multimap mapOf(K k1, V v1, K k2, V v2) { + switch (containerType) { + case SEQ: + return LinkedHashMultimap.withSeq().of(k1, v1, k2, v2); + case SET: + return LinkedHashMultimap.withSet().of(k1, v1, k2, v2); + case SORTED_SET: + return LinkedHashMultimap.withSortedSet(Comparators.naturalComparator()).of(k1, v1, k2, v2); + default: + throw new RuntimeException(); + } + } + @Override protected , V> LinkedHashMultimap mapOf(K key, V value) { switch (containerType) { diff --git a/src/test/java/io/vavr/collection/TreeMultimapTest.java b/src/test/java/io/vavr/collection/TreeMultimapTest.java index e5b53c9d69..3a3051666c 100644 --- a/src/test/java/io/vavr/collection/TreeMultimapTest.java +++ b/src/test/java/io/vavr/collection/TreeMultimapTest.java @@ -98,6 +98,20 @@ protected , V> TreeMultimap mapOfPairs(K k1, V v1, } } + @Override + protected , V> Multimap mapOf(K k1, V v1, K k2, V v2) { + switch (containerType) { + case SEQ: + return TreeMultimap.withSeq().of(k1, v1, k2, v2); + case SET: + return TreeMultimap.withSet().of(k1, v1, k2, v2); + case SORTED_SET: + return TreeMultimap.withSortedSet(Comparators.naturalComparator()).of(k1, v1, k2, v2); + default: + throw new RuntimeException(); + } + } + @Override protected , V> TreeMultimap mapOf(K key, V value) { switch (containerType) {