From a23021d05d882f6c7b621159e151b4b076a7e98b Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Mon, 22 Apr 2024 09:42:18 +0200 Subject: [PATCH] Core: Lazily compute & cache hashCode in CharSequenceWrapper (#10023) --- .../org/apache/iceberg/types/JavaHashes.java | 4 + .../iceberg/util/CharSequenceWrapper.java | 25 ++++- .../iceberg/util/TestCharSequenceMap.java | 7 ++ .../iceberg/util/TestCharSequenceSet.java | 6 ++ .../iceberg/util/TestCharSequenceWrapper.java | 93 +++++++++++++++++++ 5 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 api/src/test/java/org/apache/iceberg/util/TestCharSequenceWrapper.java diff --git a/api/src/main/java/org/apache/iceberg/types/JavaHashes.java b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java index c25198990013..9a14f7639f07 100644 --- a/api/src/main/java/org/apache/iceberg/types/JavaHashes.java +++ b/api/src/main/java/org/apache/iceberg/types/JavaHashes.java @@ -26,6 +26,10 @@ public class JavaHashes { private JavaHashes() {} public static int hashCode(CharSequence str) { + if (null == str) { + return 0; + } + int result = 177; for (int i = 0; i < str.length(); i += 1) { char ch = str.charAt(i); diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java index fcccb9eac090..854264c1ae21 100644 --- a/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java +++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceWrapper.java @@ -29,6 +29,11 @@ public static CharSequenceWrapper wrap(CharSequence seq) { } private CharSequence wrapped; + // lazily computed & cached hashCode + private transient int hashCode = 0; + // tracks if the hash has been calculated as actually being zero to avoid re-calculating the hash. + // this follows the hashCode() implementation from java.lang.String + private transient boolean hashIsZero = false; private CharSequenceWrapper(CharSequence wrapped) { this.wrapped = wrapped; @@ -36,6 +41,8 @@ private CharSequenceWrapper(CharSequence wrapped) { public CharSequenceWrapper set(CharSequence newWrapped) { this.wrapped = newWrapped; + this.hashCode = 0; + this.hashIsZero = false; return this; } @@ -58,6 +65,10 @@ public boolean equals(Object other) { return wrapped.equals(that.wrapped); } + if (null == wrapped && null == that.wrapped) { + return true; + } + if (length() != that.length()) { return false; } @@ -67,7 +78,19 @@ public boolean equals(Object other) { @Override public int hashCode() { - return JavaHashes.hashCode(wrapped); + int hash = hashCode; + + // don't recalculate if the hash is actually 0 + if (hash == 0 && !hashIsZero) { + hash = JavaHashes.hashCode(wrapped); + if (hash == 0) { + hashIsZero = true; + } else { + this.hashCode = hash; + } + } + + return hash; } @Override diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceMap.java b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceMap.java index 47d686d3abbe..8ca7889b4717 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceMap.java +++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceMap.java @@ -25,10 +25,17 @@ import java.util.concurrent.TimeUnit; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; public class TestCharSequenceMap { + @Test + public void nullString() { + Assertions.assertThat(CharSequenceMap.create()).doesNotContainKey((String) null); + Assertions.assertThat(CharSequenceMap.create()).doesNotContainValue((String) null); + } + @Test public void testEmptyMap() { CharSequenceMap map = CharSequenceMap.create(); diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java index 9420548ca9aa..b0f242c177f6 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java +++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java @@ -39,6 +39,12 @@ public void testSearchingInCharSequenceCollection() { Assertions.assertThat(set.contains("def")).isTrue(); } + @Test + public void nullString() { + Assertions.assertThat(CharSequenceSet.of(Arrays.asList((String) null))).contains((String) null); + Assertions.assertThat(CharSequenceSet.empty()).doesNotContain((String) null); + } + @Test public void testRetainAll() { CharSequenceSet set = CharSequenceSet.of(ImmutableList.of("123", "456")); diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceWrapper.java b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceWrapper.java new file mode 100644 index 000000000000..cdf46ee8d7fa --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceWrapper.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.util; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Test; + +public class TestCharSequenceWrapper { + + @Test + public void nullWrapper() { + CharSequenceWrapper one = CharSequenceWrapper.wrap(null); + CharSequenceWrapper two = CharSequenceWrapper.wrap(null); + + // at this point hashCode is not computed yet + assertThat(one).isEqualTo(two); + + // hashCode is lazily computed and stored + assertThat(one.hashCode()).isEqualTo(two.hashCode()).isEqualTo(0); + + assertThat(one).isEqualTo(two); + } + + @Test + public void equalsWithLazyHashCode() { + CharSequenceWrapper string = CharSequenceWrapper.wrap("v1"); + CharSequenceWrapper buffer = CharSequenceWrapper.wrap(new StringBuffer("v1")); + CharSequenceWrapper builder = CharSequenceWrapper.wrap(new StringBuilder("v1")); + + // at this point hashCode is 0 for all + assertThat(string).isEqualTo(buffer).isEqualTo(builder); + + // hashCode is lazily computed and stored + assertThat(string.hashCode()).isEqualTo(buffer.hashCode()).isEqualTo(builder.hashCode()); + + assertThat(string).isEqualTo(buffer).isEqualTo(builder); + } + + @Test + public void notEqualsWithLazyHashCode() { + CharSequenceWrapper v1 = CharSequenceWrapper.wrap("v1"); + CharSequenceWrapper v2 = CharSequenceWrapper.wrap("v2"); + + // at this point hashCode is 0 for all + assertThat(v1).isNotEqualTo(v2); + + // hashCode is lazily computed and stored + assertThat(v1.hashCode()).isNotEqualTo(v2.hashCode()); + + assertThat(v1).isNotEqualTo(v2); + } + + @Test + public void hashCodeIsRecomputed() { + CharSequenceWrapper wrapper = CharSequenceWrapper.wrap("v1"); + assertThat(wrapper.hashCode()).isEqualTo(173804); + + wrapper.set("v2"); + assertThat(wrapper.hashCode()).isEqualTo(173805); + + wrapper.set(new StringBuffer("v2")); + assertThat(wrapper.hashCode()).isEqualTo(173805); + + wrapper.set(new StringBuilder("v2")); + assertThat(wrapper.hashCode()).isEqualTo(173805); + + wrapper.set("v3"); + assertThat(wrapper.hashCode()).isEqualTo(173806); + + wrapper.set(null); + assertThat(wrapper.hashCode()).isEqualTo(0); + + wrapper.set("v2"); + assertThat(wrapper.hashCode()).isEqualTo(173805); + } +}