From 4c1d50d8e8cdd49f4f5011d6647add599baf2dac Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 24 Jul 2024 21:53:30 +0200 Subject: [PATCH] Save allocating some zero length byte arrays (#13608) Something I found in a heap dump. For large numbers of `FieldReader` where the minimum term is an empty string, we allocate MBs worth of empty `byte[]` in ES. Worth adding the conditional here I think. --- .../lucene90/blocktree/Lucene90BlockTreeTermsReader.java | 3 +-- lucene/core/src/java/org/apache/lucene/util/BytesRef.java | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java index 3871945e0a49..c2ff50215a3f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java @@ -270,9 +270,8 @@ private static BytesRef readBytesRef(IndexInput in) throws IOException { throw new CorruptIndexException("invalid bytes length: " + numBytes, in); } - BytesRef bytes = new BytesRef(); + BytesRef bytes = new BytesRef(numBytes); bytes.length = numBytes; - bytes.bytes = new byte[numBytes]; in.readBytes(bytes.bytes, 0, numBytes); return bytes; diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java index 351259e87b51..4298668468f9 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java @@ -68,7 +68,7 @@ public BytesRef(byte[] bytes) { * both be zero. */ public BytesRef(int capacity) { - this.bytes = new byte[capacity]; + this.bytes = capacity == 0 ? EMPTY_BYTES : new byte[capacity]; } /** @@ -77,7 +77,7 @@ public BytesRef(int capacity) { * @param text This must be well-formed unicode text, with no unpaired surrogates. */ public BytesRef(CharSequence text) { - this(new byte[UnicodeUtil.maxUTF8Length(text.length())]); + this(UnicodeUtil.maxUTF8Length(text.length())); length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes); }