From 4c1d50d8e8cdd49f4f5011d6647add599baf2dac Mon Sep 17 00:00:00 2001
From: Armin Braun <me@obrown.io>
Date: Wed, 24 Jul 2024 21:53:30 +0200
Subject: [PATCH] Save allocating some zero length byte arrays (#13608)

Something I found in a heap dump: with large numbers of `FieldReader`
instances whose minimum term is an empty string, we allocate MBs' worth of
empty `byte[]` in ES. It seems worth adding a conditional here so that a
zero-length request reuses the shared empty array instead of allocating.
---
 .../lucene90/blocktree/Lucene90BlockTreeTermsReader.java      | 3 +--
 lucene/core/src/java/org/apache/lucene/util/BytesRef.java     | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
index 3871945e0a49..c2ff50215a3f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
@@ -270,9 +270,8 @@ private static BytesRef readBytesRef(IndexInput in) throws IOException {
       throw new CorruptIndexException("invalid bytes length: " + numBytes, in);
     }
 
-    BytesRef bytes = new BytesRef();
+    BytesRef bytes = new BytesRef(numBytes);
     bytes.length = numBytes;
-    bytes.bytes = new byte[numBytes];
     in.readBytes(bytes.bytes, 0, numBytes);
 
     return bytes;
diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
index 351259e87b51..4298668468f9 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
@@ -68,7 +68,7 @@ public BytesRef(byte[] bytes) {
    * both be zero.
    */
   public BytesRef(int capacity) {
-    this.bytes = new byte[capacity];
+    this.bytes = capacity == 0 ? EMPTY_BYTES : new byte[capacity];
   }
 
   /**
@@ -77,7 +77,7 @@ public BytesRef(int capacity) {
    * @param text This must be well-formed unicode text, with no unpaired surrogates.
    */
   public BytesRef(CharSequence text) {
-    this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
+    this(UnicodeUtil.maxUTF8Length(text.length()));
     length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
   }