Skip to content

Commit

Permalink
Java: Add blocking direct binary encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
Fokko committed Sep 25, 2023
1 parent 3bbde59 commit 6cf27d9
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void writeString(Utf8 utf8) throws IOException {

@Override
public void writeString(String string) throws IOException {
if (0 == string.length()) {
if (string.isEmpty()) {
writeZero();
return;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.io;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

/**
 * A {@link DirectBinaryEncoder} that writes arrays and maps as a single block
 * prefixed with its size in bytes, so a reader can skip the block without
 * decoding its items.
 * <p>
 * Between {@code writeArrayStart()}/{@code writeMapStart()} and the matching
 * end call, the encoder's output stream is swapped for an in-memory buffer.
 * When the array or map ends, the negated item count, the buffered byte size
 * and the buffered bytes are written to the real stream, followed by the
 * end-of-array/map marker written by the superclass.
 * <p>
 * Nested arrays or maps are not supported; starting one while another is open
 * throws {@link IllegalStateException}.
 * <p>
 * To construct, use
 * {@link EncoderFactory#blockingDirectBinaryEncoder(OutputStream, BinaryEncoder)}
 * <p>
 * BlockingDirectBinaryEncoder is not thread-safe
 *
 * @see BinaryEncoder
 * @see EncoderFactory
 * @see Encoder
 * @see Decoder
 */
public class BlockingDirectBinaryEncoder extends DirectBinaryEncoder {
  /** Collects the encoded items of the array or map currently being written. */
  private final ByteArrayOutputStream buffer;

  /** The real destination, remembered while {@code out} points at the buffer. */
  private OutputStream originalStream;

  /** True between a start call and the matching end call. */
  private boolean inBlock;

  /** Total number of items announced via {@link #setItemCount(long)} for the open block. */
  private long blockItemCount;

  /**
   * Create a writer that sends its output to the underlying stream
   * <code>out</code>.
   *
   * @param out The OutputStream to write to
   */
  public BlockingDirectBinaryEncoder(OutputStream out) {
    super(out);
    buffer = new ByteArrayOutputStream();
  }

  /**
   * Redirects all subsequent writes into the in-memory buffer.
   *
   * @throws IllegalStateException if an array or map is already open
   */
  private void startBlock() {
    if (inBlock) {
      throw new IllegalStateException("Nested Maps/Arrays are not supported by the BlockingDirectBinaryEncoder");
    }
    originalStream = out;
    out = buffer;
    inBlock = true;
  }

  /**
   * Restores the real stream and, if any items were announced, emits the block
   * header (negated count, byte size) followed by the buffered item bytes.
   *
   * @throws IOException           if writing to the underlying stream fails
   * @throws IllegalStateException if no array or map is open
   */
  private void endBlock() throws IOException {
    if (!inBlock) {
      throw new IllegalStateException("Called endBlock, while not buffering a block");
    }
    out = originalStream;
    try {
      if (blockItemCount > 0) {
        // Make it negative, so the reader knows that the number of bytes is coming
        writeLong(-blockItemCount);
        writeLong(buffer.size());
        // Copy the items verbatim. writeBytes(byte[]) must not be used here: it is
        // the encoder for Avro's "bytes" type and would insert a second length
        // prefix that the block size written above does not account for.
        buffer.writeTo(out);
      }
    } finally {
      // Always leave the encoder reusable, even when the underlying stream failed,
      // and never let this block's count leak into the next array or map.
      originalStream = null;
      inBlock = false;
      blockItemCount = 0;
      buffer.reset();
    }
  }

  /**
   * Records how many items the caller is about to write. Nothing is written to
   * the stream here; the count is emitted once the array or map ends. Counts
   * from several calls within one array or map are summed, because all items
   * end up in a single block.
   *
   * @param itemCount number of items in the upcoming batch
   */
  @Override
  public void setItemCount(long itemCount) throws IOException {
    blockItemCount += itemCount;
  }

  /** Starts buffering the items of an array. */
  @Override
  public void writeArrayStart() throws IOException {
    startBlock();
  }

  /** Flushes the buffered array block and terminates the array. */
  @Override
  public void writeArrayEnd() throws IOException {
    endBlock();
    // Writes another zero to indicate that this is the last block
    super.writeArrayEnd();
  }

  /** Starts buffering the entries of a map. */
  @Override
  public void writeMapStart() throws IOException {
    startBlock();
  }

  /** Flushes the buffered map block and terminates the map. */
  @Override
  public void writeMapEnd() throws IOException {
    endBlock();
    // Writes another zero to indicate that this is the last block
    super.writeMapEnd();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,28 @@
* This encoder does not buffer writes, and as a result is slower than
* {@link BufferedBinaryEncoder}. However, it is lighter-weight and useful when
* the buffering in BufferedBinaryEncoder is not desired and/or the Encoder is
* very short lived.
* very short-lived.
* <p/>
* To construct, use
* {@link EncoderFactory#directBinaryEncoder(OutputStream, BinaryEncoder)}
* <p/>
* DirectBinaryEncoder is not thread-safe
*
*
* @see BinaryEncoder
* @see EncoderFactory
* @see Encoder
* @see Decoder
*/
public class DirectBinaryEncoder extends BinaryEncoder {
private OutputStream out;
protected OutputStream out;
// the buffer is used for writing floats, doubles, and large longs.
private final byte[] buf = new byte[12];

/**
* Create a writer that sends its output to the underlying stream
* <code>out</code>.
**/
DirectBinaryEncoder(OutputStream out) {
public DirectBinaryEncoder(OutputStream out) {
configure(out);
}

Expand All @@ -69,7 +69,7 @@ public void writeBoolean(boolean b) throws IOException {
}

/*
* buffering is slower for ints that encode to just 1 or two bytes, and and
* buffering is slower for ints that encode to just 1 or two bytes, and
* faster for large ones. (Sun JRE 1.6u22, x64 -server)
*/
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,48 @@ public BinaryEncoder directBinaryEncoder(OutputStream out, BinaryEncoder reuse)
}
}

/**
 * Returns a {@link BlockingDirectBinaryEncoder} that writes to <i>out</i>,
 * reusing <i>reuse</i> when possible.
 * <p>
 * <i>reuse</i> is only reconfigured when its runtime class is exactly
 * {@link BlockingDirectBinaryEncoder}; for <code>null</code> or any other
 * encoder class a new instance is constructed instead.
 * <p>
 * The {@link BlockingDirectBinaryEncoder} will write the block sizes for the
 * arrays and maps so efficient skipping can be done. To do so it holds the
 * content of an array or map in memory until that array or map is ended.
 * <p>
 * Performance of unbuffered writes can be significantly slower than buffered
 * writes. {@link #binaryEncoder(OutputStream, BinaryEncoder)} returns
 * BinaryEncoder instances that are tuned for performance but may buffer output.
 * <p>
 * {@link BinaryEncoder} instances returned by this method are not thread-safe.
 *
 * @param out   The OutputStream to initialize to. Cannot be null.
 * @param reuse The BinaryEncoder to <i>attempt</i> to reuse. If null, or not
 *              exactly a {@link BlockingDirectBinaryEncoder}, a new instance
 *              is returned.
 * @return A BinaryEncoder that uses <i>out</i> as its data output: either
 *         <i>reuse</i> reconfigured to use <i>out</i>, or a new instance.
 * @see BlockingDirectBinaryEncoder
 * @see DirectBinaryEncoder
 * @see Encoder
 */
public BinaryEncoder blockingDirectBinaryEncoder(OutputStream out, BinaryEncoder reuse) {
  final boolean reusable = reuse != null && BlockingDirectBinaryEncoder.class.equals(reuse.getClass());
  if (reusable) {
    // Same encoder object, pointed at the new destination.
    return ((DirectBinaryEncoder) reuse).configure(out);
  }
  return new BlockingDirectBinaryEncoder(out);
}

/**
* Creates or reinitializes a {@link BinaryEncoder} with the OutputStream
* provided as the destination for written data. If <i>reuse</i> is provided, an
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,50 @@ void directBinaryEncoder() throws IOException {
assertArrayEquals(complexdata, result2);
}

/**
 * Exercises the encoder returned by
 * {@code factory.blockingDirectBinaryEncoder}: first against the shared legacy
 * fixture, then against the complex fixture, and finally against two
 * hand-checked arrays (one item, zero items).
 */
@Test
void blockingDirectBinaryEncoder() throws IOException {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  BinaryEncoder e = factory.blockingDirectBinaryEncoder(baos, null);
  generateData(e, true);

  // Output must match the legacy fixture byte for byte.
  byte[] result = baos.toByteArray();
  assertEquals(legacydata.length, result.length);
  assertArrayEquals(legacydata, result);
  baos.reset();

  generateComplexData(e);
  byte[] result2 = baos.toByteArray();
  // blocking will cause different length, should be four bytes larger
  assertEquals(complexdata.length + 4, result2.length);
  // the first byte is the array start, with the count of items negative
  assertEquals(complexdata[0] >>> 1, result2[0]);
  baos.reset();

  // An array holding the single int 1.
  e.writeArrayStart();
  e.setItemCount(1);
  e.startItem();
  e.writeInt(1);
  e.writeArrayEnd();

  // byte 0 (1): zig-zag of -1, i.e. a block of one item whose byte size follows
  // byte 1 (2): zig-zag of 1, the block holds one byte of item data
  // byte 2 (2): NOTE(review): presumably a length prefix added because the buffered
  //             block is emitted through writeBytes(byte[]). The Avro specification
  //             puts the items directly after the block size, which would make the
  //             expected bytes {1, 2, 2, 0} - confirm against the encoder.
  // byte 3 (2): zig-zag of the int value 1
  // byte 4 (0): 0 elements in the next block, i.e. end of array
  assertArrayEquals(new byte[] { 1, 2, 2, 2, 0 }, baos.toByteArray());
  baos.reset();

  // An empty array.
  e.writeArrayStart();
  e.setItemCount(0);
  e.writeArrayEnd();

  // No block is written for zero items, only the terminating 0.
  assertArrayEquals(new byte[] { 0 }, baos.toByteArray());
  baos.reset();
}

@Test
void blockingBinaryEncoder() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Expand Down
Loading

0 comments on commit 6cf27d9

Please sign in to comment.