Skip to content

Commit

Permalink
Rename RLEv1/v2 reader/writer to decoder/encoder
Browse files: browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Sep 29, 2024
1 parent 5569e0e commit be8e1f1
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 26 deletions.
14 changes: 8 additions & 6 deletions src/encoding/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ use std::{
};

use num::{traits::CheckedShl, PrimInt, Signed};
use rle_v1::RleReaderV1;
use rle_v2::RleReaderV2;
use rle_v1::RleV1Decoder;
use rle_v2::RleV2Decoder;
use snafu::ResultExt;
use util::{
get_closest_aligned_bit_width, signed_msb_decode, signed_zigzag_decode, signed_zigzag_encode,
Expand All @@ -52,10 +52,10 @@ pub fn get_unsigned_rle_reader<R: Read + Send + 'static>(
) -> Box<dyn PrimitiveValueDecoder<i64> + Send> {
match column.encoding().kind() {
ProtoColumnKind::Direct | ProtoColumnKind::Dictionary => {
Box::new(RleReaderV1::<i64, _, UnsignedEncoding>::new(reader))
Box::new(RleV1Decoder::<i64, _, UnsignedEncoding>::new(reader))
}
ProtoColumnKind::DirectV2 | ProtoColumnKind::DictionaryV2 => {
Box::new(RleReaderV2::<i64, _, UnsignedEncoding>::new(reader))
Box::new(RleV2Decoder::<i64, _, UnsignedEncoding>::new(reader))
}
}
}
Expand All @@ -65,8 +65,10 @@ pub fn get_rle_reader<N: NInt, R: Read + Send + 'static>(
reader: R,
) -> Result<Box<dyn PrimitiveValueDecoder<N> + Send>> {
match column.encoding().kind() {
ProtoColumnKind::Direct => Ok(Box::new(RleReaderV1::<N, _, SignedEncoding>::new(reader))),
ProtoColumnKind::DirectV2 => Ok(Box::new(RleReaderV2::<N, _, SignedEncoding>::new(reader))),
ProtoColumnKind::Direct => Ok(Box::new(RleV1Decoder::<N, _, SignedEncoding>::new(reader))),
ProtoColumnKind::DirectV2 => {
Ok(Box::new(RleV2Decoder::<N, _, SignedEncoding>::new(reader)))
}
k => InvalidColumnEncodingSnafu {
name: column.name(),
encoding: k,
Expand Down
8 changes: 4 additions & 4 deletions src/encoding/integer/rle_v1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ impl EncodingType {
}

/// Decodes a stream of Integer Run Length Encoded version 1 bytes.
pub struct RleReaderV1<N: NInt, R: Read, S: EncodingSign> {
pub struct RleV1Decoder<N: NInt, R: Read, S: EncodingSign> {
reader: R,
decoded_ints: Vec<N>,
current_head: usize,
sign: PhantomData<S>,
}

impl<N: NInt, R: Read, S: EncodingSign> RleReaderV1<N, R, S> {
impl<N: NInt, R: Read, S: EncodingSign> RleV1Decoder<N, R, S> {
pub fn new(reader: R) -> Self {
Self {
reader,
Expand Down Expand Up @@ -137,7 +137,7 @@ fn read_run<N: NInt, R: Read, S: EncodingSign>(
Ok(())
}

impl<N: NInt, R: Read, S: EncodingSign> PrimitiveValueDecoder<N> for RleReaderV1<N, R, S> {
impl<N: NInt, R: Read, S: EncodingSign> PrimitiveValueDecoder<N> for RleV1Decoder<N, R, S> {
// TODO: this is an exact duplicate of the RLEv2 version; deduplicate it
fn decode(&mut self, out: &mut [N]) -> Result<()> {
let available = &self.decoded_ints[self.current_head..];
Expand Down Expand Up @@ -190,7 +190,7 @@ mod tests {
use super::*;

fn test_helper(data: &[u8], expected: &[i64]) {
let mut reader = RleReaderV1::<i64, _, UnsignedEncoding>::new(Cursor::new(data));
let mut reader = RleV1Decoder::<i64, _, UnsignedEncoding>::new(Cursor::new(data));
let mut actual = vec![0; expected.len()];
reader.decode(&mut actual).unwrap();
assert_eq!(actual, expected);
Expand Down
20 changes: 10 additions & 10 deletions src/encoding/integer/rle_v2/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl EncodingType {
}
}

pub struct RleReaderV2<N: NInt, R: Read, S: EncodingSign> {
pub struct RleV2Decoder<N: NInt, R: Read, S: EncodingSign> {
reader: R,
decoded_ints: Vec<N>,
/// Indexes into decoded_ints to make it act like a queue
Expand All @@ -88,7 +88,7 @@ pub struct RleReaderV2<N: NInt, R: Read, S: EncodingSign> {
sign: PhantomData<S>,
}

impl<N: NInt, R: Read, S: EncodingSign> RleReaderV2<N, R, S> {
impl<N: NInt, R: Read, S: EncodingSign> RleV2Decoder<N, R, S> {
pub fn new(reader: R) -> Self {
Self {
reader,
Expand Down Expand Up @@ -131,7 +131,7 @@ impl<N: NInt, R: Read, S: EncodingSign> RleReaderV2<N, R, S> {
}
}

impl<N: NInt, R: Read, S: EncodingSign> PrimitiveValueDecoder<N> for RleReaderV2<N, R, S> {
impl<N: NInt, R: Read, S: EncodingSign> PrimitiveValueDecoder<N> for RleV2Decoder<N, R, S> {
fn decode(&mut self, out: &mut [N]) -> Result<()> {
let available = &self.decoded_ints[self.current_head..];
// If we have enough in buffer to copy over
Expand Down Expand Up @@ -256,7 +256,7 @@ impl<N: NInt> Default for RleV2EncodingState<N> {
}
}

pub struct RleWriterV2<N: NInt, S: EncodingSign> {
pub struct RleV2Encoder<N: NInt, S: EncodingSign> {
/// Stores the run length encoded sequences.
data: BytesMut,
/// Used in state machine for determining which sub-encoding
Expand All @@ -265,7 +265,7 @@ pub struct RleWriterV2<N: NInt, S: EncodingSign> {
phantom: PhantomData<S>,
}

impl<N: NInt, S: EncodingSign> RleWriterV2<N, S> {
impl<N: NInt, S: EncodingSign> RleV2Encoder<N, S> {
// Algorithm adapted from:
// https://github.com/apache/orc/blob/main/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java

Expand Down Expand Up @@ -398,13 +398,13 @@ impl<N: NInt, S: EncodingSign> RleWriterV2<N, S> {
}
}

impl<N: NInt, S: EncodingSign> EstimateMemory for RleWriterV2<N, S> {
impl<N: NInt, S: EncodingSign> EstimateMemory for RleV2Encoder<N, S> {
fn estimate_memory_size(&self) -> usize {
self.data.len()
}
}

impl<N: NInt, S: EncodingSign> PrimitiveValueEncoder<N> for RleWriterV2<N, S> {
impl<N: NInt, S: EncodingSign> PrimitiveValueEncoder<N> for RleV2Encoder<N, S> {
fn new() -> Self {
Self {
data: BytesMut::new(),
Expand Down Expand Up @@ -549,7 +549,7 @@ mod tests {
// multiple times

fn test_helper<S: EncodingSign>(data: &[u8], expected: &[i64]) {
let mut reader = RleReaderV2::<i64, _, S>::new(Cursor::new(data));
let mut reader = RleV2Decoder::<i64, _, S>::new(Cursor::new(data));
let mut actual = vec![0; expected.len()];
reader.decode(&mut actual).unwrap();
assert_eq!(actual, expected);
Expand Down Expand Up @@ -668,11 +668,11 @@ mod tests {
// Currently the sub-encoding chosen here will be Direct about 99% of the time, because the input values are randomly generated

fn roundtrip_helper<N: NInt, S: EncodingSign>(values: &[N]) -> Result<Vec<N>> {
let mut writer = RleWriterV2::<N, S>::new();
let mut writer = RleV2Encoder::<N, S>::new();
writer.write_slice(values);
let data = writer.take_inner();

let mut reader = RleReaderV2::<N, _, S>::new(Cursor::new(data));
let mut reader = RleV2Decoder::<N, _, S>::new(Cursor::new(data));
let mut actual = vec![N::zero(); values.len()];
reader.decode(&mut actual).unwrap();

Expand Down
12 changes: 6 additions & 6 deletions src/writer/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use crate::{
boolean::BooleanEncoder,
byte::ByteRleEncoder,
float::FloatValueEncoder,
integer::{rle_v2::RleWriterV2, NInt, SignedEncoding, UnsignedEncoding},
integer::{rle_v2::RleV2Encoder, NInt, SignedEncoding, UnsignedEncoding},
PrimitiveValueEncoder,
},
error::Result,
Expand Down Expand Up @@ -263,7 +263,7 @@ where
T::Offset: NInt,
{
string_bytes: BytesMut,
length_encoder: RleWriterV2<T::Offset, UnsignedEncoding>,
length_encoder: RleV2Encoder<T::Offset, UnsignedEncoding>,
present: Option<BooleanEncoder>,
encoded_count: usize,
}
Expand All @@ -275,7 +275,7 @@ where
pub fn new() -> Self {
Self {
string_bytes: BytesMut::new(),
length_encoder: RleWriterV2::new(),
length_encoder: RleV2Encoder::new(),
present: None,
encoded_count: 0,
}
Expand Down Expand Up @@ -393,9 +393,9 @@ where
pub type FloatColumnEncoder = PrimitiveColumnEncoder<Float32Type, FloatValueEncoder<Float32Type>>;
pub type DoubleColumnEncoder = PrimitiveColumnEncoder<Float64Type, FloatValueEncoder<Float64Type>>;
pub type ByteColumnEncoder = PrimitiveColumnEncoder<Int8Type, ByteRleEncoder>;
pub type Int16ColumnEncoder = PrimitiveColumnEncoder<Int16Type, RleWriterV2<i16, SignedEncoding>>;
pub type Int32ColumnEncoder = PrimitiveColumnEncoder<Int32Type, RleWriterV2<i32, SignedEncoding>>;
pub type Int64ColumnEncoder = PrimitiveColumnEncoder<Int64Type, RleWriterV2<i64, SignedEncoding>>;
pub type Int16ColumnEncoder = PrimitiveColumnEncoder<Int16Type, RleV2Encoder<i16, SignedEncoding>>;
pub type Int32ColumnEncoder = PrimitiveColumnEncoder<Int32Type, RleV2Encoder<i32, SignedEncoding>>;
pub type Int64ColumnEncoder = PrimitiveColumnEncoder<Int64Type, RleV2Encoder<i64, SignedEncoding>>;
pub type StringColumnEncoder = GenericBinaryColumnEncoder<GenericStringType<i32>>;
pub type LargeStringColumnEncoder = GenericBinaryColumnEncoder<GenericStringType<i64>>;
pub type BinaryColumnEncoder = GenericBinaryColumnEncoder<GenericBinaryType<i32>>;
Expand Down

0 comments on commit be8e1f1

Please sign in to comment.