Skip to content

Commit

Permalink
Add 1.1 binary reader support for strings and integers. (amazon-ion#754)
Browse files Browse the repository at this point in the history
* Add reader support for ints

* Add reader support for strings

* Address feedback; remove silly size_of test, add Invalid opcode type, simplify sequence of matches, and use value_body to handle offsets

* Address feedback; implement From<FixedInt> for Int

* Update src/lazy/binary/raw/v1_1/value.rs

Co-authored-by: Zack Slayton <[email protected]>

---------

Co-authored-by: Zack Slayton <[email protected]>
  • Loading branch information
nirosys and zslayton authored Apr 30, 2024
1 parent 849de8e commit b087e7f
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 31 deletions.
9 changes: 9 additions & 0 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::binary::var_uint::VarUInt;
use crate::lazy::binary::encoded_value::EncodedValue;
use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1;
use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, ION_1_1_OPCODES};
use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt;
use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt;
use crate::result::IonFailure;
Expand Down Expand Up @@ -330,6 +331,14 @@ impl<'a> ImmutableBuffer<'a> {
Ok(lazy_value)
}

/// Reads a `FixedInt` from the next `length` bytes of this buffer.
///
/// On success, returns the decoded `FixedInt` together with the buffer produced by
/// consuming those `length` bytes. If `peek_n_bytes` cannot supply `length` bytes, an
/// "incomplete" error labeled "a FixedInt" is returned along with this buffer's offset.
pub fn read_fixed_int(self, length: usize) -> ParseResult<'a, FixedInt> {
    let int_bytes = match self.peek_n_bytes(length) {
        Some(bytes) => bytes,
        None => return Err(IonError::incomplete("a FixedInt", self.offset())),
    };
    let fixed_int = FixedInt::read(int_bytes, length, 0)?;
    let remaining = self.consume(length);
    Ok((fixed_int, remaining))
}

/// Reads an annotations wrapper and its associated value from the buffer. The caller must confirm
/// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper.
fn read_annotated_value(
Expand Down
100 changes: 100 additions & 0 deletions src/lazy/binary/raw/v1_1/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,104 @@ mod tests {

Ok(())
}

/// Verifies that the raw 1.1 binary reader decodes integers whose length is carried in
/// the opcode (0x50-0x58) as well as integers whose length follows the 0xF5 opcode.
#[test]
fn integers() -> IonResult<()> {
    use num_bigint::BigInt;

    #[rustfmt::skip]
    let data: Vec<u8> = vec![
        // IVM
        0xE0, 0x01, 0x01, 0xEA,

        // Integer: 0
        0x50,

        // Integer: 17
        0x51, 0x11,

        // Integer: -944
        0x52, 0x50, 0xFC,

        // Integer: 1
        0xF5, 0x03, 0x01,

        // Integer: 147573952589676412929
        0xF5, 0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
    ];

    let mut reader = LazyRawBinaryReader_1_1::new(&data);
    let _ivm = reader.next()?.expect_ivm()?;

    // Each value is pulled from the stream first, then compared with its expectation.
    let zero = reader.next()?.expect_value()?.read()?.expect_int()?;
    assert_eq!(zero, 0.into());

    let seventeen = reader.next()?.expect_value()?.read()?.expect_int()?;
    assert_eq!(seventeen, 17.into());

    let negative = reader.next()?.expect_value()?.read()?.expect_int()?;
    assert_eq!(negative, (-944).into());

    let one = reader.next()?.expect_value()?.read()?.expect_int()?;
    assert_eq!(one, 1.into());

    let large = reader.next()?.expect_value()?.read()?.expect_int()?;
    let expected_large = BigInt::parse_bytes(b"147573952589676412929", 10).unwrap();
    assert_eq!(large, expected_large.into());

    Ok(())
}

/// Verifies that the raw 1.1 binary reader decodes strings whose length is carried in
/// the opcode (0x80-0x8F) as well as a string whose length follows the 0xF8 opcode.
#[test]
fn strings() -> IonResult<()> {
    #[rustfmt::skip]
    let data: Vec<u8> = vec![
        // IVM
        0xe0, 0x01, 0x01, 0xea,

        // String: ""
        0x80,

        // String: "hello"
        0x85, 0x68, 0x65, 0x6c, 0x6c, 0x6f,

        // String: "fourteen bytes"
        0x8E, 0x66, 0x6F, 0x75, 0x72, 0x74, 0x65, 0x65, 0x6E, 0x20, 0x62, 0x79, 0x74, 0x65,
        0x73,

        // String: "variable length encoding"
        0xF8, 0x31, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x6C, 0x65,
        0x6E, 0x67, 0x74, 0x68, 0x20, 0x65, 0x6E, 0x63, 0x6f, 0x64, 0x69, 0x6E, 0x67,
    ];

    let mut reader = LazyRawBinaryReader_1_1::new(&data);
    let _ivm = reader.next()?.expect_ivm()?;

    // The stream must yield exactly these strings, in this order.
    let expected_strings = ["", "hello", "fourteen bytes", "variable length encoding"];
    for expected in expected_strings.iter().copied() {
        let actual = reader.next()?.expect_value()?.read()?.expect_string()?;
        assert_eq!(actual, expected);
    }

    Ok(())
}
}
29 changes: 15 additions & 14 deletions src/lazy/binary/raw/v1_1/type_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,16 @@ use crate::IonType;
pub enum OpcodeType {
EExpressionWithAddress, // 0x00-0x3F -
EExpressionAddressFollows, // 0x40-0x4F -

Integer, // 0x50-0x58 - Integer up to 8 bytes wide.
Float, // 0x5A-0x5D -
Boolean, // 0x5E-0x5F -
Decimal, // 0x60-0x6F -
Timestamp, // 0x70-0x7F -
String, // 0x80-0x80 -
InlineSymbol, // 0x90-0x9F -
List, // 0xA0-0xAF -
SExpression, // 0xB0-0xBF -
StructEmpty, // 0xC0 -
Integer, // 0x50-0x58 - Integer up to 8 bytes wide
Float, // 0x5A-0x5D -
Boolean, // 0x5E-0x5F -
Decimal, // 0x60-0x6F -
Timestamp, // 0x70-0x7F -
String, // 0x80-0x8F -
InlineSymbol, // 0x90-0x9F -
List, // 0xA0-0xAF -
SExpression, // 0xB0-0xBF -
StructEmpty, // 0xC0 -
// reserved
StructSymAddress, // 0xD2-0xDF -
// reserved
Expand All @@ -40,9 +39,11 @@ pub enum OpcodeType {
Nop, // 0xEC-0xED -
// Reserved
SystemMacroInvoke, // 0xEF -
// delimited container end
// delimited list start
// delimited s-expression start
// delimited container end
// delimited list start
// delimited s-expression start
LargeInteger, // 0xF5 - Integer preceded by FlexUInt length
Invalid, // Represents an encoded value that does not match a defined opcode.
}

impl TryFrom<OpcodeType> for IonType {
Expand Down
27 changes: 13 additions & 14 deletions src/lazy/binary/raw/v1_1/type_descriptor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,21 @@ impl Opcode {
let (high_nibble, low_nibble) = (byte >> 4, byte & 0x0F);
use OpcodeType::*;

let opcode_type = match (high_nibble, low_nibble) {
(0x5, 0xE..=0xF) => Boolean,
(0xE, 0x0) => IonVersionMarker,
(0xE, 0xA) => NullNull,
(0xE, 0xC..=0xD) => Nop,
_ => Boolean, // Temporary, until everything is implemented to satisfy the LUT.
};
let ion_type = match opcode_type {
NullNull => Some(IonType::Null),
Nop => None,
IonVersionMarker => None,
Boolean => Some(IonType::Bool),
_ => panic!("the provided ion type code is either not implemented, or invalid"),
let (opcode_type, length_code, ion_type) = match (high_nibble, low_nibble) {
(0x5, 0x0..=0x8) => (Integer, low_nibble, Some(IonType::Int)),
(0x5, 0xE..=0xF) => (Boolean, low_nibble, Some(IonType::Bool)),
(0x8, _) => (String, low_nibble, Some(IonType::String)),
(0xE, 0x0) => (IonVersionMarker, low_nibble, None),
(0xE, 0xA) => (NullNull, low_nibble, Some(IonType::Null)),
(0xE, 0xC..=0xD) => (Nop, low_nibble, None),
(0xF, 0x5) => (LargeInteger, low_nibble, Some(IonType::Int)),
(0xF, 0x8) => (String, 0xFF, Some(IonType::String)), // 0xFF indicates >15 byte string.
_ => (Invalid, low_nibble, None),
};
Opcode {
ion_type,
opcode_type,
length_code: low_nibble,
length_code,
}
}

Expand Down Expand Up @@ -112,8 +109,10 @@ impl Header {
use LengthType::*;
match (self.ion_type_code, self.length_code) {
(OpcodeType::Boolean, 0xE..=0xF) => InOpcode(0),
(OpcodeType::Integer, n) => InOpcode(n),
(OpcodeType::Nop, 0xC) => InOpcode(0),
(OpcodeType::NullNull, 0xA) => InOpcode(0),
(OpcodeType::String, 0..=15) => InOpcode(self.length_code),
_ => FlexUIntFollows,
}
}
Expand Down
30 changes: 27 additions & 3 deletions src/lazy/binary/raw/v1_1/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
},
result::IonFailure,
types::SymbolId,
IonResult, IonType, RawSymbolTokenRef,
IonError, IonResult, IonType, RawSymbolTokenRef,
};

#[derive(Debug, Copy, Clone)]
Expand Down Expand Up @@ -199,7 +199,25 @@ impl<'top> LazyRawBinaryValue_1_1<'top> {

/// Helper method called by [`Self::read`]. Reads the current value as an int.
fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> {
todo!();
use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt;
debug_assert!(self.encoded_value.ion_type() == IonType::Int);

let header = &self.encoded_value.header();
let representation = header.type_code();
let value = match (representation, header.length_code as usize) {
(OpcodeType::Integer, 0x0) => 0.into(),
(OpcodeType::Integer, n) => {
// We have n bytes following that make up our integer.
self.input.consume(1).read_fixed_int(n)?.0.into()
}
(OpcodeType::LargeInteger, 0x5) => {
// We have a FlexUInt size, then big int.
let value_bytes = self.value_body()?;
FixedInt::read(value_bytes, value_bytes.len(), 0)?.into()
}
_ => unreachable!("integer encoding with illegal length_code found"),
};
Ok(RawValueRef::Int(value))
}

/// Helper method called by [`Self::read`]. Reads the current value as a float.
Expand Down Expand Up @@ -229,7 +247,13 @@ impl<'top> LazyRawBinaryValue_1_1<'top> {

/// Helper method called by [`Self::read`]. Reads the current value as a string.
fn read_string(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> {
todo!();
use crate::lazy::str_ref::StrRef;

debug_assert!(self.encoded_value.ion_type() == IonType::String);
let raw_bytes = self.value_body()?;
let text = std::str::from_utf8(raw_bytes)
.map_err(|_| IonError::decoding_error("found string with invalid UTF-8 data"))?;
Ok(RawValueRef::String(StrRef::from(text)))
}

/// Helper method called by [`Self::read`]. Reads the current value as a blob.
Expand Down
6 changes: 6 additions & 0 deletions src/lazy/encoder/binary/v1_1/fixed_int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ impl FixedInt {
}
}

impl From<FixedInt> for Int {
fn from(other: FixedInt) -> Self {
other.value
}
}

#[cfg(test)]
mod tests {
use num_bigint::BigInt;
Expand Down

0 comments on commit b087e7f

Please sign in to comment.