Skip to content

Commit

Permalink
Adds read/write support for v1.1 system symbols (#847)
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton authored Nov 6, 2024
1 parent 8a6a09d commit 89d3206
Show file tree
Hide file tree
Showing 41 changed files with 884 additions and 335 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ experimental = [
]

# Feature for indicating explicit opt-in to Ion 1.1
experimental-ion-1-1 = [ "experimental-reader-writer" ]
experimental-ion-1-1 = ["experimental-reader-writer"]

# Access to the streaming Reader and Writer types.
# These APIs are functional and well-tested, but are not yet stable.
Expand Down Expand Up @@ -65,6 +65,7 @@ bumpalo = { version = "3.15.3", features = ["collections", "std"] }
digest = { version = "0.9", optional = true }
ice_code = "0.1.4"
rustc-hash = "2.0.0"
phf = { version = "0.11.2", features = ["macros"] }
sha2 = { version = "0.9", optional = true }
serde = { version = "1.0", features = ["derive"], optional = true }
serde_with = { version = "3.7.0", optional = true }
Expand Down
204 changes: 193 additions & 11 deletions src/constants.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
pub(crate) mod v1_0 {
pub const SYSTEM_SYMBOLS: &[Option<&str>] = &[
None, // $0
Some("$ion"), // $1
Some("$ion_1_0"), // $2
Some("$ion_symbol_table"), // $3
Some("name"), // $4
Some("version"), // $5
Some("imports"), // $6
Some("symbols"), // $7
Some("max_id"), // $8
Some("$ion_shared_symbol_table"), // $9
use phf::phf_map;

/// The text of each Ion v1.0 system symbol, ordered by symbol ID.
///
/// The indexes in this slice are off by one relative to the corresponding Ion symbol ID:
/// index `0` holds the text of `$1`, and index `8` holds the text of `$9`.
/// This is because the slice does not contain an entry for symbol ID `0`, the symbol
/// with unknown text.
pub static SYSTEM_SYMBOLS: &[&str] = &[
// <unknown text> $0 (intentionally has no entry in this slice)
"$ion", // $1
"$ion_1_0", // $2
"$ion_symbol_table", // $3
"name", // $4
"version", // $5
"imports", // $6
"symbols", // $7
"max_id", // $8
"$ion_shared_symbol_table", // $9
];

pub(crate) mod system_symbol_ids {
Expand All @@ -23,4 +27,182 @@ pub(crate) mod v1_0 {
pub const MAX_ID: usize = 8;
pub const ION_SHARED_SYMBOL_TABLE: usize = 9;
}

/// A static, read-only map of text to Ion v1.0 system symbol addresses.
/// Because the set of string keys is known at compile time, this map is able to use a
/// perfect hashing function (PHF) to optimize lookup operations for those keys.
///
/// Each value is the symbol ID (`$N`) of its key. Symbol ID `0` (unknown text) has no
/// text and therefore no entry, so the smallest value in the map is `1`.
pub(crate) static SYSTEM_SYMBOL_TEXT_TO_ID: phf::Map<&str, usize> = phf_map! {
"$ion" => 1,
"$ion_1_0" => 2,
"$ion_symbol_table" => 3,
"name" => 4,
"version" => 5,
"imports" => 6,
"symbols" => 7,
"max_id" => 8,
"$ion_shared_symbol_table" => 9,
};
}

pub(crate) mod v1_1 {
use crate::types::SymbolAddress;
use phf::phf_map;

/// The text of each Ion v1.1 system symbol, ordered by symbol address.
///
/// There is no entry for `$0` (the symbol with unknown text), so the text for
/// address `N` is stored at index `N - 1`.
///
/// Addresses `$17` and `$19` currently hold the placeholder text `"<REMOVE>"`
/// (see: ion-docs#345). NOTE(review): presumably these slots are kept so that the
/// entries after them retain their addresses -- confirm against the spec issue.
pub static SYSTEM_SYMBOLS: &[&str] = &[
// <unknown text> $0 (intentionally has no entry in this slice)
"$ion", // $1
"$ion_1_0", // $2
"$ion_symbol_table", // $3
"name", // $4
"version", // $5
"imports", // $6
"symbols", // $7
"max_id", // $8
"$ion_shared_symbol_table", // $9
"$ion_encoding", // $10
"$ion_literal", // $11
"$ion_shared_module", // $12
"macro", // $13
"macro_table", // $14
"symbol_table", // $15
"module", // $16
"<REMOVE>", // $17 (see: ion-docs#345)
"export", // $18
"<REMOVE>", // $19 (see: ion-docs#345)
"import", // $20
"", // $21 (empty text)
"literal", // $22
"if_none", // $23
"if_some", // $24
"if_single", // $25
"if_multi", // $26
"for", // $27
"default", // $28
"values", // $29
"annotate", // $30
"make_string", // $31
"make_symbol", // $32
"make_blob", // $33
"make_decimal", // $34
"make_timestamp", // $35
"make_list", // $36
"make_sexp", // $37
"make_struct", // $38
"parse_ion", // $39
"repeat", // $40
"delta", // $41
"flatten", // $42
"sum", // $43
"set_symbols", // $44
"add_symbols", // $45
"set_macros", // $46
"add_macros", // $47
"use", // $48
"meta", // $49
"flex_symbol", // $50
"flex_int", // $51
"flex_uint", // $52
"uint8", // $53
"uint16", // $54
"uint32", // $55
"uint64", // $56
"int8", // $57
"int16", // $58
"int32", // $59
"int64", // $60
"float16", // $61
"float32", // $62
"float64", // $63
"none", // $64
"make_field", // $65
];

/// Named constants for a handful of Ion v1.1 system symbols.
///
/// The integer passed to each constructor is the symbol's address, i.e. the `$N`
/// value of the matching text in the v1.1 system symbol list (`$10` is
/// `$ion_encoding`, `$21` is the empty string, and so on).
pub mod system_symbols {
use crate::raw_symbol_ref::SystemSymbol_1_1;

// NOTE(review): `new_unchecked` presumably skips validating that the address is a
// defined system symbol -- confirm. Each literal below must stay in sync with the table.
pub const ION_ENCODING: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(10); // "$ion_encoding"
pub const SYMBOL_TABLE: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(15); // "symbol_table"
pub const EMPTY_TEXT: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(21); // ""
pub const ADD_SYMBOLS: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(45); // "add_symbols"
pub const ADD_MACROS: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(47); // "add_macros"
}

/// A static, read-only map of text to Ion v1.1 system symbol addresses.
/// Because the set of string keys is known at compile time, this map is able to use a
/// perfect hashing function (PHF) to optimize lookup operations for those keys.
///
/// Each value is the symbol address (`$N`) of its key. Addresses `17` and `19` have no
/// key in this map, so no text resolves to them; the empty string resolves to `21`.
pub(crate) static SYSTEM_SYMBOL_TEXT_TO_ID: phf::Map<&str, usize> = phf_map! {
"$ion" => 1,
"$ion_1_0" => 2,
"$ion_symbol_table" => 3,
"name" => 4,
"version" => 5,
"imports" => 6,
"symbols" => 7,
"max_id" => 8,
"$ion_shared_symbol_table" => 9,
"$ion_encoding" => 10,
"$ion_literal" => 11,
"$ion_shared_module" => 12,
"macro" => 13,
"macro_table" => 14,
"symbol_table" => 15,
"module" => 16,
// Address 17 is deliberately unmapped: its text is a placeholder (see: ion-docs#345).
"export" => 18,
// Address 19 is deliberately unmapped: its text is a placeholder (see: ion-docs#345).
"import" => 20,
"" => 21,
"literal" => 22,
"if_none" => 23,
"if_some" => 24,
"if_single" => 25,
"if_multi" => 26,
"for" => 27,
"default" => 28,
"values" => 29,
"annotate" => 30,
"make_string" => 31,
"make_symbol" => 32,
"make_blob" => 33,
"make_decimal" => 34,
"make_timestamp" => 35,
"make_list" => 36,
"make_sexp" => 37,
"make_struct" => 38,
"parse_ion" => 39,
"repeat" => 40,
"delta" => 41,
"flatten" => 42,
"sum" => 43,
"set_symbols" => 44,
"add_symbols" => 45,
"set_macros" => 46,
"add_macros" => 47,
"use" => 48,
"meta" => 49,
"flex_symbol" => 50,
"flex_int" => 51,
"flex_uint" => 52,
"uint8" => 53,
"uint16" => 54,
"uint32" => 55,
"uint64" => 56,
"int8" => 57,
"int16" => 58,
"int32" => 59,
"int64" => 60,
"float16" => 61,
"float32" => 62,
"float64" => 63,
"none" => 64,
"make_field" => 65,
};

/// Looks up the Ion v1.1 system symbol address for `text`.
///
/// Returns `Some(address)` when `text` is a key of `SYSTEM_SYMBOL_TEXT_TO_ID`, and
/// `None` otherwise.
pub fn address_for_text(text: &str) -> Option<usize> {
    // The map hands back a reference into static data; dereference to return the value.
    let address = SYSTEM_SYMBOL_TEXT_TO_ID.get(text)?;
    Some(*address)
}

/// Returns the text of the Ion v1.1 system symbol at `address`, or `None` if there is
/// no such system symbol.
///
/// `SYSTEM_SYMBOLS` has no entry for `$0` (the symbol with unknown text), so the text
/// for address `N` lives at index `N - 1`. Address `0` and any address past the end of
/// the table both yield `None`.
pub fn symbol_text_for_address(address: SymbolAddress) -> Option<&'static str> {
    // `checked_sub` turns address `0` into `None`. A plain `address - 1` would underflow,
    // which panics in builds that have overflow checks enabled.
    let index = address.checked_sub(1)?;
    SYSTEM_SYMBOLS.get(index).copied()
}
}
27 changes: 22 additions & 5 deletions src/lazy/any_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ use crate::lazy::text::value::{
LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker_1_0,
LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator,
};
use crate::symbol_table::{SystemSymbolTable, SYSTEM_SYMBOLS_1_0, SYSTEM_SYMBOLS_1_1};
use crate::{try_next, Encoding, IonResult, IonType, RawStreamItem, RawSymbolRef};

/// An implementation of the `LazyDecoder` trait that can read any encoding of Ion.
Expand All @@ -74,6 +75,10 @@ pub struct AnyEncoding;
// within each type. Trait methods are implemented by forwarding the call to the appropriate
// underlying type.
impl Decoder for AnyEncoding {
// Before a reader using `AnyEncoding` begins reading, it expects text Ion v1.0.
// At the outset of the stream, it inspects the first bytes to see if the stream is binary or text.
// If it encounters a version marker, the expected version will change.
const INITIAL_ENCODING_EXPECTED: IonEncoding = IonEncoding::Text_1_0;
type Reader<'data> = LazyRawAnyReader<'data>;
type Value<'top> = LazyRawAnyValue<'top>;
type SExp<'top> = LazyRawAnySExp<'top>;
Expand Down Expand Up @@ -538,7 +543,7 @@ impl IonEncoding {
}
}

#[derive(Debug, Default, Copy, Clone, PartialEq)]
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
pub enum IonVersion {
#[default]
v1_0,
Expand All @@ -553,6 +558,14 @@ impl IonVersion {
v1_1 => (1, 1),
}
}

/// Returns the system symbol table associated with this Ion version.
pub fn system_symbol_table(&self) -> &'static SystemSymbolTable {
match self {
IonVersion::v1_0 => SYSTEM_SYMBOLS_1_0,
IonVersion::v1_1 => SYSTEM_SYMBOLS_1_1,
}
}
}

impl<'data> From<LazyRawTextReader_1_0<'data>> for LazyRawAnyReader<'data> {
Expand Down Expand Up @@ -1861,8 +1874,10 @@ mod tests {
expect_int(context_ref, &mut reader, IonEncoding::Text_1_0, 2)?;

if cfg!(not(feature = "experimental-ion-1-1")) {
reader.next(context_ref).expect_err("Ion 1.1 IVM should return an error.");
return Ok(())
reader
.next(context_ref)
.expect_err("Ion 1.1 IVM should return an error.");
return Ok(());
}

// This IVM changes the encoding from 1.0 text to 1.1 text
Expand Down Expand Up @@ -1928,8 +1943,10 @@ mod tests {
expect_int(context_ref, &mut reader, IonEncoding::Binary_1_0, 2)?;

if cfg!(not(feature = "experimental-ion-1-1")) {
reader.next(context_ref).expect_err("Ion 1.1 IVM should return an error.");
return Ok(())
reader
.next(context_ref)
.expect_err("Ion 1.1 IVM should return an error.");
return Ok(());
}

// This IVM changes the encoding from 1.0 binary to 1.1 binary
Expand Down
10 changes: 5 additions & 5 deletions src/lazy/binary/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ mod tests {
let lazy_struct = value.read()?.expect_struct()?;
let mut fields = lazy_struct.iter();
let (name, _value) = fields.next().expect("field 1")?.expect_name_value()?;
assert_eq!(name.read()?, 4.as_raw_symbol_token_ref()); // 'name'
assert_eq!(name.read()?, 4.as_raw_symbol_ref()); // 'name'
Ok(())
}

Expand Down Expand Up @@ -349,7 +349,7 @@ mod tests {
.annotations()
.collect::<IonResult<Vec<RawSymbolRef<'_>>>>()?;
assert_eq!(annotations.len(), 1);
assert_eq!(annotations[0], 3.as_raw_symbol_token_ref());
assert_eq!(annotations[0], 3.as_raw_symbol_ref());

// Read annotations from foo::bar::baz::7
let int = reader.next()?.expect_value()?;
Expand All @@ -358,9 +358,9 @@ mod tests {
.annotations()
.collect::<IonResult<Vec<RawSymbolRef<'_>>>>()?;
assert_eq!(annotations.len(), 3);
assert_eq!(annotations[0], 10.as_raw_symbol_token_ref());
assert_eq!(annotations[1], 11.as_raw_symbol_token_ref());
assert_eq!(annotations[2], 12.as_raw_symbol_token_ref());
assert_eq!(annotations[0], 10.as_raw_symbol_ref());
assert_eq!(annotations[1], 11.as_raw_symbol_ref());
assert_eq!(annotations[2], 12.as_raw_symbol_ref());
Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion src/lazy/binary/raw/v1_1/annotations_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl<'a> Iterator for RawBinaryAnnotationsIterator_1_1<'a> {
};
let raw_symbol = match flex_sym.value() {
FlexSymValue::SymbolRef(raw_symbol) => raw_symbol,
FlexSymValue::Opcode(_) => {
FlexSymValue::Opcode(_opcode) => {
todo!("FlexSym escapes in annotation sequences")
}
};
Expand Down
Loading

0 comments on commit 89d3206

Please sign in to comment.