Skip to content

Commit

Permalink
Treat end-of-stream as a potential unstarted, incomplete value in StreamingRawReader
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt committed Oct 28, 2024
1 parent f6bb59b commit fbe3aaf
Showing 1 changed file with 70 additions and 66 deletions.
136 changes: 70 additions & 66 deletions src/lazy/streaming_raw_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,79 +156,83 @@ impl<Encoding: Decoder, Input: IonInput> StreamingRawReader<Encoding, Input> {

let bytes_read = end_position - starting_position;
let input = unsafe { &mut *self.input.get() };
// If we ran out of data before we could get a result...
if matches!(result, Err(IonError::Incomplete(_))) {
// ...try to pull more data from the data source. It's ok to modify the buffer in
// this case because `result` (which holds a reference to the buffer) will be
// discarded.
if input.fill_buffer()? > 0 {
// If we get more data, try again.
continue;

match &result {
// If we ran out of data before we could get a result...
Err(IonError::Incomplete(_)) |
Ok(LazyRawStreamItem::EndOfStream(_)) => {

Check failure on line 163 in src/lazy/streaming_raw_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu, default)

type annotations needed

Check failure on line 163 in src/lazy/streaming_raw_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos, default)

type annotations needed

Check failure on line 163 in src/lazy/streaming_raw_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos, all)

type annotations needed

Check failure on line 163 in src/lazy/streaming_raw_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

type annotations needed

Check failure on line 163 in src/lazy/streaming_raw_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

type annotations needed
// ...try to pull more data from the data source. It's ok to modify the buffer in
// this case because `result` (which holds a reference to the buffer) will be
// discarded.
if input.fill_buffer()? > 0 {
// If we get more data, try again.
continue;
}
// If there's nothing available, return the result we got.
}
// If there's nothing available, return the result we got.
} else if let Ok(ref item) = result {
// We have successfully read something from the buffer.
//
// In binary encodings, stream items contain enough data for the reader to tell
// whether they are complete.
//
// In text encodings, it's possible for the buffer to end with data that looks like
// a complete item but is not. The only way to be certain is to try to read again
// from the input source to confirm there's no more data. Consider the following
// examples in which Ion is being pulled from a `File` into a `Vec<u8>`:
//
// foo /* comment */ ::bar::baz::1000
// └────────┬───────┘ └────────┬───────┘
// buffer contents remaining in File
//
// $ion _1_0
// └────────┬───────┘ └────────┬───────┘
// buffer contents remaining in File
//
// 75 1.20
// └────────┬───────┘ └────────┬───────┘
// buffer contents remaining in File
//
// To avoid this, we perform a final check for text readers who have emptied their
// buffer: we do not consider the item complete unless the input source is exhausted.
if old_encoding.is_text()
&& bytes_read == available_bytes.len()
&& !input_source_exhausted
{
use crate::lazy::raw_stream_item::RawStreamItem::*;
match item {
// Text containers and e-expressions have closing delimiters that allow us
// to tell that they're complete.
Value(v) if v.ion_type().is_container() => {}
EExp(_eexp) => {}
// IVMs (which look like symbols), scalar values, and the end of the
// stream are all cases where the reader looking at a fixed slice of the
// buffer may reach the wrong conclusion.
_ => {
// Try to pull more data from the input source. This invalidates the `result`
// variable because `fill_buffer()` may cause the buffer to be reallocated,
// so we start this iteration over. This results in the last value being parsed
// a second time from the (potentially updated) buffer.
if input.fill_buffer()? == 0 {
input_source_exhausted = true;
Ok(ref item) => {
// We have successfully read something from the buffer.
//
// In binary encodings, stream items contain enough data for the reader to tell
// whether they are complete.
//
// In text encodings, it's possible for the buffer to end with data that looks like
// a complete item but is not. The only way to be certain is to try to read again
// from the input source to confirm there's no more data. Consider the following
// examples in which Ion is being pulled from a `File` into a `Vec<u8>`:
//
// foo /* comment */ ::bar::baz::1000
// └────────┬───────┘ └────────┬───────┘
// buffer contents remaining in File
//
// $ion _1_0
// └────────┬───────┘ └────────┬───────┘
// buffer contents remaining in File
//
// 75 1.20
// └────────┬───────┘ └────────┬───────┘
// buffer contents remaining in File
//
// To avoid this, we perform a final check for text readers who have emptied their
// buffer: we do not consider the item complete unless the input source is exhausted.
if old_encoding.is_text()
&& bytes_read == available_bytes.len()
&& !input_source_exhausted
{
use crate::lazy::raw_stream_item::RawStreamItem::*;
match item {
// Text containers and e-expressions have closing delimiters that allow us
// to tell that they're complete.
Value(v) if v.ion_type().is_container() => {}
EExp(_eexp) => {}
// IVMs (which look like symbols), scalar values, and the end of the
// stream are all cases where the reader looking at a fixed slice of the
// buffer may reach the wrong conclusion.
_ => {
// Try to pull more data from the input source. This invalidates the `result`
// variable because `fill_buffer()` may cause the buffer to be reallocated,
// so we start this iteration over. This results in the last value being parsed
// a second time from the (potentially updated) buffer.
if input.fill_buffer()? == 0 {
input_source_exhausted = true;
}
continue;
}
continue;
}
}
}

// If this isn't just a peek, update our state to remember what we've already read.
if !is_peek {
// Mark those input bytes as having been consumed so they are not read again.
input.consume(bytes_read);
// Update the streaming reader's position to reflect the number of bytes we
// just read.
self.stream_position = end_position;
// If the item read was an IVM, this will be a new value.
self.detected_encoding = new_encoding;
// If this isn't just a peek, update our state to remember what we've already read.
if !is_peek {
// Mark those input bytes as having been consumed so they are not read again.
input.consume(bytes_read);
// Update the streaming reader's position to reflect the number of bytes we
// just read.
self.stream_position = end_position;
// If the item read was an IVM, this will be a new value.
self.detected_encoding = new_encoding;
}
}
}

return result;
}
}
Expand Down

0 comments on commit fbe3aaf

Please sign in to comment.