From 70bc278da2eb0a9885478b5de91c545f33e283d8 Mon Sep 17 00:00:00 2001 From: aawsome <37850842+aawsome@users.noreply.github.com> Date: Wed, 11 Dec 2024 00:50:26 +0100 Subject: [PATCH] fix!: Don't panic when reading empty files (#381) There was a bug when using partition_point which in that case does not gave guarentees about its behavior. Hence the whole logic was refactored and unit/integration tests were added. closes https://github.com/rustic-rs/rustic/issues/1380 --- crates/core/src/repository.rs | 2 +- crates/core/src/vfs.rs | 151 ++++++++++++++++++++----------- crates/core/tests/integration.rs | 8 +- 3 files changed, 107 insertions(+), 54 deletions(-) diff --git a/crates/core/src/repository.rs b/crates/core/src/repository.rs index d15d8f21..703490ed 100644 --- a/crates/core/src/repository.rs +++ b/crates/core/src/repository.rs @@ -1605,7 +1605,7 @@ impl Repository { /// // TODO: Document errors pub fn open_file(&self, node: &Node) -> RusticResult { - Ok(OpenFile::from_node(self, node)) + OpenFile::from_node(self, node) } /// Reads an opened file at the given position diff --git a/crates/core/src/vfs.rs b/crates/core/src/vfs.rs index c6a618ab..8a94140d 100644 --- a/crates/core/src/vfs.rs +++ b/crates/core/src/vfs.rs @@ -455,17 +455,8 @@ impl Vfs { #[derive(Debug)] pub struct OpenFile { // The list of blobs - content: Vec, -} - -// Information about the blob: 1) The id 2) The cumulated sizes of all blobs prior to this one, a.k.a the starting point of this blob. -#[derive(Debug)] -struct BlobInfo { - // [`Id`] of the blob - id: DataId, - - // the start position of this blob within the file - starts_at: usize, + content: Vec, + startpoints: ContentStartpoints, } impl OpenFile { @@ -477,37 +468,32 @@ impl OpenFile { /// * `node` - The `Node` to create the `OpenFile` for /// /// # Errors - /// - // TODO: Document errors + /// - If the index for the needed data blobs cannot be read /// /// # Returns /// /// The created `OpenFile` - /// - /// # Panics - /// - /// * Panics if the `Node` has no content - pub fn from_node(repo: &Repository, node: &Node) -> Self { - let mut start = 0; - let mut content: Vec<_> = node - .content - .as_ref() - .unwrap() - .iter() - .map(|id| { - let starts_at = start; - start += repo.index().get_data(id).unwrap().data_length() as usize; - BlobInfo { id: *id, starts_at } - }) - .collect(); - - // content is assumed to be partitioned, so we add a starts_at:MAX entry - content.push(BlobInfo { - id: DataId::default(), - starts_at: usize::MAX, - }); - - Self { content } + pub(crate) fn from_node( + repo: &Repository, + node: &Node, + ) -> RusticResult { + let content: Vec<_> = node.content.clone().unwrap_or_default(); + + let startpoints = ContentStartpoints::from_sizes(content.iter().map(|id| { + Ok(repo + .index() + .get_data(id) + .ok_or_else(|| { + RusticError::new(ErrorKind::Vfs, "blob {blob} is not contained in index") + .attach_context("blob", id.to_string()) + })? + .data_length() as usize) + }))?; + + Ok(Self { + content, + startpoints, + }) } /// Read the `OpenFile` at the given `offset` from the `repo`. @@ -520,7 +506,7 @@ impl OpenFile { /// /// # Errors /// - // TODO: Document errors + /// - if reading the needed blob(s) from the backend fails /// /// # Returns /// @@ -530,19 +516,16 @@ impl OpenFile { pub fn read_at( &self, repo: &Repository, - mut offset: usize, + offset: usize, mut length: usize, ) -> RusticResult { - // find the start of relevant blobs => find the largest index such that self.content[i].starts_at <= offset, but - // self.content[i+1] > offset (note that a last dummy element has been added) - let mut i = self.content.partition_point(|c| c.starts_at <= offset) - 1; - - offset -= self.content[i].starts_at; + let (mut i, mut offset) = self.startpoints.compute_start(offset); let mut result = BytesMut::with_capacity(length); - while length > 0 && i < self.content.len() - 1 { - let data = repo.get_blob_cached(&BlobId::from(*self.content[i].id), BlobType::Data)?; + // The case of empty node.content is also correctly handled here + while length > 0 && i < self.content.len() { + let data = repo.get_blob_cached(&BlobId::from(self.content[i]), BlobType::Data)?; if offset > data.len() { // we cannot read behind the blob. This only happens if offset is too large to fit in the last blob @@ -550,16 +533,82 @@ impl OpenFile { } let to_copy = (data.len() - offset).min(length); - result.extend_from_slice(&data[offset..offset + to_copy]); - offset = 0; - length -= to_copy; - i += 1; } Ok(result.into()) } } + +// helper struct holding blob startpoints of the content +#[derive(Debug)] +struct ContentStartpoints(Vec); + +impl ContentStartpoints { + fn from_sizes(sizes: impl IntoIterator>) -> RusticResult { + let mut start = 0; + let mut offsets: Vec<_> = sizes + .into_iter() + .map(|size| -> RusticResult<_> { + let starts_at = start; + start += size?; + Ok(starts_at) + }) + .collect::>()?; + + if !offsets.is_empty() { + // offsets is assumed to be partitioned, so we add a starts_at:MAX entry + offsets.push(usize::MAX); + } + Ok(Self(offsets)) + } + + // compute the correct blobid and effective offset from a file offset + fn compute_start(&self, mut offset: usize) -> (usize, usize) { + if self.0.is_empty() { + return (0, 0); + } + // find the start of relevant blobs => find the largest index such that self.offsets[i] <= offset, but + // self.offsets[i+1] > offset (note that a last dummy element with usize::MAX has been added to ensure we always have two partitions) + // If offsets is non-empty, then offsets[0] = 0, hence partition_point returns an index >=1. + let i = self.0.partition_point(|o| o <= &offset) - 1; + offset -= self.0[i]; + (i, offset) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // helper func + fn startpoints_from_ok_sizes(sizes: impl IntoIterator) -> ContentStartpoints { + ContentStartpoints::from_sizes(sizes.into_iter().map(Ok)).unwrap() + } + + #[test] + fn content_offsets_empty_sizes() { + let offsets = startpoints_from_ok_sizes([]); + assert_eq!(offsets.compute_start(0), (0, 0)); + assert_eq!(offsets.compute_start(42), (0, 0)); + } + + #[test] + fn content_offsets_size() { + let offsets = startpoints_from_ok_sizes([15]); + assert_eq!(offsets.compute_start(0), (0, 0)); + assert_eq!(offsets.compute_start(5), (0, 5)); + assert_eq!(offsets.compute_start(20), (0, 20)); + } + #[test] + fn content_offsets_sizes() { + let offsets = startpoints_from_ok_sizes([15, 24]); + assert_eq!(offsets.compute_start(0), (0, 0)); + assert_eq!(offsets.compute_start(5), (0, 5)); + assert_eq!(offsets.compute_start(20), (1, 5)); + assert_eq!(offsets.compute_start(42), (1, 27)); + } +} diff --git a/crates/core/tests/integration.rs b/crates/core/tests/integration.rs index 57d5a63e..2356c47a 100644 --- a/crates/core/tests/integration.rs +++ b/crates/core/tests/integration.rs @@ -456,11 +456,15 @@ fn test_ls_and_read( let data = repo.read_file_at(&file, 0, 21)?; // read full content assert_eq!(Bytes::from("This is a test file.\n"), &data); - let data2 = repo.read_file_at(&file, 0, 4096)?; // read beyond file end - assert_eq!(data2, &data); + assert_eq!(data, repo.read_file_at(&file, 0, 4096)?); // read beyond file end assert_eq!(Bytes::new(), repo.read_file_at(&file, 25, 1)?); // offset beyond file end assert_eq!(Bytes::from("test"), repo.read_file_at(&file, 10, 4)?); // read partial content + // test reading an empty file from the repository + let path: PathBuf = ["test", "0", "tests", "empty-file"].iter().collect(); + let node = entries.get(&path).unwrap(); + let file = repo.open_file(node)?; + assert_eq!(Bytes::new(), repo.read_file_at(&file, 0, 0)?); // empty files Ok(()) }