From 70bc278da2eb0a9885478b5de91c545f33e283d8 Mon Sep 17 00:00:00 2001
From: aawsome <37850842+aawsome@users.noreply.github.com>
Date: Wed, 11 Dec 2024 00:50:26 +0100
Subject: [PATCH] fix!: Don't panic when reading empty files (#381)
There was a bug when using partition_point which in that case does not
gave guarentees about its behavior. Hence the whole logic was refactored
and unit/integration tests were added.
closes https://github.com/rustic-rs/rustic/issues/1380
---
crates/core/src/repository.rs | 2 +-
crates/core/src/vfs.rs | 151 ++++++++++++++++++++-----------
crates/core/tests/integration.rs | 8 +-
3 files changed, 107 insertions(+), 54 deletions(-)
diff --git a/crates/core/src/repository.rs b/crates/core/src/repository.rs
index d15d8f21..703490ed 100644
--- a/crates/core/src/repository.rs
+++ b/crates/core/src/repository.rs
@@ -1605,7 +1605,7 @@ impl
Repository
{
///
// TODO: Document errors
pub fn open_file(&self, node: &Node) -> RusticResult {
- Ok(OpenFile::from_node(self, node))
+ OpenFile::from_node(self, node)
}
/// Reads an opened file at the given position
diff --git a/crates/core/src/vfs.rs b/crates/core/src/vfs.rs
index c6a618ab..8a94140d 100644
--- a/crates/core/src/vfs.rs
+++ b/crates/core/src/vfs.rs
@@ -455,17 +455,8 @@ impl Vfs {
#[derive(Debug)]
pub struct OpenFile {
// The list of blobs
- content: Vec,
-}
-
-// Information about the blob: 1) The id 2) The cumulated sizes of all blobs prior to this one, a.k.a the starting point of this blob.
-#[derive(Debug)]
-struct BlobInfo {
- // [`Id`] of the blob
- id: DataId,
-
- // the start position of this blob within the file
- starts_at: usize,
+ content: Vec,
+ startpoints: ContentStartpoints,
}
impl OpenFile {
@@ -477,37 +468,32 @@ impl OpenFile {
/// * `node` - The `Node` to create the `OpenFile` for
///
/// # Errors
- ///
- // TODO: Document errors
+ /// - If the index for the needed data blobs cannot be read
///
/// # Returns
///
/// The created `OpenFile`
- ///
- /// # Panics
- ///
- /// * Panics if the `Node` has no content
- pub fn from_node(repo: &Repository
, node: &Node) -> Self {
- let mut start = 0;
- let mut content: Vec<_> = node
- .content
- .as_ref()
- .unwrap()
- .iter()
- .map(|id| {
- let starts_at = start;
- start += repo.index().get_data(id).unwrap().data_length() as usize;
- BlobInfo { id: *id, starts_at }
- })
- .collect();
-
- // content is assumed to be partitioned, so we add a starts_at:MAX entry
- content.push(BlobInfo {
- id: DataId::default(),
- starts_at: usize::MAX,
- });
-
- Self { content }
+ pub(crate) fn from_node
(
+ repo: &Repository
,
+ node: &Node,
+ ) -> RusticResult {
+ let content: Vec<_> = node.content.clone().unwrap_or_default();
+
+ let startpoints = ContentStartpoints::from_sizes(content.iter().map(|id| {
+ Ok(repo
+ .index()
+ .get_data(id)
+ .ok_or_else(|| {
+ RusticError::new(ErrorKind::Vfs, "blob {blob} is not contained in index")
+ .attach_context("blob", id.to_string())
+ })?
+ .data_length() as usize)
+ }))?;
+
+ Ok(Self {
+ content,
+ startpoints,
+ })
}
/// Read the `OpenFile` at the given `offset` from the `repo`.
@@ -520,7 +506,7 @@ impl OpenFile {
///
/// # Errors
///
- // TODO: Document errors
+ /// - if reading the needed blob(s) from the backend fails
///
/// # Returns
///
@@ -530,19 +516,16 @@ impl OpenFile {
pub fn read_at(
&self,
repo: &Repository
,
- mut offset: usize,
+ offset: usize,
mut length: usize,
) -> RusticResult {
- // find the start of relevant blobs => find the largest index such that self.content[i].starts_at <= offset, but
- // self.content[i+1] > offset (note that a last dummy element has been added)
- let mut i = self.content.partition_point(|c| c.starts_at <= offset) - 1;
-
- offset -= self.content[i].starts_at;
+ let (mut i, mut offset) = self.startpoints.compute_start(offset);
let mut result = BytesMut::with_capacity(length);
- while length > 0 && i < self.content.len() - 1 {
- let data = repo.get_blob_cached(&BlobId::from(*self.content[i].id), BlobType::Data)?;
+ // The case of empty node.content is also correctly handled here
+ while length > 0 && i < self.content.len() {
+ let data = repo.get_blob_cached(&BlobId::from(self.content[i]), BlobType::Data)?;
if offset > data.len() {
// we cannot read behind the blob. This only happens if offset is too large to fit in the last blob
@@ -550,16 +533,82 @@ impl OpenFile {
}
let to_copy = (data.len() - offset).min(length);
-
result.extend_from_slice(&data[offset..offset + to_copy]);
-
offset = 0;
-
length -= to_copy;
-
i += 1;
}
Ok(result.into())
}
}
+
+// helper struct holding blob startpoints of the content
+#[derive(Debug)]
+struct ContentStartpoints(Vec);
+
+impl ContentStartpoints {
+ fn from_sizes(sizes: impl IntoIterator- >) -> RusticResult {
+ let mut start = 0;
+ let mut offsets: Vec<_> = sizes
+ .into_iter()
+ .map(|size| -> RusticResult<_> {
+ let starts_at = start;
+ start += size?;
+ Ok(starts_at)
+ })
+ .collect::>()?;
+
+ if !offsets.is_empty() {
+ // offsets is assumed to be partitioned, so we add a starts_at:MAX entry
+ offsets.push(usize::MAX);
+ }
+ Ok(Self(offsets))
+ }
+
+ // compute the correct blobid and effective offset from a file offset
+ fn compute_start(&self, mut offset: usize) -> (usize, usize) {
+ if self.0.is_empty() {
+ return (0, 0);
+ }
+ // find the start of relevant blobs => find the largest index such that self.offsets[i] <= offset, but
+ // self.offsets[i+1] > offset (note that a last dummy element with usize::MAX has been added to ensure we always have two partitions)
+ // If offsets is non-empty, then offsets[0] = 0, hence partition_point returns an index >=1.
+ let i = self.0.partition_point(|o| o <= &offset) - 1;
+ offset -= self.0[i];
+ (i, offset)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // helper func
+ fn startpoints_from_ok_sizes(sizes: impl IntoIterator
- ) -> ContentStartpoints {
+ ContentStartpoints::from_sizes(sizes.into_iter().map(Ok)).unwrap()
+ }
+
+ #[test]
+ fn content_offsets_empty_sizes() {
+ let offsets = startpoints_from_ok_sizes([]);
+ assert_eq!(offsets.compute_start(0), (0, 0));
+ assert_eq!(offsets.compute_start(42), (0, 0));
+ }
+
+ #[test]
+ fn content_offsets_size() {
+ let offsets = startpoints_from_ok_sizes([15]);
+ assert_eq!(offsets.compute_start(0), (0, 0));
+ assert_eq!(offsets.compute_start(5), (0, 5));
+ assert_eq!(offsets.compute_start(20), (0, 20));
+ }
+ #[test]
+ fn content_offsets_sizes() {
+ let offsets = startpoints_from_ok_sizes([15, 24]);
+ assert_eq!(offsets.compute_start(0), (0, 0));
+ assert_eq!(offsets.compute_start(5), (0, 5));
+ assert_eq!(offsets.compute_start(20), (1, 5));
+ assert_eq!(offsets.compute_start(42), (1, 27));
+ }
+}
diff --git a/crates/core/tests/integration.rs b/crates/core/tests/integration.rs
index 57d5a63e..2356c47a 100644
--- a/crates/core/tests/integration.rs
+++ b/crates/core/tests/integration.rs
@@ -456,11 +456,15 @@ fn test_ls_and_read(
let data = repo.read_file_at(&file, 0, 21)?; // read full content
assert_eq!(Bytes::from("This is a test file.\n"), &data);
- let data2 = repo.read_file_at(&file, 0, 4096)?; // read beyond file end
- assert_eq!(data2, &data);
+ assert_eq!(data, repo.read_file_at(&file, 0, 4096)?); // read beyond file end
assert_eq!(Bytes::new(), repo.read_file_at(&file, 25, 1)?); // offset beyond file end
assert_eq!(Bytes::from("test"), repo.read_file_at(&file, 10, 4)?); // read partial content
+ // test reading an empty file from the repository
+ let path: PathBuf = ["test", "0", "tests", "empty-file"].iter().collect();
+ let node = entries.get(&path).unwrap();
+ let file = repo.open_file(node)?;
+ assert_eq!(Bytes::new(), repo.read_file_at(&file, 0, 0)?); // empty files
Ok(())
}