fix!: Don't panic when reading empty files (#381)
There was a bug when using partition_point, which gives no guarantees about its behavior in this case, so reading an empty file could panic. Hence the whole logic was refactored and unit/integration tests were added.

closes rustic-rs/rustic#1380
aawsome authored Dec 10, 2024
1 parent bf6c7d0 commit 70bc278
Showing 3 changed files with 107 additions and 54 deletions.
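
The root cause can be illustrated with a minimal standalone sketch (an assumption based on the old OpenFile code below, not part of this commit): for an empty file the old content vector held only the starts_at: usize::MAX dummy entry, so partition_point returned 0 and the subsequent "- 1" underflowed.

// Minimal sketch (assumption, not part of this commit) of the old failure mode.
fn main() {
    // For an empty file the old code kept only the dummy sentinel entry.
    let starts_at: Vec<usize> = vec![usize::MAX];
    let offset = 0usize;
    // No element satisfies the predicate, so partition_point returns 0 ...
    let i = starts_at.partition_point(|s| *s <= offset);
    assert_eq!(i, 0);
    // ... and the old `partition_point(...) - 1` would underflow and panic:
    // let blob_index = i - 1; // attempt to subtract with overflow
}
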
2 changes: 1 addition & 1 deletion crates/core/src/repository.rs
@@ -1605,7 +1605,7 @@ impl<P, S: IndexedFull> Repository<P, S> {
///
// TODO: Document errors
pub fn open_file(&self, node: &Node) -> RusticResult<OpenFile> {
Ok(OpenFile::from_node(self, node))
OpenFile::from_node(self, node)
}

/// Reads an opened file at the given position
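
With this change, open_file propagates errors from OpenFile::from_node instead of always returning Ok. A caller-side sketch (hypothetical variable names, based on the API exercised in the integration test below):

// Hedged usage sketch: both calls can now return an error (e.g. a data blob
// missing from the index) instead of panicking.
let open = repo.open_file(node)?;
let bytes = repo.read_file_at(&open, 0, 1024)?; // read up to 1 KiB from offset 0
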
151 changes: 100 additions & 51 deletions crates/core/src/vfs.rs
@@ -455,17 +455,8 @@ impl Vfs {
#[derive(Debug)]
pub struct OpenFile {
// The list of blobs
content: Vec<BlobInfo>,
}

// Information about the blob: 1) The id 2) The cumulated sizes of all blobs prior to this one, a.k.a the starting point of this blob.
#[derive(Debug)]
struct BlobInfo {
// [`Id`] of the blob
id: DataId,

// the start position of this blob within the file
starts_at: usize,
content: Vec<DataId>,
startpoints: ContentStartpoints,
}

impl OpenFile {
@@ -477,37 +468,32 @@ impl OpenFile {
/// * `node` - The `Node` to create the `OpenFile` for
///
/// # Errors
///
// TODO: Document errors
/// - If the index for the needed data blobs cannot be read
///
/// # Returns
///
/// The created `OpenFile`
///
/// # Panics
///
/// * Panics if the `Node` has no content
pub fn from_node<P, S: IndexedFull>(repo: &Repository<P, S>, node: &Node) -> Self {
let mut start = 0;
let mut content: Vec<_> = node
.content
.as_ref()
.unwrap()
.iter()
.map(|id| {
let starts_at = start;
start += repo.index().get_data(id).unwrap().data_length() as usize;
BlobInfo { id: *id, starts_at }
})
.collect();

// content is assumed to be partitioned, so we add a starts_at:MAX entry
content.push(BlobInfo {
id: DataId::default(),
starts_at: usize::MAX,
});

Self { content }
pub(crate) fn from_node<P, S: IndexedFull>(
repo: &Repository<P, S>,
node: &Node,
) -> RusticResult<Self> {
let content: Vec<_> = node.content.clone().unwrap_or_default();

let startpoints = ContentStartpoints::from_sizes(content.iter().map(|id| {
Ok(repo
.index()
.get_data(id)
.ok_or_else(|| {
RusticError::new(ErrorKind::Vfs, "blob {blob} is not contained in index")
.attach_context("blob", id.to_string())
})?
.data_length() as usize)
}))?;

Ok(Self {
content,
startpoints,
})
}

/// Read the `OpenFile` at the given `offset` from the `repo`.
@@ -520,7 +506,7 @@
///
/// # Errors
///
// TODO: Document errors
/// - if reading the needed blob(s) from the backend fails
///
/// # Returns
///
@@ -530,36 +516,99 @@
pub fn read_at<P, S: IndexedFull>(
&self,
repo: &Repository<P, S>,
mut offset: usize,
offset: usize,
mut length: usize,
) -> RusticResult<Bytes> {
// find the start of relevant blobs => find the largest index such that self.content[i].starts_at <= offset, but
// self.content[i+1] > offset (note that a last dummy element has been added)
let mut i = self.content.partition_point(|c| c.starts_at <= offset) - 1;

offset -= self.content[i].starts_at;
let (mut i, mut offset) = self.startpoints.compute_start(offset);

let mut result = BytesMut::with_capacity(length);

while length > 0 && i < self.content.len() - 1 {
let data = repo.get_blob_cached(&BlobId::from(*self.content[i].id), BlobType::Data)?;
// The case of empty node.content is also correctly handled here
while length > 0 && i < self.content.len() {
let data = repo.get_blob_cached(&BlobId::from(self.content[i]), BlobType::Data)?;

if offset > data.len() {
// we cannot read beyond the blob. This only happens if the offset is too large to fit in the last blob
break;
}

let to_copy = (data.len() - offset).min(length);

result.extend_from_slice(&data[offset..offset + to_copy]);

offset = 0;

length -= to_copy;

i += 1;
}

Ok(result.into())
}
}

// helper struct holding blob startpoints of the content
#[derive(Debug)]
struct ContentStartpoints(Vec<usize>);

impl ContentStartpoints {
fn from_sizes(sizes: impl IntoIterator<Item = RusticResult<usize>>) -> RusticResult<Self> {
let mut start = 0;
let mut offsets: Vec<_> = sizes
.into_iter()
.map(|size| -> RusticResult<_> {
let starts_at = start;
start += size?;
Ok(starts_at)
})
.collect::<RusticResult<_>>()?;

if !offsets.is_empty() {
// offsets is assumed to be partitioned, so we add a starts_at:MAX entry
offsets.push(usize::MAX);
}
Ok(Self(offsets))
}

// compute the index of the relevant blob and the effective offset within it from a file offset
fn compute_start(&self, mut offset: usize) -> (usize, usize) {
if self.0.is_empty() {
return (0, 0);
}
// find the start of relevant blobs => find the largest index such that self.offsets[i] <= offset, but
// self.offsets[i+1] > offset (note that a last dummy element with usize::MAX has been added to ensure we always have two partitions)
// If offsets is non-empty, then offsets[0] = 0, hence partition_point returns an index >=1.
let i = self.0.partition_point(|o| o <= &offset) - 1;
offset -= self.0[i];
(i, offset)
}
}

#[cfg(test)]
mod tests {
use super::*;

// helper func
fn startpoints_from_ok_sizes(sizes: impl IntoIterator<Item = usize>) -> ContentStartpoints {
ContentStartpoints::from_sizes(sizes.into_iter().map(Ok)).unwrap()
}

#[test]
fn content_offsets_empty_sizes() {
let offsets = startpoints_from_ok_sizes([]);
assert_eq!(offsets.compute_start(0), (0, 0));
assert_eq!(offsets.compute_start(42), (0, 0));
}

#[test]
fn content_offsets_size() {
let offsets = startpoints_from_ok_sizes([15]);
assert_eq!(offsets.compute_start(0), (0, 0));
assert_eq!(offsets.compute_start(5), (0, 5));
assert_eq!(offsets.compute_start(20), (0, 20));
}
#[test]
fn content_offsets_sizes() {
let offsets = startpoints_from_ok_sizes([15, 24]);
assert_eq!(offsets.compute_start(0), (0, 0));
assert_eq!(offsets.compute_start(5), (0, 5));
assert_eq!(offsets.compute_start(20), (1, 5));
assert_eq!(offsets.compute_start(42), (1, 27));
}
}
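
For orientation, a small sketch (an assumption, not part of the commit) of what from_sizes builds internally for blob sizes [15, 24] and how compute_start maps file offsets onto it; it could sit alongside the tests above:

// Sketch (assumption): cumulative starts are [0, 15]; the trailing usize::MAX
// sentinel closes the last partition so partition_point always returns an
// index >= 1 for any finite offset.
#[test]
fn content_offsets_internal_layout_sketch() {
    let sp = startpoints_from_ok_sizes([15, 24]);
    assert_eq!(sp.0, vec![0, 15, usize::MAX]);
    assert_eq!(sp.compute_start(14), (0, 14)); // last byte of blob 0
    assert_eq!(sp.compute_start(15), (1, 0)); // first byte of blob 1
}
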
8 changes: 6 additions & 2 deletions crates/core/tests/integration.rs
@@ -456,11 +456,15 @@ fn test_ls_and_read(

let data = repo.read_file_at(&file, 0, 21)?; // read full content
assert_eq!(Bytes::from("This is a test file.\n"), &data);
let data2 = repo.read_file_at(&file, 0, 4096)?; // read beyond file end
assert_eq!(data2, &data);
assert_eq!(data, repo.read_file_at(&file, 0, 4096)?); // read beyond file end
assert_eq!(Bytes::new(), repo.read_file_at(&file, 25, 1)?); // offset beyond file end
assert_eq!(Bytes::from("test"), repo.read_file_at(&file, 10, 4)?); // read partial content

// test reading an empty file from the repository
let path: PathBuf = ["test", "0", "tests", "empty-file"].iter().collect();
let node = entries.get(&path).unwrap();
let file = repo.open_file(node)?;
assert_eq!(Bytes::new(), repo.read_file_at(&file, 0, 0)?); // empty files
Ok(())
}

