From 3baf513bfb4a71b87b431ff597b6a7a467b19567 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Mon, 6 May 2024 21:17:08 +0200 Subject: [PATCH 1/5] check: return error if check fails --- crates/core/src/commands/check.rs | 779 +++++++++++++++++------------- crates/core/src/error.rs | 89 +++- crates/core/src/lib.rs | 2 + crates/core/src/repository.rs | 23 +- 4 files changed, 546 insertions(+), 347 deletions(-) diff --git a/crates/core/src/commands/check.rs b/crates/core/src/commands/check.rs index 87977664..df2c58a2 100644 --- a/crates/core/src/commands/check.rs +++ b/crates/core/src/commands/check.rs @@ -1,5 +1,5 @@ //! `check` subcommand -use std::collections::HashMap; +use std::{collections::HashMap, sync::Mutex}; use bytes::Bytes; use derive_setters::Setters; @@ -12,7 +12,7 @@ use crate::{ backend::{cache::Cache, decrypt::DecryptReadBackend, node::NodeType, FileType, ReadBackend}, blob::{tree::TreeStreamerOnce, BlobType}, crypto::hasher::hash, - error::{RusticErrorKind, RusticResult}, + error::{CheckError, CommandErrorKind, RusticErrorKind, RusticResult}, id::Id, index::{ binarysorted::{IndexCollector, IndexType}, @@ -21,6 +21,7 @@ use crate::{ progress::{Progress, ProgressBars}, repofile::{IndexFile, IndexPack, PackHeader, PackHeaderLength, PackHeaderRef, SnapshotFile}, repository::{Open, Repository}, + LogLevel, }; #[cfg_attr(feature = "clap", derive(clap::Parser))] @@ -37,7 +38,41 @@ pub struct CheckOptions { pub read_data: bool, } -impl CheckOptions { +#[allow(clippy::enum_glob_use)] +use CheckError::*; + +#[derive(Debug)] +pub struct CheckResults { + pub errors: Vec<(LogLevel, CheckError)>, +} + +impl CheckResults { + pub fn is_ok(&self) -> RusticResult<()> { + if self + .errors + .iter() + .any(|(level, _)| level == &LogLevel::Error) + { + return Err(CommandErrorKind::CheckFoundErrors.into()); + } + Ok(()) + } +} + +#[derive(Default)] +pub struct CheckResultsCollector { + log: bool, + errors: Mutex>, +} + +impl CheckResultsCollector { + pub(crate) fn log(self, log: bool) -> Self { + Self { + log, + errors: self.errors, + } + } + /// Runs the `check` command /// /// # Type Parameters @@ -52,13 +87,17 @@ impl CheckOptions { /// # Errors /// /// If the repository is corrupted - pub(crate) fn run(self, repo: &Repository) -> RusticResult<()> { + pub(crate) fn check( + &self, + opts: CheckOptions, + repo: &Repository, + ) -> RusticResult<()> { let be = repo.dbe(); let cache = repo.cache(); let hot_be = &repo.be_hot; let raw_be = repo.dbe(); let pb = &repo.pb; - if !self.trust_cache { + if !opts.trust_cache { if let Some(cache) = &cache { for file_type in [FileType::Snapshot, FileType::Index] { // list files in order to clean up the cache @@ -71,18 +110,18 @@ impl CheckOptions { let p = pb.progress_bytes(format!("checking {file_type:?} in cache...")); // TODO: Make concurrency (20) customizable - check_cache_files(20, cache, raw_be, file_type, &p)?; + self.check_cache_files(20, cache, raw_be, file_type, &p)?; } } } if let Some(hot_be) = hot_be { for file_type in [FileType::Snapshot, FileType::Index] { - check_hot_files(raw_be, hot_be, file_type, pb)?; + self.check_hot_files(raw_be, hot_be, file_type, pb)?; } } - let index_collector = check_packs(be, hot_be, self.read_data, pb)?; + let index_collector = self.check_packs(be, hot_be, opts.read_data, pb)?; if let Some(cache) = &cache { let p = pb.progress_spinner("cleaning up packs from cache..."); @@ -92,10 +131,10 @@ impl CheckOptions { } p.finish(); - if !self.trust_cache { + if !opts.trust_cache { let p = pb.progress_bytes("checking 
packs in cache..."); // TODO: Make concurrency (5) customizable - check_cache_files(5, cache, raw_be, FileType::Pack, &p)?; + self.check_cache_files(5, cache, raw_be, FileType::Pack, &p)?; } } @@ -112,9 +151,9 @@ impl CheckOptions { let index_be = GlobalIndex::new_from_index(index_collector.into_index()); - check_snapshots(be, &index_be, pb)?; + self.check_snapshots(be, &index_be, pb)?; - if self.read_data { + if opts.read_data { let p = pb.progress_bytes("reading pack data..."); p.set_length(total_pack_size); @@ -125,391 +164,445 @@ impl CheckOptions { .for_each(|pack| { let id = pack.id; let data = be.read_full(FileType::Pack, &id).unwrap(); - match check_pack(be, pack, data, &p) { + match self.check_pack(be, pack, data, &p) { Ok(()) => {} - Err(err) => error!("Error reading pack {id} : {err}",), + Err(err) => self.add_error(ErrorReadingPack { id, err }), } }); p.finish(); } Ok(()) } -} -/// Checks if all files in the backend are also in the hot backend -/// -/// # Arguments -/// -/// * `be` - The backend to check -/// * `be_hot` - The hot backend to check -/// * `file_type` - The type of the files to check -/// * `pb` - The progress bar to use -/// -/// # Errors -/// -/// If a file is missing or has a different size -fn check_hot_files( - be: &impl ReadBackend, - be_hot: &impl ReadBackend, - file_type: FileType, - pb: &impl ProgressBars, -) -> RusticResult<()> { - let p = pb.progress_spinner(format!("checking {file_type:?} in hot repo...")); - let mut files = be - .list_with_size(file_type) - .map_err(RusticErrorKind::Backend)? - .into_iter() - .collect::>(); - - let files_hot = be_hot - .list_with_size(file_type) - .map_err(RusticErrorKind::Backend)?; - - for (id, size_hot) in files_hot { - match files.remove(&id) { - None => error!("hot file Type: {file_type:?}, Id: {id} does not exist in repo"), - Some(size) if size != size_hot => { - // TODO: This should be an actual error not a log entry - error!("Type: {file_type:?}, Id: {id}: hot size: {size_hot}, actual size: {size}"); - } - _ => {} //everything ok + fn add_error(&self, err: CheckError) { + if self.log { + error!("{err}"); } + self.errors.lock().unwrap().push((LogLevel::Error, err)); } - for (id, _) in files { - error!("hot file Type: {file_type:?}, Id: {id} is missing!",); + fn add_warn(&self, err: CheckError) { + if self.log { + warn!("{err}"); + } + self.errors.lock().unwrap().push((LogLevel::Warn, err)); } - p.finish(); - Ok(()) -} + /// Checks if all files in the backend are also in the hot backend + /// + /// # Arguments + /// + /// * `be` - The backend to check + /// * `be_hot` - The hot backend to check + /// * `file_type` - The type of the files to check + /// * `pb` - The progress bar to use + /// + /// # Errors + /// + /// If a file is missing or has a different size + fn check_hot_files( + &self, + be: &impl ReadBackend, + be_hot: &impl ReadBackend, + file_type: FileType, + pb: &impl ProgressBars, + ) -> RusticResult<()> { + let p = pb.progress_spinner(format!("checking {file_type:?} in hot repo...")); + let mut files = be + .list_with_size(file_type) + .map_err(RusticErrorKind::Backend)? 
+ .into_iter() + .collect::>(); + + let files_hot = be_hot + .list_with_size(file_type) + .map_err(RusticErrorKind::Backend)?; + + for (id, size_hot) in files_hot { + match files.remove(&id) { + None => self.add_error(NoColdFile { id, file_type }), + Some(size) if size != size_hot => self.add_error(HotFileSizeMismatch { + id, + file_type, + size_hot, + size, + }), + _ => {} //everything ok + } + } + + for (id, _) in files { + self.add_error(NoHotFile { id, file_type }); + } + p.finish(); -/// Checks if all files in the cache are also in the backend -/// -/// # Arguments -/// -/// * `concurrency` - The number of threads to use -/// * `cache` - The cache to check -/// * `be` - The backend to check -/// * `file_type` - The type of the files to check -/// * `p` - The progress bar to use -/// -/// # Errors -/// -/// If a file is missing or has a different size -fn check_cache_files( - _concurrency: usize, - cache: &Cache, - be: &impl ReadBackend, - file_type: FileType, - p: &impl Progress, -) -> RusticResult<()> { - let files = cache.list_with_size(file_type)?; - - if files.is_empty() { - return Ok(()); + Ok(()) } - let total_size = files.values().map(|size| u64::from(*size)).sum(); - p.set_length(total_size); - - files - .into_par_iter() - .for_each_with((cache, be, p.clone()), |(cache, be, p), (id, size)| { - // Read file from cache and from backend and compare - match ( - cache.read_full(file_type, &id), - be.read_full(file_type, &id), - ) { - (Err(err), _) => { - error!("Error reading cached file Type: {file_type:?}, Id: {id} : {err}"); - } - (_, Err(err)) => { - error!("Error reading file Type: {file_type:?}, Id: {id} : {err}"); - } - (Ok(Some(data_cached)), Ok(data)) if data_cached != data => { - error!( - "Cached file Type: {file_type:?}, Id: {id} is not identical to backend!" 
- ); + /// Checks if all files in the cache are also in the backend + /// + /// # Arguments + /// + /// * `concurrency` - The number of threads to use + /// * `cache` - The cache to check + /// * `be` - The backend to check + /// * `file_type` - The type of the files to check + /// * `p` - The progress bar to use + /// + /// # Errors + /// + /// If a file is missing or has a different size + fn check_cache_files( + &self, + _concurrency: usize, + cache: &Cache, + be: &impl ReadBackend, + file_type: FileType, + p: &impl Progress, + ) -> RusticResult<()> { + let files = cache.list_with_size(file_type)?; + + if files.is_empty() { + return Ok(()); + } + + let total_size = files.values().map(|size| u64::from(*size)).sum(); + p.set_length(total_size); + + files.into_par_iter().for_each_with( + (cache, be, p.clone()), + |(cache, be, p), (id, size)| { + // Read file from cache and from backend and compare + match ( + cache.read_full(file_type, &id), + be.read_full(file_type, &id), + ) { + (Err(err), _) => self.add_error(ErrorReadingCache { + id, + file_type, + err: Box::new(err), + }), + (_, Err(err)) => self.add_error(ErrorReadingFile { + id, + file_type, + err: Box::new(RusticErrorKind::Backend(err).into()), + }), + (Ok(Some(data_cached)), Ok(data)) if data_cached != data => { + self.add_error(CacheMismatch { id, file_type }); + } + (Ok(_), Ok(_)) => {} // everything ok } - (Ok(_), Ok(_)) => {} // everything ok - } - p.inc(u64::from(size)); - }); + p.inc(u64::from(size)); + }, + ); - p.finish(); - Ok(()) -} + p.finish(); + Ok(()) + } -/// Check if packs correspond to index and are present in the backend -/// -/// # Arguments -/// -/// * `be` - The backend to check -/// * `hot_be` - The hot backend to check -/// * `read_data` - Whether to read the data of the packs -/// * `pb` - The progress bar to use -/// -/// # Errors -/// -/// If a pack is missing or has a different size -/// -/// # Returns -/// -/// The index collector -fn check_packs( - be: &impl DecryptReadBackend, - hot_be: &Option, - read_data: bool, - pb: &impl ProgressBars, -) -> RusticResult { - let mut packs = HashMap::new(); - let mut tree_packs = HashMap::new(); - let mut index_collector = IndexCollector::new(if read_data { - IndexType::Full - } else { - IndexType::DataIds - }); - - let p = pb.progress_counter("reading index..."); - for index in be.stream_all::(&p)? 
{ - let index = index?.1; - index_collector.extend(index.packs.clone()); - for (p, to_delete) in index.all_packs() { - let check_time = to_delete; // Check if time is set for packs marked to delete - let blob_type = p.blob_type(); - let pack_size = p.pack_size(); - _ = packs.insert(p.id, pack_size); - if hot_be.is_some() && blob_type == BlobType::Tree { - _ = tree_packs.insert(p.id, pack_size); - } + /// Check if packs correspond to index and are present in the backend + /// + /// # Arguments + /// + /// * `be` - The backend to check + /// * `hot_be` - The hot backend to check + /// * `read_data` - Whether to read the data of the packs + /// * `pb` - The progress bar to use + /// + /// # Errors + /// + /// If a pack is missing or has a different size + /// + /// # Returns + /// + /// The index collector + fn check_packs( + &self, + be: &impl DecryptReadBackend, + hot_be: &Option, + read_data: bool, + pb: &impl ProgressBars, + ) -> RusticResult { + let mut packs = HashMap::new(); + let mut tree_packs = HashMap::new(); + let mut index_collector = IndexCollector::new(if read_data { + IndexType::Full + } else { + IndexType::DataIds + }); - // Check if time is set _ - if check_time && p.time.is_none() { - error!("pack {}: No time is set! Run prune to correct this!", p.id); - } + let p = pb.progress_counter("reading index..."); + for index in be.stream_all::(&p)? { + let index = index?.1; + index_collector.extend(index.packs.clone()); + for (p, to_delete) in index.all_packs() { + let check_time = to_delete; // Check if time is set for packs marked to delete + let blob_type = p.blob_type(); + let pack_size = p.pack_size(); + _ = packs.insert(p.id, pack_size); + if hot_be.is_some() && blob_type == BlobType::Tree { + _ = tree_packs.insert(p.id, pack_size); + } - // check offsests in index - let mut expected_offset: u32 = 0; - let mut blobs = p.blobs; - blobs.sort_unstable(); - for blob in blobs { - if blob.tpe != blob_type { - error!( - "pack {}: blob {} blob type does not match: type: {:?}, expected: {:?}", - p.id, blob.id, blob.tpe, blob_type - ); + // Check if time is set _ + if check_time && p.time.is_none() { + self.add_error(PackTimeNotSet { id: p.id }); } - if blob.offset != expected_offset { - error!( - "pack {}: blob {} offset in index: {}, expected: {}", - p.id, blob.id, blob.offset, expected_offset - ); + // check offsests in index + let mut expected_offset: u32 = 0; + let mut blobs = p.blobs; + blobs.sort_unstable(); + for blob in blobs { + if blob.tpe != blob_type { + self.add_error(PackBlobTypesMismatch { + id: p.id, + blob_id: blob.id, + blob_type: blob.tpe, + expected: blob_type, + }); + } + + if blob.offset != expected_offset { + self.add_error(PackBlobOffsetMismatch { + id: p.id, + blob_id: blob.id, + offset: blob.offset, + expected: expected_offset, + }); + } + expected_offset += blob.length; } - expected_offset += blob.length; } } - } - p.finish(); - - if let Some(hot_be) = hot_be { - let p = pb.progress_spinner("listing packs in hot repo..."); - check_packs_list(hot_be, tree_packs)?; p.finish(); - } - let p = pb.progress_spinner("listing packs..."); - check_packs_list(be, packs)?; - p.finish(); + if let Some(hot_be) = hot_be { + let p = pb.progress_spinner("listing packs in hot repo..."); + self.check_packs_list(hot_be, tree_packs)?; + p.finish(); + } - Ok(index_collector) -} + let p = pb.progress_spinner("listing packs..."); + self.check_packs_list(be, packs)?; + p.finish(); + + Ok(index_collector) + } -// TODO: Add documentation -/// Checks if all packs in the backend are 
also in the index -/// -/// # Arguments -/// -/// * `be` - The backend to check -/// * `packs` - The packs to check -/// -/// # Errors -/// -/// If a pack is missing or has a different size -fn check_packs_list(be: &impl ReadBackend, mut packs: HashMap) -> RusticResult<()> { - for (id, size) in be - .list_with_size(FileType::Pack) - .map_err(RusticErrorKind::Backend)? - { - match packs.remove(&id) { - None => warn!("pack {id} not referenced in index. Can be a parallel backup job. To repair: 'rustic repair index'."), - Some(index_size) if index_size != size => { - error!("pack {id}: size computed by index: {index_size}, actual size: {size}. To repair: 'rustic repair index'."); + // TODO: Add documentation + /// Checks if all packs in the backend are also in the index + /// + /// # Arguments + /// + /// * `be` - The backend to check + /// * `packs` - The packs to check + /// + /// # Errors + /// + /// If a pack is missing or has a different size + fn check_packs_list( + &self, + be: &impl ReadBackend, + mut packs: HashMap, + ) -> RusticResult<()> { + for (id, size) in be + .list_with_size(FileType::Pack) + .map_err(RusticErrorKind::Backend)? + { + match packs.remove(&id) { + None => self.add_warn(PackNotReferenced { id }), + Some(index_size) if index_size != size => self.add_error(PackSizeMismatchIndex { + id, + index_size, + size, + }), + _ => {} //everything ok } - _ => {} //everything ok } - } - for (id, _) in packs { - error!("pack {id} is referenced by the index but not present! To repair: 'rustic repair index'.",); + for (id, _) in packs { + self.add_error(NoPack { id }); + } + Ok(()) } - Ok(()) -} -/// Check if all snapshots and contained trees can be loaded and contents exist in the index -/// -/// # Arguments -/// -/// * `index` - The index to check -/// * `pb` - The progress bar to use -/// -/// # Errors -/// -/// If a snapshot or tree is missing or has a different size -fn check_snapshots( - be: &impl DecryptReadBackend, - index: &impl ReadGlobalIndex, - pb: &impl ProgressBars, -) -> RusticResult<()> { - let p = pb.progress_counter("reading snapshots..."); - let snap_trees: Vec<_> = be - .stream_all::(&p)? - .iter() - .map_ok(|(_, snap)| snap.tree) - .try_collect()?; - p.finish(); - - let p = pb.progress_counter("checking trees..."); - let mut tree_streamer = TreeStreamerOnce::new(be, index, snap_trees, p)?; - while let Some(item) = tree_streamer.next().transpose()? { - let (path, tree) = item; - for node in tree.nodes { - match node.node_type { - NodeType::File => node.content.as_ref().map_or_else( - || { - error!("file {:?} doesn't have a content", path.join(node.name())); - }, - |content| { - for (i, id) in content.iter().enumerate() { - if id.is_null() { - error!("file {:?} blob {} has null ID", path.join(node.name()), i); - } + /// Check if all snapshots and contained trees can be loaded and contents exist in the index + /// + /// # Arguments + /// + /// * `index` - The index to check + /// * `pb` - The progress bar to use + /// + /// # Errors + /// + /// If a snapshot or tree is missing or has a different size + fn check_snapshots( + &self, + be: &impl DecryptReadBackend, + index: &impl ReadGlobalIndex, + pb: &impl ProgressBars, + ) -> RusticResult<()> { + let p = pb.progress_counter("reading snapshots..."); + let snap_trees: Vec<_> = be + .stream_all::(&p)? 
+ .iter() + .map_ok(|(_, snap)| snap.tree) + .try_collect()?; + p.finish(); - if !index.has_data(id) { - error!( - "file {:?} blob {} is missing in index", - path.join(node.name()), - id - ); + let p = pb.progress_counter("checking trees..."); + let mut tree_streamer = TreeStreamerOnce::new(be, index, snap_trees, p)?; + while let Some(item) = tree_streamer.next().transpose()? { + let (path, tree) = item; + for node in tree.nodes { + match node.node_type { + NodeType::File => node.content.as_ref().map_or_else( + || { + self.add_error(FileHasNoContent { + file: path.join(node.name()), + }); + }, + |content| { + for (i, id) in content.iter().enumerate() { + if id.is_null() { + self.add_error(FileBlobHasNullId { + file: path.join(node.name()), + blob_num: i, + }); + } + + if !index.has_data(id) { + self.add_error(FileBlobNotInIndex { + file: path.join(node.name()), + blob_id: *id, + }); + } } + }, + ), + + NodeType::Dir => { + match node.subtree { + None => self.add_error(NoSubTree { + dir: path.join(node.name()), + }), + Some(tree) if tree.is_null() => self.add_error(NullSubTree { + dir: path.join(node.name()), + }), + _ => {} // subtree is ok } - }, - ), - - NodeType::Dir => { - match node.subtree { - None => { - error!("dir {:?} subtree does not exist", path.join(node.name())); - } - Some(tree) if tree.is_null() => { - error!("dir {:?} subtree has null ID", path.join(node.name())); - } - _ => {} // subtree is ok } + _ => {} // nothing to check } - - _ => {} // nothing to check } } + + Ok(()) } - Ok(()) -} + /// Check if a pack is valid + /// + /// # Arguments + /// + /// * `be` - The backend to use + /// * `index_pack` - The pack to check + /// * `data` - The data of the pack + /// * `p` - The progress bar to use + /// + /// # Errors + /// + /// If the pack is invalid + /// + /// # Panics + /// + /// If zstd decompression fails. + fn check_pack( + &self, + be: &impl DecryptReadBackend, + index_pack: IndexPack, + mut data: Bytes, + p: &impl Progress, + ) -> RusticResult<()> { + let id = index_pack.id; + let size = index_pack.pack_size(); + if data.len() != size as usize { + self.add_error(PackSizeMismatch { + id, + size: data.len(), + expected: size as usize, + }); + return Ok(()); + } -/// Check if a pack is valid -/// -/// # Arguments -/// -/// * `be` - The backend to use -/// * `index_pack` - The pack to check -/// * `data` - The data of the pack -/// * `p` - The progress bar to use -/// -/// # Errors -/// -/// If the pack is invalid -/// -/// # Panics -/// -/// If zstd decompression fails. -fn check_pack( - be: &impl DecryptReadBackend, - index_pack: IndexPack, - mut data: Bytes, - p: &impl Progress, -) -> RusticResult<()> { - let id = index_pack.id; - let size = index_pack.pack_size(); - if data.len() != size as usize { - error!( - "pack {id}: data size does not match expected size. Read: {} bytes, expected: {size} bytes", - data.len() - ); - return Ok(()); - } + let computed = hash(&data); + if id != computed { + self.add_error(PackHashMismatch { id, computed }); + return Ok(()); + } - let comp_id = hash(&data); - if id != comp_id { - error!("pack {id}: Hash mismatch. 
Computed hash: {comp_id}"); - return Ok(()); - } + // check header length + let header_len = PackHeaderRef::from_index_pack(&index_pack).size(); + let pack_header_len = + PackHeaderLength::from_binary(&data.split_off(data.len() - 4))?.to_u32(); + if pack_header_len != header_len { + self.add_error(PackHeaderLengthMismatch { + id, + length: pack_header_len, + computed: header_len, + }); + return Ok(()); + } - // check header length - let header_len = PackHeaderRef::from_index_pack(&index_pack).size(); - let pack_header_len = PackHeaderLength::from_binary(&data.split_off(data.len() - 4))?.to_u32(); - if pack_header_len != header_len { - error!("pack {id}: Header length in pack file doesn't match index. In pack: {pack_header_len}, calculated: {header_len}"); - return Ok(()); - } + // check header + let header = be.decrypt(&data.split_off(data.len() - header_len as usize))?; - // check header - let header = be.decrypt(&data.split_off(data.len() - header_len as usize))?; - - let pack_blobs = PackHeader::from_binary(&header)?.into_blobs(); - let mut blobs = index_pack.blobs; - blobs.sort_unstable_by_key(|b| b.offset); - if pack_blobs != blobs { - error!("pack {id}: Header from pack file does not match the index"); - debug!("pack file header: {pack_blobs:?}"); - debug!("index: {:?}", blobs); - return Ok(()); - } - p.inc(u64::from(header_len) + 4); - - // check blobs - for blob in blobs { - let blob_id = blob.id; - let mut blob_data = be.decrypt(&data.split_to(blob.length as usize))?; - - // TODO: this is identical to backend/decrypt.rs; unify these two parts! - if let Some(length) = blob.uncompressed_length { - blob_data = decode_all(&*blob_data).unwrap(); - if blob_data.len() != length.get() as usize { - error!("pack {id}, blob {blob_id}: Actual uncompressed length does not fit saved uncompressed length"); + let pack_blobs = PackHeader::from_binary(&header)?.into_blobs(); + let mut blobs = index_pack.blobs; + blobs.sort_unstable_by_key(|b| b.offset); + if pack_blobs != blobs { + self.add_error(PackHeaderMismatchIndex { id }); + debug!("pack file header: {pack_blobs:?}"); + debug!("index: {:?}", blobs); + return Ok(()); + } + p.inc(u64::from(header_len) + 4); + + // check blobs + for blob in blobs { + let blob_id = blob.id; + let mut blob_data = be.decrypt(&data.split_to(blob.length as usize))?; + + // TODO: this is identical to backend/decrypt.rs; unify these two parts! + if let Some(length) = blob.uncompressed_length { + blob_data = decode_all(&*blob_data).unwrap(); + if blob_data.len() != length.get() as usize { + self.add_error(PackBlobLengthMismatch { id, blob_id }); + return Ok(()); + } + } + + let computed = hash(&blob_data); + if blob.id != computed { + self.add_error(PackBlobHashMismatch { + id, + blob_id, + computed, + }); return Ok(()); } + p.inc(blob.length.into()); } - let comp_id = hash(&blob_data); - if blob.id != comp_id { - error!("pack {id}, blob {blob_id}: Hash mismatch. 
Computed hash: {comp_id}"); - return Ok(()); - } - p.inc(blob.length.into()); + Ok(()) } - Ok(()) + // turn collected results into `CheckResults` + pub(crate) fn into_check_results(self) -> CheckResults { + CheckResults { + errors: self.errors.into_inner().unwrap(), + } + } } diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index 57ecda26..c70d7fce 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -22,7 +22,12 @@ use chrono::OutOfRangeError; use displaydoc::Display; use thiserror::Error; -use crate::{backend::node::NodeType, id::Id, repofile::indexfile::IndexPack}; +use crate::{ + backend::node::NodeType, + id::Id, + repofile::{indexfile::IndexPack, BlobType}, + FileType, +}; /// Result type that is being returned from methods that can fail and thus have [`RusticError`]s. pub type RusticResult = Result; @@ -218,6 +223,8 @@ pub enum CommandErrorKind { ConversionFromIntFailed(TryFromIntError), /// {0} is not allowed on an append-only repository NotAllowedWithAppendOnly(String), + /// check found errors in the repository + CheckFoundErrors, } /// [`CryptoErrorKind`] describes the errors that can happen while dealing with Cryptographic functions @@ -790,3 +797,83 @@ where Self(RusticErrorKind::from(value)) } } + +#[non_exhaustive] +#[derive(Error, Debug, Display)] +pub enum CheckError { + /// error reading pack {id} : {err} + ErrorReadingPack { id: Id, err: RusticError }, + /// cold file for hot file Type: {file_type:?}, Id: {id} does not exist + NoColdFile { id: Id, file_type: FileType }, + /// Type: {file_type:?}, Id: {id}: hot size: {size_hot}, actual size: {size} + HotFileSizeMismatch { + id: Id, + file_type: FileType, + size_hot: u32, + size: u32, + }, + /// hot file Type: {file_type:?}, Id: {id} is missing! + NoHotFile { id: Id, file_type: FileType }, + /// Error reading cached file Type: {file_type:?}, Id: {id} : {err} + ErrorReadingCache { + id: Id, + file_type: FileType, + err: Box, + }, + /// Error reading file Type: {file_type:?}, Id: {id} : {err} + ErrorReadingFile { + id: Id, + file_type: FileType, + err: Box, + }, + /// Cached file Type: {file_type:?}, Id: {id} is not identical to backend! + CacheMismatch { id: Id, file_type: FileType }, + /// pack {id}: No time is set! Run prune to correct this! + PackTimeNotSet { id: Id }, + /// pack {id}: blob {blob_id} blob type does not match: type: {blob_type:?}, expected: {expected:?} + PackBlobTypesMismatch { + id: Id, + blob_id: Id, + blob_type: BlobType, + expected: BlobType, + }, + /// pack {id}: blob {blob_id} offset in index: {offset}, expected: {expected} + PackBlobOffsetMismatch { + id: Id, + blob_id: Id, + offset: u32, + expected: u32, + }, + /// pack {id} not referenced in index. Can be a parallel backup job. To repair: 'rustic repair index'. + PackNotReferenced { id: Id }, + /// pack {id}: size computed by index: {index_size}, actual size: {size}. To repair: 'rustic repair index'. + PackSizeMismatchIndex { id: Id, index_size: u32, size: u32 }, + /// pack {id} is referenced by the index but not present! To repair: 'rustic repair index'." 
NoPack { id: Id },
+    /// file {file:?} doesn't have a content
+    FileHasNoContent { file: PathBuf },
+    /// file {file:?} blob {blob_num} has null ID
+    FileBlobHasNullId { file: PathBuf, blob_num: usize },
+    /// file {file:?} blob {blob_id} is missing in index
+    FileBlobNotInIndex { file: PathBuf, blob_id: Id },
+    /// dir {dir:?} doesn't have a subtree
+    NoSubTree { dir: PathBuf },
+    /// dir {dir:?} subtree has null ID
+    NullSubTree { dir: PathBuf },
+    /// pack {id}: data size does not match expected size. Read: {size} bytes, expected: {expected} bytes
+    PackSizeMismatch {
+        id: Id,
+        size: usize,
+        expected: usize,
+    },
+    /// pack {id}: Hash mismatch. Computed hash: {computed}
+    PackHashMismatch { id: Id, computed: Id },
+    /// pack {id}: Header length in pack file doesn't match index. In pack: {length}, computed: {computed}
+    PackHeaderLengthMismatch { id: Id, length: u32, computed: u32 },
+    /// pack {id}: Header from pack file does not match the index
+    PackHeaderMismatchIndex { id: Id },
+    /// pack {id}, blob {blob_id}: Actual uncompressed length does not fit saved uncompressed length
+    PackBlobLengthMismatch { id: Id, blob_id: Id },
+    /// pack {id}, blob {blob_id}: Hash mismatch. Computed hash: {computed}
+    PackBlobHashMismatch { id: Id, blob_id: Id, computed: Id },
+}
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index 5df9ccff..45b7f74c 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -119,6 +119,8 @@ pub(crate) mod repository;
 /// Virtual File System support - allows to act on the repository like on a file system
 pub mod vfs;
 
+pub use log::Level as LogLevel;
+
 // rustic_core Public API
 pub use crate::{
     backend::{
diff --git a/crates/core/src/repository.rs b/crates/core/src/repository.rs
index dbf71528..5b4a4f01 100644
--- a/crates/core/src/repository.rs
+++ b/crates/core/src/repository.rs
@@ -31,7 +31,7 @@ use crate::{
     commands::{
         self,
         backup::BackupOptions,
-        check::CheckOptions,
+        check::{CheckOptions, CheckResults, CheckResultsCollector},
         config::ConfigOptions,
         copy::CopySnapshot,
         forget::{ForgetGroups, KeepOptions},
@@ -57,7 +57,7 @@ use crate::{
         snapshotfile::{SnapshotGroup, SnapshotGroupCriterion},
         ConfigFile, PathList, RepoFile, SnapshotFile, SnapshotSummary, Tree,
     },
-    repository::{warm_up::warm_up, warm_up::warm_up_wait},
+    repository::warm_up::{warm_up, warm_up_wait},
     vfs::OpenFile,
     RepositoryBackends, RusticResult,
 };
@@ -1065,7 +1065,24 @@ impl<P: ProgressBars, S: Open> Repository<P, S> {
     ///
     // TODO: Document errors
     pub fn check(&self, opts: CheckOptions) -> RusticResult<()> {
-        opts.run(self)
+        let results = CheckResultsCollector::default().log(true);
+        results.check(opts, self)?;
+        results.into_check_results().is_ok()
+    }
+
+    /// Check the repository for errors or inconsistencies
+    ///
+    /// # Arguments
+    ///
+    /// * `opts` - The options to use
+    ///
+    /// # Errors
+    ///
+    // TODO: Document errors
+    pub fn check_results(&self, opts: CheckOptions) -> RusticResult<CheckResults> {
+        let results = CheckResultsCollector::default();
+        results.check(opts, self)?;
+        Ok(results.into_check_results())
     }
 
     /// Get the plan about what should be pruned and/or repacked.
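
A note on the structure introduced in this first patch: `CheckResultsCollector` takes `&self` in all of its methods and pushes findings into a `Mutex<Vec<_>>`, so one collector can be shared by rayon's parallel iterators without any mutable borrows. The following standalone miniature shows just that pattern (illustrative names, not rustic's actual types; it assumes the `rayon` crate, which the patch itself uses):

    use std::sync::Mutex;

    use rayon::prelude::*;

    #[derive(Default)]
    struct Collector {
        errors: Mutex<Vec<String>>,
    }

    impl Collector {
        // Interior mutability: callable through a shared reference, which is
        // all that `for_each` on a parallel iterator hands out to its workers.
        fn add_error(&self, err: String) {
            self.errors.lock().unwrap().push(err);
        }

        // Once all workers are done, consume the collector to recover the plain Vec.
        fn into_errors(self) -> Vec<String> {
            self.errors.into_inner().unwrap()
        }
    }

    fn main() {
        let collector = Collector::default();
        (0..100u32).into_par_iter().for_each(|i| {
            if i % 7 == 0 {
                collector.add_error(format!("item {i} failed"));
            }
        });
        println!("{} problems found", collector.into_errors().len());
    }

The same shape explains `into_check_results` above: the `Mutex` is only needed while checks run concurrently, and consuming the collector afterwards recovers the plain error list.
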
From 2336888cecd8d0d18b6757e7f223d3897c37d42d Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Tue, 7 May 2024 15:55:07 +0200 Subject: [PATCH 2/5] don't use glob imports --- crates/core/src/commands/check.rs | 67 ++++++++++++++++--------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/crates/core/src/commands/check.rs b/crates/core/src/commands/check.rs index df2c58a2..fd0b87c6 100644 --- a/crates/core/src/commands/check.rs +++ b/crates/core/src/commands/check.rs @@ -38,9 +38,6 @@ pub struct CheckOptions { pub read_data: bool, } -#[allow(clippy::enum_glob_use)] -use CheckError::*; - #[derive(Debug)] pub struct CheckResults { pub errors: Vec<(LogLevel, CheckError)>, @@ -166,7 +163,7 @@ impl CheckResultsCollector { let data = be.read_full(FileType::Pack, &id).unwrap(); match self.check_pack(be, pack, data, &p) { Ok(()) => {} - Err(err) => self.add_error(ErrorReadingPack { id, err }), + Err(err) => self.add_error(CheckError::ErrorReadingPack { id, err }), } }); p.finish(); @@ -220,8 +217,8 @@ impl CheckResultsCollector { for (id, size_hot) in files_hot { match files.remove(&id) { - None => self.add_error(NoColdFile { id, file_type }), - Some(size) if size != size_hot => self.add_error(HotFileSizeMismatch { + None => self.add_error(CheckError::NoColdFile { id, file_type }), + Some(size) if size != size_hot => self.add_error(CheckError::HotFileSizeMismatch { id, file_type, size_hot, @@ -232,7 +229,7 @@ impl CheckResultsCollector { } for (id, _) in files { - self.add_error(NoHotFile { id, file_type }); + self.add_error(CheckError::NoHotFile { id, file_type }); } p.finish(); @@ -277,18 +274,18 @@ impl CheckResultsCollector { cache.read_full(file_type, &id), be.read_full(file_type, &id), ) { - (Err(err), _) => self.add_error(ErrorReadingCache { + (Err(err), _) => self.add_error(CheckError::ErrorReadingCache { id, file_type, err: Box::new(err), }), - (_, Err(err)) => self.add_error(ErrorReadingFile { + (_, Err(err)) => self.add_error(CheckError::ErrorReadingFile { id, file_type, err: Box::new(RusticErrorKind::Backend(err).into()), }), (Ok(Some(data_cached)), Ok(data)) if data_cached != data => { - self.add_error(CacheMismatch { id, file_type }); + self.add_error(CheckError::CacheMismatch { id, file_type }); } (Ok(_), Ok(_)) => {} // everything ok } @@ -347,7 +344,7 @@ impl CheckResultsCollector { // Check if time is set _ if check_time && p.time.is_none() { - self.add_error(PackTimeNotSet { id: p.id }); + self.add_error(CheckError::PackTimeNotSet { id: p.id }); } // check offsests in index @@ -356,7 +353,7 @@ impl CheckResultsCollector { blobs.sort_unstable(); for blob in blobs { if blob.tpe != blob_type { - self.add_error(PackBlobTypesMismatch { + self.add_error(CheckError::PackBlobTypesMismatch { id: p.id, blob_id: blob.id, blob_type: blob.tpe, @@ -365,7 +362,7 @@ impl CheckResultsCollector { } if blob.offset != expected_offset { - self.add_error(PackBlobOffsetMismatch { + self.add_error(CheckError::PackBlobOffsetMismatch { id: p.id, blob_id: blob.id, offset: blob.offset, @@ -413,18 +410,20 @@ impl CheckResultsCollector { .map_err(RusticErrorKind::Backend)? 
{ match packs.remove(&id) { - None => self.add_warn(PackNotReferenced { id }), - Some(index_size) if index_size != size => self.add_error(PackSizeMismatchIndex { - id, - index_size, - size, - }), + None => self.add_warn(CheckError::PackNotReferenced { id }), + Some(index_size) if index_size != size => { + self.add_error(CheckError::PackSizeMismatchIndex { + id, + index_size, + size, + }); + } _ => {} //everything ok } } for (id, _) in packs { - self.add_error(NoPack { id }); + self.add_error(CheckError::NoPack { id }); } Ok(()) } @@ -461,21 +460,21 @@ impl CheckResultsCollector { match node.node_type { NodeType::File => node.content.as_ref().map_or_else( || { - self.add_error(FileHasNoContent { + self.add_error(CheckError::FileHasNoContent { file: path.join(node.name()), }); }, |content| { for (i, id) in content.iter().enumerate() { if id.is_null() { - self.add_error(FileBlobHasNullId { + self.add_error(CheckError::FileBlobHasNullId { file: path.join(node.name()), blob_num: i, }); } if !index.has_data(id) { - self.add_error(FileBlobNotInIndex { + self.add_error(CheckError::FileBlobNotInIndex { file: path.join(node.name()), blob_id: *id, }); @@ -486,12 +485,14 @@ impl CheckResultsCollector { NodeType::Dir => { match node.subtree { - None => self.add_error(NoSubTree { - dir: path.join(node.name()), - }), - Some(tree) if tree.is_null() => self.add_error(NullSubTree { + None => self.add_error(CheckError::NoSubTree { dir: path.join(node.name()), }), + Some(tree) if tree.is_null() => { + self.add_error(CheckError::NullSubTree { + dir: path.join(node.name()), + }); + } _ => {} // subtree is ok } } @@ -529,7 +530,7 @@ impl CheckResultsCollector { let id = index_pack.id; let size = index_pack.pack_size(); if data.len() != size as usize { - self.add_error(PackSizeMismatch { + self.add_error(CheckError::PackSizeMismatch { id, size: data.len(), expected: size as usize, @@ -539,7 +540,7 @@ impl CheckResultsCollector { let computed = hash(&data); if id != computed { - self.add_error(PackHashMismatch { id, computed }); + self.add_error(CheckError::PackHashMismatch { id, computed }); return Ok(()); } @@ -548,7 +549,7 @@ impl CheckResultsCollector { let pack_header_len = PackHeaderLength::from_binary(&data.split_off(data.len() - 4))?.to_u32(); if pack_header_len != header_len { - self.add_error(PackHeaderLengthMismatch { + self.add_error(CheckError::PackHeaderLengthMismatch { id, length: pack_header_len, computed: header_len, @@ -563,7 +564,7 @@ impl CheckResultsCollector { let mut blobs = index_pack.blobs; blobs.sort_unstable_by_key(|b| b.offset); if pack_blobs != blobs { - self.add_error(PackHeaderMismatchIndex { id }); + self.add_error(CheckError::PackHeaderMismatchIndex { id }); debug!("pack file header: {pack_blobs:?}"); debug!("index: {:?}", blobs); return Ok(()); @@ -579,14 +580,14 @@ impl CheckResultsCollector { if let Some(length) = blob.uncompressed_length { blob_data = decode_all(&*blob_data).unwrap(); if blob_data.len() != length.get() as usize { - self.add_error(PackBlobLengthMismatch { id, blob_id }); + self.add_error(CheckError::PackBlobLengthMismatch { id, blob_id }); return Ok(()); } } let computed = hash(&blob_data); if blob.id != computed { - self.add_error(PackBlobHashMismatch { + self.add_error(CheckError::PackBlobHashMismatch { id, blob_id, computed, From 85633055e653344691fe9b7536b5fa0ff70118ce Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Tue, 14 May 2024 13:56:19 +0200 Subject: [PATCH 3/5] use Box --- crates/core/src/commands/check.rs | 5 ++++- crates/core/src/error.rs | 2 
+-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/crates/core/src/commands/check.rs b/crates/core/src/commands/check.rs
index fd0b87c6..8e417c1f 100644
--- a/crates/core/src/commands/check.rs
+++ b/crates/core/src/commands/check.rs
@@ -163,7 +163,10 @@ impl CheckResultsCollector {
                     let data = be.read_full(FileType::Pack, &id).unwrap();
                     match self.check_pack(be, pack, data, &p) {
                         Ok(()) => {}
-                        Err(err) => self.add_error(CheckError::ErrorReadingPack { id, err }),
+                        Err(err) => self.add_error(CheckError::ErrorReadingPack {
+                            id,
+                            err: Box::new(err),
+                        }),
                     }
                 });
             p.finish();
diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs
index c70d7fce..31c6fa50 100644
--- a/crates/core/src/error.rs
+++ b/crates/core/src/error.rs
@@ -802,7 +802,7 @@ where
 #[derive(Error, Debug, Display)]
 pub enum CheckError {
     /// error reading pack {id} : {err}
-    ErrorReadingPack { id: Id, err: RusticError },
+    ErrorReadingPack { id: Id, err: Box<RusticError> },
     /// cold file for hot file Type: {file_type:?}, Id: {id} does not exist
     NoColdFile { id: Id, file_type: FileType },
     /// Type: {file_type:?}, Id: {id}: hot size: {size_hot}, actual size: {size}

From 7a28eebf5f3b70dd63b6ba28932e1d4f23fd3528 Mon Sep 17 00:00:00 2001
From: Alexander Weiss
Date: Tue, 28 May 2024 17:14:07 +0200
Subject: [PATCH 4/5] use own enum for error severity

---
 crates/core/src/commands/check.rs | 30 ++++++++++++++++++++++++------
 crates/core/src/lib.rs            |  4 +---
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/crates/core/src/commands/check.rs b/crates/core/src/commands/check.rs
index 8e417c1f..5d961803 100644
--- a/crates/core/src/commands/check.rs
+++ b/crates/core/src/commands/check.rs
@@ -21,7 +21,6 @@ use crate::{
     progress::{Progress, ProgressBars},
     repofile::{IndexFile, IndexPack, PackHeader, PackHeaderLength, PackHeaderRef, SnapshotFile},
     repository::{Open, Repository},
-    LogLevel,
 };
 
 #[cfg_attr(feature = "clap", derive(clap::Parser))]
@@ -38,17 +37,30 @@ pub struct CheckOptions {
     pub read_data: bool,
 }
 
+#[derive(Debug, PartialEq, Eq)]
+#[non_exhaustive]
+/// `CheckErrorLevel` describes severity levels of problems identified by check.
+pub enum CheckErrorLevel {
+    /// A warning: Something is strange and should be corrected, but repository integrity is not affected.
+    Warn,
+    /// An error: Something in the repository is not as it should be.
+    Error,
+}
+
 #[derive(Debug)]
+/// CheckResults is a list of errors encountered during the check.
 pub struct CheckResults {
-    pub errors: Vec<(LogLevel, CheckError)>,
+    /// The list of errors with severity level.
+    pub errors: Vec<(CheckErrorLevel, CheckError)>,
 }
 
 impl CheckResults {
+    /// Returns whether severe errors have been found.
     pub fn is_ok(&self) -> RusticResult<()> {
         if self
             .errors
             .iter()
-            .any(|(level, _)| level == &LogLevel::Error)
+            .any(|(level, _)| level == &CheckErrorLevel::Error)
         {
             return Err(CommandErrorKind::CheckFoundErrors.into());
         }
@@ -59,7 +71,7 @@ impl CheckResults {
 #[derive(Default)]
 pub struct CheckResultsCollector {
     log: bool,
-    errors: Mutex<Vec<(LogLevel, CheckError)>>,
+    errors: Mutex<Vec<(CheckErrorLevel, CheckError)>>,
 }
 
 impl CheckResultsCollector {
@@ -178,14 +190,20 @@ impl CheckResultsCollector {
         if self.log {
             error!("{err}");
         }
-        self.errors.lock().unwrap().push((LogLevel::Error, err));
+        self.errors
+            .lock()
+            .unwrap()
+            .push((CheckErrorLevel::Error, err));
     }
 
     fn add_warn(&self, err: CheckError) {
         if self.log {
             warn!("{err}");
         }
-        self.errors.lock().unwrap().push((LogLevel::Warn, err));
+        self.errors
+            .lock()
+            .unwrap()
+            .push((CheckErrorLevel::Warn, err));
     }
 
     /// Checks if all files in the backend are also in the hot backend
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index 45b7f74c..972120da 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -119,8 +119,6 @@ pub(crate) mod repository;
 /// Virtual File System support - allows to act on the repository like on a file system
 pub mod vfs;
 
-pub use log::Level as LogLevel;
-
 // rustic_core Public API
 pub use crate::{
     backend::{
@@ -134,7 +132,7 @@ pub use crate::{
     blob::tree::{FindMatches, FindNode, TreeStreamerOptions as LsOptions},
     commands::{
         backup::{BackupOptions, ParentOptions},
-        check::CheckOptions,
+        check::{CheckErrorLevel, CheckOptions, CheckResults},
         config::ConfigOptions,
         copy::CopySnapshot,
         forget::{ForgetGroup, ForgetGroups, ForgetSnapshot, KeepOptions},

From 524642a59756d0ccd2adbddc78380e87e22c6fab Mon Sep 17 00:00:00 2001
From: Alexander Weiss
Date: Tue, 28 May 2024 17:25:27 +0200
Subject: [PATCH 5/5] fix clippy lint

---
 crates/core/src/commands/check.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/commands/check.rs b/crates/core/src/commands/check.rs
index 5d961803..b5e1ccd2 100644
--- a/crates/core/src/commands/check.rs
+++ b/crates/core/src/commands/check.rs
@@ -48,7 +48,7 @@ pub enum CheckErrorLevel {
 }
 
 #[derive(Debug)]
-/// CheckResults is a list of errors encountered during the check.
+/// `CheckResults` is a list of errors encountered during the check.
 pub struct CheckResults {
     /// The list of errors with severity level.
     pub errors: Vec<(CheckErrorLevel, CheckError)>,
 }
 
 impl CheckResults {
     /// Returns whether severe errors have been found.
+    ///
+    /// # Errors
+    /// `CheckFoundErrors` if there are severe errors.
     pub fn is_ok(&self) -> RusticResult<()> {
         if self
             .errors
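
Taken together, the series leaves callers with two options: `repo.check(opts)` keeps the old fail-fast behaviour (log every finding, return `CheckFoundErrors` if anything severe was found), while `repo.check_results(opts)` hands back the collected findings. A sketch of the consuming side, using the types exported in patch 4; the empty `CheckResults` literal is fabricated for illustration and would normally come from `repo.check_results(opts)?`:

    use rustic_core::{CheckErrorLevel, CheckResults};

    fn main() {
        // In real code: let results = repo.check_results(CheckOptions::default())?;
        let results = CheckResults { errors: vec![] };

        for (level, err) in &results.errors {
            match level {
                CheckErrorLevel::Error => eprintln!("error: {err}"),
                CheckErrorLevel::Warn => eprintln!("warning: {err}"),
                // `CheckErrorLevel` is #[non_exhaustive], so downstream
                // matches need a catch-all arm.
                _ => eprintln!("unknown severity: {err}"),
            }
        }

        // `is_ok` turns the collected list into a hard failure if any entry
        // has severity `Error`, the same test `Repository::check` applies.
        if results.is_ok().is_err() {
            std::process::exit(1);
        }
    }

Since `CheckError` derives `displaydoc::Display`, the doc comments on its variants double as the user-facing messages printed here.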