-
Notifications
You must be signed in to change notification settings - Fork 88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Return error instead of panicking if rewriting fails #343
Changes from 13 commits
3a75b78
77f8beb
34db5d0
452d57e
a20cd43
43b25ca
6fcb077
8cc474d
1fd5416
c606f51
61fbdb6
862fe0b
0d2924b
a59609a
0554cd1
2c81285
bd2c3b4
005418f
2c8d59a
395d530
cee2d8f
8c2eb45
445fd1e
3106c04
bb27b29
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,9 +72,7 @@ pub(super) struct SinglePipe<F: FileSystem> { | |
impl<F: FileSystem> Drop for SinglePipe<F> { | ||
fn drop(&mut self) { | ||
let mut writable_file = self.writable_file.lock(); | ||
if let Err(e) = writable_file.writer.close() { | ||
error!("error while closing the active writer: {e}"); | ||
} | ||
writable_file.writer.close(); | ||
let mut recycled_files = self.recycled_files.write(); | ||
let mut next_reserved_seq = recycled_files | ||
.iter() | ||
|
@@ -248,7 +246,7 @@ impl<F: FileSystem> SinglePipe<F> { | |
let new_seq = writable_file.seq + 1; | ||
debug_assert!(new_seq > DEFAULT_FIRST_FILE_SEQ); | ||
|
||
writable_file.writer.close()?; | ||
writable_file.writer.close(); | ||
|
||
let (path_id, handle) = self | ||
.recycle_file(new_seq) | ||
|
@@ -272,7 +270,7 @@ impl<F: FileSystem> SinglePipe<F> { | |
}; | ||
// File header must be persisted. This way we can recover gracefully if power | ||
// loss before a new entry is written. | ||
new_file.writer.sync()?; | ||
new_file.writer.sync(); | ||
self.sync_dir(path_id)?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This error needs to be handled carefully now (e.g. remove the newly created file and make sure the old writer is okay to write again). Better to just unwrap it as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Made But There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably. I suggest adding a few restarts in test_file_rotate_error. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a few more verifications to the test_file_rotate_error test; does that address your concern? PTAL |
||
|
||
**writable_file = new_file; | ||
|
@@ -321,12 +319,7 @@ impl<F: FileSystem> SinglePipe<F> { | |
fail_point!("file_pipe_log::append"); | ||
let mut writable_file = self.writable_file.lock(); | ||
if writable_file.writer.offset() >= self.target_file_size { | ||
if let Err(e) = self.rotate_imp(&mut writable_file) { | ||
panic!( | ||
"error when rotate [{:?}:{}]: {e}", | ||
self.queue, writable_file.seq, | ||
); | ||
} | ||
self.rotate_imp(&mut writable_file)?; | ||
v01dstar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
let seq = writable_file.seq; | ||
|
@@ -359,9 +352,7 @@ impl<F: FileSystem> SinglePipe<F> { | |
} | ||
let start_offset = writer.offset(); | ||
if let Err(e) = writer.write(bytes.as_bytes(&ctx), self.target_file_size) { | ||
if let Err(te) = writer.truncate() { | ||
panic!("error when truncate {seq} after error: {e}, get: {}", te); | ||
} | ||
writer.truncate(); | ||
if is_no_space_err(&e) { | ||
// TODO: There exist several corner cases that should be tackled if | ||
// `bytes.len()` > `target_file_size`. For example, | ||
|
@@ -372,12 +363,7 @@ impl<F: FileSystem> SinglePipe<F> { | |
// - [3] Both main-dir and spill-dir have several recycled logs. | ||
// But as `bytes.len()` is always smaller than `target_file_size` in common | ||
// cases, this issue will be ignored temporarily. | ||
if let Err(e) = self.rotate_imp(&mut writable_file) { | ||
panic!( | ||
"error when rotate [{:?}:{}]: {e}", | ||
self.queue, writable_file.seq | ||
); | ||
} | ||
self.rotate_imp(&mut writable_file)?; | ||
v01dstar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// If there still exists free space for this record, rotate the file | ||
// and return a special TryAgain Err (for retry) to the caller. | ||
return Err(Error::TryAgain(format!( | ||
|
@@ -401,18 +387,11 @@ impl<F: FileSystem> SinglePipe<F> { | |
Ok(handle) | ||
} | ||
|
||
fn sync(&self) -> Result<()> { | ||
fn sync(&self) { | ||
let mut writable_file = self.writable_file.lock(); | ||
let seq = writable_file.seq; | ||
let writer = &mut writable_file.writer; | ||
{ | ||
let _t = StopWatch::new(perf_context!(log_sync_duration)); | ||
if let Err(e) = writer.sync() { | ||
panic!("error when sync [{:?}:{seq}]: {e}", self.queue); | ||
} | ||
} | ||
|
||
Ok(()) | ||
let _t = StopWatch::new(perf_context!(log_sync_duration)); | ||
writer.sync(); | ||
} | ||
|
||
fn file_span(&self) -> (FileSeq, FileSeq) { | ||
|
@@ -520,8 +499,8 @@ impl<F: FileSystem> PipeLog for DualPipes<F> { | |
} | ||
|
||
#[inline] | ||
fn sync(&self, queue: LogQueue) -> Result<()> { | ||
self.pipes[queue as usize].sync() | ||
fn sync(&self, queue: LogQueue) { | ||
self.pipes[queue as usize].sync(); | ||
} | ||
|
||
#[inline] | ||
|
@@ -716,7 +695,7 @@ mod tests { | |
let mut handles = Vec::new(); | ||
for i in 0..10 { | ||
handles.push(pipe_log.append(&mut &content(i)).unwrap()); | ||
pipe_log.sync().unwrap(); | ||
pipe_log.sync(); | ||
} | ||
pipe_log.rotate().unwrap(); | ||
let (first, last) = pipe_log.file_span(); | ||
|
@@ -733,7 +712,7 @@ mod tests { | |
let mut handles = Vec::new(); | ||
for i in 0..10 { | ||
handles.push(pipe_log.append(&mut &content(i + 1)).unwrap()); | ||
pipe_log.sync().unwrap(); | ||
pipe_log.sync(); | ||
} | ||
// Verify the data. | ||
for (i, handle) in handles.into_iter().enumerate() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -273,7 +273,7 @@ where | |
// Rewrites the entire rewrite queue into new log files. | ||
fn rewrite_rewrite_queue(&self) -> Result<Vec<u64>> { | ||
let _t = StopWatch::new(&*ENGINE_REWRITE_REWRITE_DURATION_HISTOGRAM); | ||
self.pipe_log.rotate(LogQueue::Rewrite)?; | ||
self.pipe_log.rotate(LogQueue::Rewrite).unwrap(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why unwrap this? |
||
|
||
let mut force_compact_regions = vec![]; | ||
let memtables = self.memtables.collect(|t| { | ||
|
@@ -430,7 +430,7 @@ where | |
) -> Result<Option<FileBlockHandle>> { | ||
if log_batch.is_empty() { | ||
debug_assert!(sync); | ||
self.pipe_log.sync(LogQueue::Rewrite)?; | ||
self.pipe_log.sync(LogQueue::Rewrite); | ||
return Ok(None); | ||
} | ||
log_batch.finish_populate( | ||
|
@@ -439,7 +439,7 @@ where | |
)?; | ||
let file_handle = self.pipe_log.append(LogQueue::Rewrite, log_batch)?; | ||
if sync { | ||
self.pipe_log.sync(LogQueue::Rewrite)? | ||
self.pipe_log.sync(LogQueue::Rewrite); | ||
} | ||
log_batch.finish_write(file_handle); | ||
self.memtables.apply_rewrite_writes( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,19 +165,17 @@ fn test_file_rotate_error() { | |
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make two versions of this test: // case 1
if restart {
let engine = Engine::open_with_file_system(cfg.clone(), fs.clone()).unwrap();
}
// case 2
// ... |
||
// Fail to create new log file. | ||
let _f = FailGuard::new("default_fs::create::err", "return"); | ||
assert!(catch_unwind_silent(|| { | ||
let _ = engine.write(&mut generate_batch(1, 4, 5, Some(&entry)), false); | ||
}) | ||
.is_err()); | ||
assert!(engine | ||
.write(&mut generate_batch(1, 4, 5, Some(&entry)), false) | ||
.is_err()); | ||
assert_eq!(engine.file_span(LogQueue::Append).1, 1); | ||
} | ||
{ | ||
// Fail to write header of new log file. | ||
let _f = FailGuard::new("log_file::write::err", "1*off->return"); | ||
assert!(catch_unwind_silent(|| { | ||
let _ = engine.write(&mut generate_batch(1, 4, 5, Some(&entry)), false); | ||
}) | ||
.is_err()); | ||
assert!(engine | ||
.write(&mut generate_batch(1, 4, 5, Some(&entry)), false) | ||
.is_err()); | ||
assert_eq!(engine.file_span(LogQueue::Append).1, 1); | ||
} | ||
{ | ||
|
@@ -527,20 +525,17 @@ fn test_no_space_write_error() { | |
cfg.dir = dir.path().to_str().unwrap().to_owned(); | ||
cfg.spill_dir = Some(spill_dir.path().to_str().unwrap().to_owned()); | ||
{ | ||
// Case 1: `Write` is abnormal for no space left, Engine should panic at | ||
// Case 1: `Write` is abnormal for no space left, Engine should fail at | ||
// `rotate`. | ||
let cfg_err = Config { | ||
target_file_size: ReadableSize(1), | ||
..cfg.clone() | ||
}; | ||
let engine = Engine::open(cfg_err).unwrap(); | ||
let _f = FailGuard::new("log_fd::write::no_space_err", "return"); | ||
assert!(catch_unwind_silent(|| { | ||
engine | ||
.write(&mut generate_batch(2, 11, 21, Some(&entry)), true) | ||
.unwrap_err(); | ||
}) | ||
.is_err()); | ||
assert!(engine | ||
.write(&mut generate_batch(2, 11, 21, Some(&entry)), true) | ||
.is_err()); | ||
assert_eq!( | ||
0, | ||
engine | ||
|
@@ -554,12 +549,9 @@ fn test_no_space_write_error() { | |
let _f1 = FailGuard::new("log_fd::write::no_space_err", "2*return->off"); | ||
let _f2 = FailGuard::new("file_pipe_log::force_choose_dir", "return"); | ||
// The first write should fail, because all dirs run out of space for writing. | ||
assert!(catch_unwind_silent(|| { | ||
engine | ||
.write(&mut generate_batch(2, 11, 21, Some(&entry)), true) | ||
.unwrap_err(); | ||
}) | ||
.is_err()); | ||
assert!(engine | ||
.write(&mut generate_batch(2, 11, 21, Some(&entry)), true) | ||
.is_err()); | ||
assert_eq!( | ||
0, | ||
engine | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment to this struct stating that it should be fail-safe, i.e. the user can still use the writer without breaking data consistency after any operation has failed.