From 5b7babecd344c22dcafcc7c5550cebe6783273e2 Mon Sep 17 00:00:00 2001 From: Shaowei Song Date: Tue, 18 Jun 2024 16:55:56 +0800 Subject: [PATCH] Optimize handling BVT/BALs --- core/src/layers/4-lsm/tx_lsm_tree.rs | 10 +++++++--- core/src/layers/5-disk/block_alloc.rs | 18 ++++++++++++++++-- core/src/layers/5-disk/sworndisk.rs | 24 +++++++++++++++++------- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/core/src/layers/4-lsm/tx_lsm_tree.rs b/core/src/layers/4-lsm/tx_lsm_tree.rs index a693c00..6cfb26d 100644 --- a/core/src/layers/4-lsm/tx_lsm_tree.rs +++ b/core/src/layers/4-lsm/tx_lsm_tree.rs @@ -407,8 +407,6 @@ impl, V: RecordValue, D: BlockSet + 'static> TreeInner self.memtable_manager.sync(master_sync_id); - self.tx_log_store.sync().unwrap(); - // TODO: Error handling: try twice or ignore self.master_sync_id.increment()?; Ok(()) @@ -767,6 +765,12 @@ impl, V: RecordValue, D: BlockSet + 'static> Debug for TreeInner } } +impl, V: RecordValue, D: BlockSet + 'static> Debug for TxLsmTree { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.0) + } +} + impl LsmLevel { const LEVEL0_RATIO: u16 = 4; const LEVELI_RATIO: u16 = 10; @@ -963,7 +967,7 @@ mod tests { #[test] fn tx_lsm_tree_fns() -> Result<()> { - let nblocks = 64 * 1024; + let nblocks = 102400; let mem_disk = MemDisk::create(nblocks)?; let tx_log_store = Arc::new(TxLogStore::format(mem_disk, Key::random())?); let tx_lsm_tree: TxLsmTree = diff --git a/core/src/layers/5-disk/block_alloc.rs b/core/src/layers/5-disk/block_alloc.rs index 8674973..795ef23 100644 --- a/core/src/layers/5-disk/block_alloc.rs +++ b/core/src/layers/5-disk/block_alloc.rs @@ -8,7 +8,7 @@ use crate::util::BitMap; use core::mem::size_of; use core::num::NonZeroUsize; -use core::sync::atomic::{AtomicUsize, Ordering}; +use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use pod::Pod; use serde::{Deserialize, Serialize}; @@ -29,6 +29,7 @@ pub(super) struct AllocTable { bitmap: Mutex, next_avail: AtomicUsize, nblocks: NonZeroUsize, + is_dirty: AtomicBool, cvar: Condvar, num_free: CvarMutex, } @@ -60,6 +61,7 @@ impl AllocTable { bitmap: Mutex::new(BitMap::repeat(true, nblocks.get())), next_avail: AtomicUsize::new(0), nblocks, + is_dirty: AtomicBool::new(false), cvar: Condvar::new(), num_free: CvarMutex::new(nblocks.get()), } @@ -97,6 +99,9 @@ impl AllocTable { debug_assert_eq!(hbas.len(), cnt); *num_free -= cnt; + let _ = self + .is_dirty + .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed); Ok(hbas) } @@ -160,6 +165,7 @@ impl AllocTable { bitmap: Mutex::new(bitmap), next_avail: AtomicUsize::new(next_avail), nblocks, + is_dirty: AtomicBool::new(false), cvar: Condvar::new(), num_free: CvarMutex::new(num_free), }); @@ -203,6 +209,7 @@ impl AllocTable { bitmap: Mutex::new(bitmap), next_avail: AtomicUsize::new(next_avail), nblocks, + is_dirty: AtomicBool::new(false), cvar: Condvar::new(), num_free: CvarMutex::new(num_free), }) @@ -218,6 +225,10 @@ impl AllocTable { /// Persist the block validity table to `BVT` log. GC all existed `BAL` logs. pub fn do_compaction(&self, store: &Arc>) -> Result<()> { + if !self.is_dirty.load(Ordering::Relaxed) { + return Ok(()); + } + // Serialize the block validity table let bitmap = self.bitmap.lock(); const BITMAP_MAX_SIZE: usize = 1792 * BLOCK_SIZE; // TBD @@ -252,7 +263,10 @@ impl AllocTable { tx.abort(); return_errno_with_msg!(TxAborted, "persist block validity table TX aborted"); } - tx.commit() + tx.commit()?; + + self.is_dirty.store(false, Ordering::Relaxed); + Ok(()) } /// Mark a specific slot deallocated. diff --git a/core/src/layers/5-disk/sworndisk.rs b/core/src/layers/5-disk/sworndisk.rs index 3322f66..128d911 100644 --- a/core/src/layers/5-disk/sworndisk.rs +++ b/core/src/layers/5-disk/sworndisk.rs @@ -91,10 +91,11 @@ impl SwornDisk { /// Sync all cached data in the device to the storage medium for durability. pub fn sync(&self) -> Result<()> { let _wguard = self.inner.write_sync_region.write(); - self.inner.sync()?; + // TODO: Error handling the sync operation + self.inner.sync().unwrap(); #[cfg(not(feature = "linux"))] - debug!("[SwornDisk] Sync completed"); + trace!("[SwornDisk] Sync completed. {self:?}"); Ok(()) } @@ -150,7 +151,7 @@ impl SwornDisk { }; #[cfg(not(feature = "linux"))] - debug!("[SwornDisk] Created successfully!"); + info!("[SwornDisk] Created successfully! {:?}", &new_self); // XXX: Would `disk::drop()` bring unexpected behavior? Ok(new_self) } @@ -203,7 +204,7 @@ impl SwornDisk { }; #[cfg(not(feature = "linux"))] - debug!("[SwornDisk] Opened successfully!"); + info!("[SwornDisk] Opened successfully! {:?}", &opened_self); Ok(opened_self) } @@ -450,13 +451,13 @@ impl DiskInner { self.logical_block_table.sync()?; - self.user_data_disk.flush()?; - // XXX: May impact performance when there comes frequent syncs self.block_validity_table .do_compaction(&self.tx_log_store)?; - Ok(()) + self.tx_log_store.sync()?; + + self.user_data_disk.flush() } /// Handle one block I/O request. Mark the request completed when finished, @@ -524,6 +525,15 @@ impl Drop for SwornDisk { } } +impl Debug for SwornDisk { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SwornDisk") + .field("user_data_nblocks", &self.inner.user_data_disk.nblocks()) + .field("logical_block_table", &self.inner.logical_block_table) + .finish() + } +} + /// A wrapper for `[BufMut]` used in `readv()`. struct BufMutVec<'a> { bufs: &'a mut [BufMut<'a>],