Add asynchronous I/O support for page cache
Yingdi Shan authored and tatetian committed Mar 27, 2024
1 parent 223e8c2 commit 679a8d8
Showing 7 changed files with 122 additions and 55 deletions.
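The heart of the change is the `PageCacheBackend` trait in `page_cache.rs`: `read_page` and `write_page` no longer block and return `Result<()>`, but instead submit the bio and return a `Result<BioWaiter>` that the caller can wait on later. A minimal sketch of the resulting submit-then-wait calling pattern, assuming only the `BioWaiter::wait`/`BioStatus` semantics visible in this diff (`flush_one_page` is a hypothetical caller, not part of the commit):

// Hypothetical caller: submit first, block only when the result is needed.
fn flush_one_page(backend: &dyn PageCacheBackend, idx: usize, frame: &VmFrame) -> Result<()> {
    // Submission returns immediately with a waiter instead of blocking.
    let waiter = backend.write_page(idx, frame)?;
    // Wait at the last possible moment and map failure to EIO.
    match waiter.wait() {
        Some(BioStatus::Complete) => Ok(()),
        _ => return_errno!(Errno::EIO),
    }
}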
21 changes: 11 additions & 10 deletions kernel/aster-nix/src/fs/exfat/fs.rs
@@ -2,7 +2,7 @@
 
 use core::{num::NonZeroUsize, ops::Range, sync::atomic::AtomicU64};
 
-use aster_block::{id::BlockId, BlockDevice};
+use aster_block::{bio::BioWaiter, id::BlockId, BlockDevice};
 use aster_frame::vm::VmFrame;
 pub(super) use aster_frame::vm::VmIo;
 use hashbrown::HashMap;
@@ -361,23 +361,24 @@ impl ExfatFS {
 }
 
 impl PageCacheBackend for ExfatFS {
-    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         if self.fs_size() < idx * PAGE_SIZE {
             return_errno_with_message!(Errno::EINVAL, "invalid read size")
         }
-        self.block_device
-            .read_block_sync(BlockId::new(idx as u64), frame)?;
-        Ok(())
+        let waiter = self
+            .block_device
+            .read_block(BlockId::new(idx as u64), frame)?;
+        Ok(waiter)
     }
 
     // What if block_size is not equal to page size?
-    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         if self.fs_size() < idx * PAGE_SIZE {
             return_errno_with_message!(Errno::EINVAL, "invalid write size")
         }
-        self.block_device
-            .write_block_sync(BlockId::new(idx as u64), frame)?;
-        Ok(())
+        let waiter = self
+            .block_device
+            .write_block(BlockId::new(idx as u64), frame)?;
+        Ok(waiter)
     }
 
     fn npages(&self) -> usize {
13 changes: 7 additions & 6 deletions kernel/aster-nix/src/fs/exfat/inode.rs
@@ -5,6 +5,7 @@ use core::{cmp::Ordering, time::Duration};
 
 pub(super) use align_ext::AlignExt;
 use aster_block::{
+    bio::BioWaiter,
     id::{Bid, BlockId},
     BLOCK_SIZE,
 };
@@ -131,32 +132,32 @@ struct ExfatInodeInner {
 }
 
 impl PageCacheBackend for ExfatInode {
-    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         let inner = self.inner.read();
         if inner.size < idx * PAGE_SIZE {
             return_errno_with_message!(Errno::EINVAL, "Invalid read size")
         }
         let sector_id = inner.get_sector_id(idx * PAGE_SIZE / inner.fs().sector_size())?;
-        inner.fs().block_device().read_block_sync(
+        let waiter = inner.fs().block_device().read_block(
             BlockId::from_offset(sector_id * inner.fs().sector_size()),
             frame,
         )?;
-        Ok(())
+        Ok(waiter)
     }
 
-    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         let inner = self.inner.read();
         let sector_size = inner.fs().sector_size();
 
         let sector_id = inner.get_sector_id(idx * PAGE_SIZE / inner.fs().sector_size())?;
 
         // FIXME: We may need to truncate the file if write_page fails.
         // To fix this issue, we need to change the interface of the PageCacheBackend trait.
-        inner.fs().block_device().write_block_sync(
+        let waiter = inner.fs().block_device().write_block(
             BlockId::from_offset(sector_id * inner.fs().sector_size()),
             frame,
         )?;
-        Ok(())
+        Ok(waiter)
     }
 
     fn npages(&self) -> usize {
10 changes: 4 additions & 6 deletions kernel/aster-nix/src/fs/ext2/block_group.rs
@@ -306,16 +306,14 @@ impl Debug for BlockGroup {
 }
 
 impl PageCacheBackend for BlockGroupImpl {
-    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         let bid = self.inode_table_bid + idx as u64;
-        self.fs.upgrade().unwrap().read_block(bid, frame)?;
-        Ok(())
+        self.fs.upgrade().unwrap().read_block_async(bid, frame)
     }
 
-    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         let bid = self.inode_table_bid + idx as u64;
-        self.fs.upgrade().unwrap().write_block(bid, frame)?;
-        Ok(())
+        self.fs.upgrade().unwrap().write_block_async(bid, frame)
     }
 
     fn npages(&self) -> usize {
12 changes: 12 additions & 0 deletions kernel/aster-nix/src/fs/ext2/fs.rs
@@ -263,6 +263,12 @@ impl Ext2 {
         }
     }
 
+    /// Reads one block indicated by the `bid` asynchronously.
+    pub(super) fn read_block_async(&self, bid: Bid, frame: &VmFrame) -> Result<BioWaiter> {
+        let waiter = self.block_device.read_block(bid, frame)?;
+        Ok(waiter)
+    }
+
     /// Writes contiguous blocks starting from the `bid` synchronously.
     pub(super) fn write_blocks(&self, bid: Bid, segment: &VmSegment) -> Result<()> {
         let status = self.block_device.write_blocks_sync(bid, segment)?;
@@ -281,6 +287,12 @@ impl Ext2 {
         }
     }
 
+    /// Writes one block indicated by the `bid` asynchronously.
+    pub(super) fn write_block_async(&self, bid: Bid, frame: &VmFrame) -> Result<BioWaiter> {
+        let waiter = self.block_device.write_block(bid, frame)?;
+        Ok(waiter)
+    }
+
     /// Writes back the metadata to the block device.
     pub fn sync_metadata(&self) -> Result<()> {
         // If the superblock is clean, the block groups must be clean.
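These two helpers mirror the existing synchronous `read_blocks`/`write_blocks`, but hand the `BioWaiter` back to the caller, which is what decouples request submission from waiting. A hypothetical sketch of the batching this enables (the function and its loop are illustrative; only `read_block_async`, `Bid + u64`, and the waiter handling appear in this commit):

// Hypothetical: submit all reads up front, then wait on the waiters,
// so several requests can be in flight on the device at the same time.
fn read_blocks_overlapped(fs: &Ext2, start: Bid, frames: &[VmFrame]) -> Result<()> {
    let mut waiters = Vec::with_capacity(frames.len());
    for (i, frame) in frames.iter().enumerate() {
        waiters.push(fs.read_block_async(start + i as u64, frame)?);
    }
    for waiter in waiters {
        match waiter.wait() {
            Some(BioStatus::Complete) => {}
            _ => return_errno!(Errno::EIO),
        }
    }
    Ok(())
}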
64 changes: 43 additions & 21 deletions kernel/aster-nix/src/fs/ext2/inode.rs
@@ -670,7 +670,7 @@ impl Inner {
         let mut buf_offset = 0;
         for bid in Bid::from_offset(offset)..Bid::from_offset(offset + read_len) {
             let frame = VmAllocOptions::new(1).uninit(true).alloc_single().unwrap();
-            self.inode_impl.read_block(bid, &frame)?;
+            self.inode_impl.read_block_sync(bid, &frame)?;
             frame.read_bytes(0, &mut buf[buf_offset..buf_offset + BLOCK_SIZE])?;
             buf_offset += BLOCK_SIZE;
         }
@@ -710,7 +710,7 @@ impl Inner {
                 frame.write_bytes(0, &buf[buf_offset..buf_offset + BLOCK_SIZE])?;
                 frame
             };
-            self.inode_impl.write_block(bid, &frame)?;
+            self.inode_impl.write_block_sync(bid, &frame)?;
             buf_offset += BLOCK_SIZE;
         }
 
@@ -844,7 +844,7 @@ impl InodeImpl_ {
         self.weak_self.upgrade().unwrap()
     }
 
-    pub fn read_block(&self, bid: Bid, block: &VmFrame) -> Result<()> {
+    pub fn read_block_async(&self, bid: Bid, block: &VmFrame) -> Result<BioWaiter> {
         let bid = bid.to_raw() as u32;
         if bid >= self.desc.blocks_count() {
             return_errno!(Errno::EINVAL);
@@ -853,23 +853,35 @@
         debug_assert!(field::DIRECT.contains(&(bid as usize)));
         if self.blocks_hole_desc.is_hole(bid as usize) {
             block.writer().fill(0);
-            return Ok(());
+            return Ok(BioWaiter::new());
         }
         let device_bid = Bid::new(self.desc.data[bid as usize] as _);
-        self.inode().fs().read_block(device_bid, block)?;
-        Ok(())
+        self.inode().fs().read_block_async(device_bid, block)
     }
 
+    pub fn read_block_sync(&self, bid: Bid, block: &VmFrame) -> Result<()> {
+        match self.read_block_async(bid, block)?.wait() {
+            Some(BioStatus::Complete) => Ok(()),
+            _ => return_errno!(Errno::EIO),
+        }
+    }
+
-    pub fn write_block(&self, bid: Bid, block: &VmFrame) -> Result<()> {
+    pub fn write_block_async(&self, bid: Bid, block: &VmFrame) -> Result<BioWaiter> {
         let bid = bid.to_raw() as u32;
         if bid >= self.desc.blocks_count() {
             return_errno!(Errno::EINVAL);
         }
 
         debug_assert!(field::DIRECT.contains(&(bid as usize)));
         let device_bid = Bid::new(self.desc.data[bid as usize] as _);
-        self.inode().fs().write_block(device_bid, block)?;
-        Ok(())
+        self.inode().fs().write_block_async(device_bid, block)
     }
 
+    pub fn write_block_sync(&self, bid: Bid, block: &VmFrame) -> Result<()> {
+        match self.write_block_async(bid, block)?.wait() {
+            Some(BioStatus::Complete) => Ok(()),
+            _ => return_errno!(Errno::EIO),
+        }
+    }
+
     pub fn resize(&mut self, new_size: usize) -> Result<()> {
@@ -1006,13 +1018,25 @@ impl InodeImpl {
         self.0.read().desc.ctime
     }
 
-    pub fn read_block(&self, bid: Bid, block: &VmFrame) -> Result<()> {
-        self.0.read().read_block(bid, block)
+    pub fn read_block_sync(&self, bid: Bid, block: &VmFrame) -> Result<()> {
+        self.0.read().read_block_sync(bid, block)
     }
 
-    pub fn write_block(&self, bid: Bid, block: &VmFrame) -> Result<()> {
+    pub fn read_block_async(&self, bid: Bid, block: &VmFrame) -> Result<BioWaiter> {
+        self.0.read().read_block_async(bid, block)
+    }
+
+    pub fn write_block_sync(&self, bid: Bid, block: &VmFrame) -> Result<()> {
+        let waiter = self.write_block_async(bid, block)?;
+        match waiter.wait() {
+            Some(BioStatus::Complete) => Ok(()),
+            _ => return_errno!(Errno::EIO),
+        }
+    }
+
+    pub fn write_block_async(&self, bid: Bid, block: &VmFrame) -> Result<BioWaiter> {
         let inner = self.0.read();
-        inner.write_block(bid, block)?;
+        let waiter = inner.write_block_async(bid, block)?;
 
         let bid = bid.to_raw() as usize;
         if inner.blocks_hole_desc.is_hole(bid) {
@@ -1022,7 +1046,7 @@ impl InodeImpl {
                 inner.blocks_hole_desc.unset(bid);
             }
         }
-        Ok(())
+        Ok(waiter)
     }
 
     pub fn set_device_id(&self, device_id: u64) {
@@ -1059,7 +1083,7 @@ impl InodeImpl {
         let zero_frame = VmAllocOptions::new(1).alloc_single().unwrap();
         for bid in 0..inner.desc.blocks_count() {
             if inner.blocks_hole_desc.is_hole(bid as usize) {
-                inner.write_block(Bid::new(bid as _), &zero_frame)?;
+                inner.write_block_sync(Bid::new(bid as _), &zero_frame)?;
                 inner.blocks_hole_desc.unset(bid as usize);
             }
         }
@@ -1095,16 +1119,14 @@
 }
 
 impl PageCacheBackend for InodeImpl {
-    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         let bid = Bid::new(idx as _);
-        self.read_block(bid, frame)?;
-        Ok(())
+        self.read_block_async(bid, frame)
    }
 
-    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         let bid = Bid::new(idx as _);
-        self.write_block(bid, frame)?;
-        Ok(())
+        self.write_block_async(bid, frame)
     }
 
     fn npages(&self) -> usize {
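One subtlety in `read_block_async` above: a block hole is satisfied by zero-filling the frame in memory, so nothing is submitted to the device and the method returns `BioWaiter::new()`. For `read_block_sync` to keep working on holes, an empty waiter must report completion immediately; the sync wrappers in this commit (and the RamFS backend below) rely on that behavior. A small sketch of the assumed invariant:

// Assumed invariant: a waiter with no associated bios behaves like an
// already-completed request, so sync wrappers succeed without device I/O.
fn empty_waiter_completes() {
    let waiter = BioWaiter::new(); // e.g. returned for a file hole
    assert!(matches!(waiter.wait(), Some(BioStatus::Complete)));
}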
9 changes: 5 additions & 4 deletions kernel/aster-nix/src/fs/ramfs/fs.rs
@@ -5,6 +5,7 @@ use core::{
     time::Duration,
 };
 
+use aster_block::bio::BioWaiter;
 use aster_frame::{
     sync::RwLockWriteGuard,
     vm::{VmFrame, VmIo},
@@ -420,15 +421,15 @@ impl RamInode {
 }
 
 impl PageCacheBackend for RamInode {
-    fn read_page(&self, _idx: usize, frame: &VmFrame) -> Result<()> {
+    fn read_page(&self, _idx: usize, frame: &VmFrame) -> Result<BioWaiter> {
         // Initially, any block/page in a RamFs inode contains all zeros
         frame.writer().fill(0);
-        Ok(())
+        Ok(BioWaiter::new())
     }
 
-    fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
+    fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<BioWaiter> {
         // do nothing
-        Ok(())
+        Ok(BioWaiter::new())
     }
 
     fn npages(&self) -> usize {
48 changes: 40 additions & 8 deletions kernel/aster-nix/src/fs/utils/page_cache.rs
@@ -2,6 +2,7 @@
 
 use core::ops::Range;
 
+use aster_block::bio::{BioStatus, BioWaiter};
 use aster_frame::vm::{VmAllocOptions, VmFrame};
 use aster_rights::Full;
 use lru::LruCache;
@@ -98,18 +99,30 @@ impl PageCacheManager {
     pub fn evict_range(&self, range: Range<usize>) -> Result<()> {
         let page_idx_range = get_page_idx_range(&range);
         let mut pages = self.pages.lock();
+
+        //TODO: When there are many pages, we should submit them in batches of folios rather than all at once.
+        let mut indices_and_waiters: Vec<(usize, BioWaiter)> = Vec::new();
+
         for idx in page_idx_range {
             if let Some(page) = pages.get_mut(&idx) {
                 if let PageState::Dirty = page.state() {
                     let backend = self.backend();
                     if idx < backend.npages() {
-                        backend.write_page(idx, page.frame())?;
-                        page.set_state(PageState::UpToDate);
+                        indices_and_waiters.push((idx, backend.write_page(idx, page.frame())?));
                     }
                 }
             }
         }
+
+        for (idx, waiter) in indices_and_waiters.iter() {
+            if matches!(waiter.wait(), Some(BioStatus::Complete)) {
+                pages.get_mut(idx).unwrap().set_state(PageState::UpToDate)
+            } else {
+                // TODO: We may need an error handler here.
+                return_errno!(Errno::EIO)
+            }
+        }
+
         Ok(())
     }
 }
@@ -131,7 +144,7 @@ impl Pager for PageCacheManager {
         let backend = self.backend();
         let page = if idx < backend.npages() {
             let mut page = Page::alloc()?;
-            backend.read_page(idx, page.frame())?;
+            backend.read_page_sync(idx, page.frame())?;
             page.set_state(PageState::UpToDate);
             page
         } else {
@@ -164,7 +177,7 @@ impl Pager for PageCacheManager {
             return Ok(());
         };
         if idx < backend.npages() {
-            backend.write_page(idx, page.frame())?;
+            backend.write_page_sync(idx, page.frame())?;
         }
     }
 }
@@ -224,10 +237,29 @@ enum PageState {
 
 /// This trait represents the backend for the page cache.
 pub trait PageCacheBackend: Sync + Send {
-    /// Reads a page from the backend.
-    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()>;
-    /// Writes a page to the backend.
-    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()>;
+    /// Reads a page from the backend asynchronously.
+    fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter>;
+    /// Writes a page to the backend asynchronously.
+    fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<BioWaiter>;
     /// Returns the number of pages in the backend.
     fn npages(&self) -> usize;
 }
+
+impl dyn PageCacheBackend {
+    /// Reads a page from the backend synchronously.
+    fn read_page_sync(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+        let waiter = self.read_page(idx, frame)?;
+        match waiter.wait() {
+            Some(BioStatus::Complete) => Ok(()),
+            _ => return_errno!(Errno::EIO),
+        }
+    }
+    /// Writes a page to the backend synchronously.
+    fn write_page_sync(&self, idx: usize, frame: &VmFrame) -> Result<()> {
+        let waiter = self.write_page(idx, frame)?;
+        match waiter.wait() {
+            Some(BioStatus::Complete) => Ok(()),
+            _ => return_errno!(Errno::EIO),
+        }
+    }
+}
