From 268d5b8784ce9f30e9c0b70dc6201107a0d5b8f1 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 31 Oct 2023 18:11:55 +0800 Subject: [PATCH] feat: Implement Lazy Reader (#3395) Signed-off-by: Xuanwo --- .github/actions/setup/action.yaml | 2 +- .github/workflows/behavior_test.yml | 1 + .typos.toml | 2 +- bin/oay/Cargo.toml | 7 +- bindings/c/Cargo.toml | 2 +- bindings/cpp/Cargo.toml | 6 +- bindings/dotnet/Cargo.toml | 1 - bindings/haskell/Cargo.toml | 2 +- bindings/java/Cargo.toml | 40 +- bindings/python/Cargo.toml | 39 +- core/Cargo.toml | 2 +- core/edge/file_write_on_full_disk/Cargo.toml | 6 +- core/fuzz/Cargo.toml | 2 +- core/src/layers/complete.rs | 104 +-- core/src/layers/logging.rs | 36 +- core/src/layers/madsim.rs | 9 +- core/src/layers/prometheus.rs | 38 +- core/src/layers/retry.rs | 2 +- core/src/raw/adapters/kv/backend.rs | 5 +- core/src/raw/adapters/typed_kv/backend.rs | 5 +- core/src/raw/http_util/body.rs | 16 +- core/src/raw/mod.rs | 3 + core/src/raw/oio/buf/adaptive.rs | 146 ++++ core/src/raw/oio/buf/mod.rs | 3 + core/src/raw/oio/read/api.rs | 128 ++- core/src/raw/oio/read/cloneable_read.rs | 140 --- core/src/raw/oio/read/file_read.rs | 539 ++++++++++++ core/src/raw/oio/read/futures_read.rs | 67 ++ core/src/raw/oio/read/into_read_from_file.rs | 192 ----- .../oio/read/into_seekable_read_by_range.rs | 569 ------------- core/src/raw/oio/read/lazy_read.rs | 198 +++++ core/src/raw/oio/read/mod.rs | 26 +- core/src/raw/oio/read/range_read.rs | 795 ++++++++++++++++++ core/src/raw/oio/read/std_read.rs | 62 ++ core/src/raw/oio/read/tokio_read.rs | 88 ++ core/src/raw/ops.rs | 17 + core/src/raw/rps.rs | 38 +- .../hdfs/error.rs => raw/std_io_util.rs} | 21 +- core/src/services/azblob/backend.rs | 6 +- core/src/services/azdls/backend.rs | 5 +- core/src/services/azfile/backend.rs | 5 +- core/src/services/cos/backend.rs | 5 +- core/src/services/dbfs/backend.rs | 25 +- core/src/services/dropbox/backend.rs | 6 +- core/src/services/fs/backend.rs | 162 ++-- 
core/src/services/fs/error.rs | 41 - core/src/services/fs/mod.rs | 1 - core/src/services/fs/pager.rs | 9 +- core/src/services/fs/writer.rs | 17 +- core/src/services/ftp/backend.rs | 14 +- core/src/services/gcs/backend.rs | 6 +- core/src/services/gdrive/backend.rs | 17 +- core/src/services/ghac/backend.rs | 5 +- core/src/services/hdfs/backend.rs | 97 +-- core/src/services/hdfs/mod.rs | 1 - core/src/services/hdfs/writer.rs | 11 +- core/src/services/http/backend.rs | 5 +- core/src/services/ipfs/backend.rs | 5 +- core/src/services/ipmfs/backend.rs | 5 +- core/src/services/memcached/ascii.rs | 26 +- core/src/services/memcached/backend.rs | 6 +- core/src/services/obs/backend.rs | 5 +- core/src/services/onedrive/backend.rs | 5 +- core/src/services/oss/backend.rs | 5 +- core/src/services/s3/backend.rs | 5 +- core/src/services/sftp/backend.rs | 45 +- core/src/services/sftp/error.rs | 1 + core/src/services/sftp/utils.rs | 73 -- core/src/services/sftp/writer.rs | 9 +- core/src/services/supabase/backend.rs | 5 +- core/src/services/vercel_artifacts/backend.rs | 5 +- core/src/services/wasabi/backend.rs | 5 +- core/src/services/webdav/backend.rs | 5 +- core/src/services/webhdfs/backend.rs | 17 +- core/src/services/webhdfs/error.rs | 2 +- core/src/types/operator/blocking_operator.rs | 23 +- core/src/types/operator/operator.rs | 33 +- core/tests/behavior/fuzz.rs | 85 +- integrations/dav-server/Cargo.toml | 5 +- 79 files changed, 2537 insertions(+), 1635 deletions(-) create mode 100644 core/src/raw/oio/buf/adaptive.rs delete mode 100644 core/src/raw/oio/read/cloneable_read.rs create mode 100644 core/src/raw/oio/read/file_read.rs create mode 100644 core/src/raw/oio/read/futures_read.rs delete mode 100644 core/src/raw/oio/read/into_read_from_file.rs delete mode 100644 core/src/raw/oio/read/into_seekable_read_by_range.rs create mode 100644 core/src/raw/oio/read/lazy_read.rs create mode 100644 core/src/raw/oio/read/range_read.rs create mode 100644 core/src/raw/oio/read/std_read.rs 
create mode 100644 core/src/raw/oio/read/tokio_read.rs rename core/src/{services/hdfs/error.rs => raw/std_io_util.rs} (73%) delete mode 100644 core/src/services/fs/error.rs diff --git a/.github/actions/setup/action.yaml b/.github/actions/setup/action.yaml index d217c3fc73e7..bde6031d41dc 100644 --- a/.github/actions/setup/action.yaml +++ b/.github/actions/setup/action.yaml @@ -42,7 +42,7 @@ runs: # Enable backtraces echo "RUST_BACKTRACE=1" >> $GITHUB_ENV # Enable logging - echo "RUST_LOG=opendal=debug" >> $GITHUB_ENV + echo "RUST_LOG=opendal=trace" >> $GITHUB_ENV # Enable sparse index echo "CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse" >> $GITHUB_ENV diff --git a/.github/workflows/behavior_test.yml b/.github/workflows/behavior_test.yml index 861a7f981900..d1cb64ab7ae3 100644 --- a/.github/workflows/behavior_test.yml +++ b/.github/workflows/behavior_test.yml @@ -102,6 +102,7 @@ jobs: with: os: ${{ matrix.os }} cases: ${{ toJson(matrix.cases) }} + test_binding_python: name: binding_python / ${{ matrix.os }} needs: [plan] diff --git a/.typos.toml b/.typos.toml index 5b2bca99b5a6..df205aecc397 100644 --- a/.typos.toml +++ b/.typos.toml @@ -18,8 +18,8 @@ [default.extend-words] # Random strings. "Dum" = "Dum" -"ba" = "ba" "Hel" = "Hel" +"ba" = "ba" "hellow" = "hellow" # Showed up in examples. 
"thw" = "thw" diff --git a/bin/oay/Cargo.toml b/bin/oay/Cargo.toml index 87c07a4648af..5a907dfcee60 100644 --- a/bin/oay/Cargo.toml +++ b/bin/oay/Cargo.toml @@ -33,7 +33,12 @@ version.workspace = true default = ["frontends-webdav", "frontends-s3"] frontends-s3 = [] -frontends-webdav = ["dep:dav-server", "dep:dav-server-opendalfs", "dep:bytes", "dep:futures-util"] +frontends-webdav = [ + "dep:dav-server", + "dep:dav-server-opendalfs", + "dep:bytes", + "dep:futures-util", +] [dependencies] anyhow = "1" diff --git a/bindings/c/Cargo.toml b/bindings/c/Cargo.toml index a0f31bc75f9b..86532f8acd4d 100644 --- a/bindings/c/Cargo.toml +++ b/bindings/c/Cargo.toml @@ -36,6 +36,6 @@ cbindgen = "0.25.0" [dependencies] bytes = "1.4.0" +once_cell = "1.17.1" opendal.workspace = true tokio = { version = "1.27", features = ["fs", "macros", "rt-multi-thread"] } -once_cell = "1.17.1" diff --git a/bindings/cpp/Cargo.toml b/bindings/cpp/Cargo.toml index 058de3a2078f..40e90680eecb 100644 --- a/bindings/cpp/Cargo.toml +++ b/bindings/cpp/Cargo.toml @@ -24,17 +24,17 @@ edition.workspace = true homepage.workspace = true license.workspace = true repository.workspace = true -version.workspace = true rust-version.workspace = true +version.workspace = true [lib] crate-type = ["staticlib"] [dependencies] -opendal.workspace = true -cxx = "1.0" anyhow = "1.0" chrono = "0.4" +cxx = "1.0" +opendal.workspace = true [build-dependencies] cxx-build = "1.0" diff --git a/bindings/dotnet/Cargo.toml b/bindings/dotnet/Cargo.toml index 11a6a3c5a250..e6a320bc463f 100644 --- a/bindings/dotnet/Cargo.toml +++ b/bindings/dotnet/Cargo.toml @@ -27,7 +27,6 @@ license.workspace = true repository.workspace = true rust-version.workspace = true - [lib] crate-type = ["cdylib"] doc = false diff --git a/bindings/haskell/Cargo.toml b/bindings/haskell/Cargo.toml index 637ee3d8e196..5c1021f076e7 100644 --- a/bindings/haskell/Cargo.toml +++ b/bindings/haskell/Cargo.toml @@ -24,8 +24,8 @@ edition.workspace = true 
homepage.workspace = true license.workspace = true repository.workspace = true -version.workspace = true rust-version.workspace = true +version.workspace = true [lib] crate-type = ["cdylib"] diff --git a/bindings/java/Cargo.toml b/bindings/java/Cargo.toml index c5e7707b96bf..15bf36620525 100644 --- a/bindings/java/Cargo.toml +++ b/bindings/java/Cargo.toml @@ -86,22 +86,23 @@ services-all = [ ] # Default services provided by opendal. -services-azblob = [ "opendal/services-azblob" ] -services-azdls = [ "opendal/services-azdls" ] -services-cos = [ "opendal/services-cos" ] -services-fs = [ "opendal/services-fs" ] -services-gcs = [ "opendal/services-gcs" ] -services-ghac = [ "opendal/services-ghac" ] -services-http = [ "opendal/services-http" ] -services-ipmfs = [ "opendal/services-ipmfs" ] -services-memory = [ "opendal/services-memory" ] -services-obs = [ "opendal/services-obs" ] -services-oss = [ "opendal/services-oss" ] -services-s3 = [ "opendal/services-s3" ] -services-webdav = [ "opendal/services-webdav" ] -services-webhdfs = [ "opendal/services-webhdfs" ] +services-azblob = ["opendal/services-azblob"] +services-azdls = ["opendal/services-azdls"] +services-cos = ["opendal/services-cos"] +services-fs = ["opendal/services-fs"] +services-gcs = ["opendal/services-gcs"] +services-ghac = ["opendal/services-ghac"] +services-http = ["opendal/services-http"] +services-ipmfs = ["opendal/services-ipmfs"] +services-memory = ["opendal/services-memory"] +services-obs = ["opendal/services-obs"] +services-oss = ["opendal/services-oss"] +services-s3 = ["opendal/services-s3"] +services-webdav = ["opendal/services-webdav"] +services-webhdfs = ["opendal/services-webhdfs"] # Optional services provided by opendal. 
+services-azfile = ["opendal/services-azfile"] services-cacache = ["opendal/services-cacache"] services-dashmap = ["opendal/services-dashmap"] services-dropbox = ["opendal/services-dropbox"] @@ -114,6 +115,8 @@ services-ipfs = ["opendal/services-ipfs"] services-memcached = ["opendal/services-memcached"] services-mini-moka = ["opendal/services-mini-moka"] services-moka = ["opendal/services-moka"] +services-mongodb = ["opendal/services-mongodb"] +services-mysql = ["opendal/services-mysql"] services-onedrive = ["opendal/services-onedrive"] services-persy = ["opendal/services-persy"] services-postgresql = ["opendal/services-postgresql"] @@ -123,28 +126,25 @@ services-redis-rustls = ["opendal/services-redis-rustls"] services-rocksdb = ["opendal/services-rocksdb"] services-sftp = ["opendal/services-sftp"] services-sled = ["opendal/services-sled"] +services-sqlite = ["opendal/services-sqlite"] services-supabase = ["opendal/services-supabase"] services-tikv = ["opendal/services-tikv"] services-vercel-artifacts = ["opendal/services-vercel-artifacts"] services-wasabi = ["opendal/services-wasabi"] -services-mysql = ["opendal/services-mysql"] -services-mongodb = ["opendal/services-mongodb"] -services-sqlite = ["opendal/services-sqlite"] -services-azfile = ["opendal/services-azfile"] [dependencies] anyhow = "1.0.71" jni = "0.21.1" num_cpus = "1.15.0" once_cell = "1.17.1" -tokio = { version = "1.28.1", features = ["full"] } opendal = { workspace = true } +tokio = { version = "1.28.1", features = ["full"] } # This is not optimal. See also the Cargo issue: # https://github.com/rust-lang/cargo/issues/1197#issuecomment-1641086954 [target.'cfg(unix)'.dependencies.opendal] -workspace = true features = [ # Depend on "openssh" which depends on "tokio-pipe" that is unavailable on Windows. 
"services-sftp", ] +workspace = true diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index a2ef50b5bf81..fa636592ce57 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -27,7 +27,6 @@ repository.workspace = true rust-version.workspace = true version.workspace = true - [features] # Enable all opendal default feature by default. default = [ @@ -50,6 +49,7 @@ default = [ services-all = [ "default", + "services-azfile", "services-cacache", "services-dashmap", "services-dropbox", @@ -82,20 +82,21 @@ services-all = [ ] # Default services provided by opendal. -services-azblob = [ "opendal/services-azblob" ] -services-azdls = [ "opendal/services-azdls" ] -services-cos = [ "opendal/services-cos" ] -services-fs = [ "opendal/services-fs" ] -services-gcs = [ "opendal/services-gcs" ] -services-ghac = [ "opendal/services-ghac" ] -services-http = [ "opendal/services-http" ] -services-ipmfs = [ "opendal/services-ipmfs" ] -services-memory = [ "opendal/services-memory" ] -services-obs = [ "opendal/services-obs" ] -services-oss = [ "opendal/services-oss" ] -services-s3 = [ "opendal/services-s3" ] -services-webdav = [ "opendal/services-webdav" ] -services-webhdfs = [ "opendal/services-webhdfs" ] +services-azblob = ["opendal/services-azblob"] +services-azdls = ["opendal/services-azdls"] +services-azfile = ["opendal/services-azfile"] +services-cos = ["opendal/services-cos"] +services-fs = ["opendal/services-fs"] +services-gcs = ["opendal/services-gcs"] +services-ghac = ["opendal/services-ghac"] +services-http = ["opendal/services-http"] +services-ipmfs = ["opendal/services-ipmfs"] +services-memory = ["opendal/services-memory"] +services-obs = ["opendal/services-obs"] +services-oss = ["opendal/services-oss"] +services-s3 = ["opendal/services-s3"] +services-webdav = ["opendal/services-webdav"] +services-webhdfs = ["opendal/services-webhdfs"] # Optional services provided by opendal. 
services-cacache = ["opendal/services-cacache"] @@ -110,6 +111,8 @@ services-ipfs = ["opendal/services-ipfs"] services-memcached = ["opendal/services-memcached"] services-mini-moka = ["opendal/services-mini-moka"] services-moka = ["opendal/services-moka"] +services-mongodb = ["opendal/services-mongodb"] +services-mysql = ["opendal/services-mysql"] services-onedrive = ["opendal/services-onedrive"] services-persy = ["opendal/services-persy"] services-postgresql = ["opendal/services-postgresql"] @@ -119,13 +122,11 @@ services-redis-rustls = ["opendal/services-redis-rustls"] services-rocksdb = ["opendal/services-rocksdb"] services-sftp = ["opendal/services-sftp"] services-sled = ["opendal/services-sled"] +services-sqlite = ["opendal/services-sqlite"] services-supabase = ["opendal/services-supabase"] services-tikv = ["opendal/services-tikv"] services-vercel-artifacts = ["opendal/services-vercel-artifacts"] services-wasabi = ["opendal/services-wasabi"] -services-mysql = ["opendal/services-mysql"] -services-mongodb = ["opendal/services-mongodb"] -services-sqlite = ["opendal/services-sqlite"] [lib] crate-type = ["cdylib"] @@ -136,4 +137,4 @@ futures = "0.3.28" opendal.workspace = true pyo3 = "0.19" pyo3-asyncio = { version = "0.19", features = ["tokio-runtime"] } -tokio = "1" \ No newline at end of file +tokio = "1" diff --git a/core/Cargo.toml b/core/Cargo.toml index 2bf456813148..a0a1d76a4900 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -234,6 +234,7 @@ chrono = { version = "0.4.28", default-features = false, features = [ ] } dashmap = { version = "5.4", optional = true } dirs = { version = "5.0.1", optional = true } +dotenvy = { version = "0.15", optional = true } etcd-client = { version = "0.11", optional = true, features = ["tls"] } flagset = "0.4" foundationdb = { version = "0.8.0", features = [ @@ -295,7 +296,6 @@ tokio = "1.27" tokio-postgres = { version = "0.7.8", optional = true } tracing = { version = "0.1", optional = true } uuid = { version = "1", 
features = ["serde", "v4"] } -dotenvy = { version = "0.15", optional = true } [dev-dependencies] criterion = { version = "0.4", features = ["async", "async_tokio"] } diff --git a/core/edge/file_write_on_full_disk/Cargo.toml b/core/edge/file_write_on_full_disk/Cargo.toml index aefee22d47b0..7cb9e4a3b39a 100644 --- a/core/edge/file_write_on_full_disk/Cargo.toml +++ b/core/edge/file_write_on_full_disk/Cargo.toml @@ -16,13 +16,13 @@ # under the License. [package] -name = "edge_test_file_write_on_full_disk" edition = "2021" -version = "0.0.0" +name = "edge_test_file_write_on_full_disk" publish = false +version = "0.0.0" [dependencies] futures = "0.3" opendal = { workspace = true } -tokio = { version = "1", features = ["full"] } rand = "0.8" +tokio = { version = "1", features = ["full"] } diff --git a/core/fuzz/Cargo.toml b/core/fuzz/Cargo.toml index f0a3ad726dc4..fbc45cc1fa0e 100644 --- a/core/fuzz/Cargo.toml +++ b/core/fuzz/Cargo.toml @@ -32,11 +32,11 @@ dotenvy = "0.15.6" libfuzzer-sys = "0.4" opendal = { path = "..", features = ["tests"] } tokio = { version = "1", features = ["full"] } -uuid = { version = "1", features = ["v4"] } tracing-subscriber = { version = "0.3", features = [ "env-filter", "tracing-log", ] } +uuid = { version = "1", features = ["v4"] } [[bin]] name = "fuzz_reader" diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index de32637fe664..c5e593a47460 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -27,13 +27,13 @@ use std::task::Poll; use async_trait::async_trait; use bytes::Bytes; -use crate::raw::oio::into_flat_page; -use crate::raw::oio::into_hierarchy_page; -use crate::raw::oio::ByRangeSeekableReader; use crate::raw::oio::Entry; use crate::raw::oio::FlatPager; use crate::raw::oio::HierarchyPager; +use crate::raw::oio::RangeReader; use crate::raw::oio::StreamableReader; +use crate::raw::oio::{into_flat_page, FileReader}; +use crate::raw::oio::{into_hierarchy_page, LazyReader}; use crate::raw::*; 
use crate::*; @@ -116,10 +116,10 @@ use crate::*; pub struct CompleteLayer; impl Layer for CompleteLayer { - type LayeredAccessor = CompleteReaderAccessor; + type LayeredAccessor = CompleteAccessor; fn layer(&self, inner: A) -> Self::LayeredAccessor { - CompleteReaderAccessor { + CompleteAccessor { meta: inner.info(), inner: Arc::new(inner), } @@ -127,18 +127,18 @@ impl Layer for CompleteLayer { } /// Provide complete wrapper for backend. -pub struct CompleteReaderAccessor { +pub struct CompleteAccessor { meta: AccessorInfo, inner: Arc, } -impl Debug for CompleteReaderAccessor { +impl Debug for CompleteAccessor { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { self.inner.fmt(f) } } -impl CompleteReaderAccessor { +impl CompleteAccessor { fn new_unsupported_error(&self, op: impl Into<&'static str>) -> Error { let scheme = self.meta.scheme(); let op = op.into(); @@ -162,41 +162,24 @@ impl CompleteReaderAccessor { let seekable = capability.read_can_seek; let streamable = capability.read_can_next; - let range = args.range(); - let (rp, r) = self.inner.read(path, args).await?; - let content_length = rp.metadata().content_length(); - match (seekable, streamable) { - (true, true) => Ok((rp, CompleteReader::AlreadyComplete(r))), + (true, true) => { + let r = LazyReader::new(self.inner.clone(), path, args); + Ok((RpRead::new(), CompleteReader::AlreadyComplete(r))) + } (true, false) => { - let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedStreamable(r))) + let r = FileReader::new(self.inner.clone(), path, args); + + Ok((RpRead::new(), CompleteReader::NeedStreamable(r))) } _ => { - let (offset, size) = match (range.offset(), range.size()) { - (Some(offset), _) => (offset, content_length), - (None, None) => (0, content_length), - (None, Some(size)) => { - // TODO: we can read content range to calculate - // the total content length. 
- let om = self.inner.stat(path, OpStat::new()).await?.into_metadata(); - let total_size = om.content_length(); - let (offset, size) = if size > total_size { - (0, total_size) - } else { - (total_size - size, size) - }; - - (offset, size) - } - }; - let r = oio::into_seekable_read_by_range(self.inner.clone(), path, r, offset, size); + let r = RangeReader::new(self.inner.clone(), path, args); if streamable { - Ok((rp, CompleteReader::NeedSeekable(r))) + Ok((RpRead::new(), CompleteReader::NeedSeekable(r))) } else { let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedBoth(r))) + Ok((RpRead::new(), CompleteReader::NeedBoth(r))) } } } @@ -215,44 +198,23 @@ impl CompleteReaderAccessor { let seekable = capability.read_can_seek; let streamable = capability.read_can_next; - let range = args.range(); - let (rp, r) = self.inner.blocking_read(path, args)?; - let content_length = rp.metadata().content_length(); - match (seekable, streamable) { - (true, true) => Ok((rp, CompleteReader::AlreadyComplete(r))), + (true, true) => { + let r = LazyReader::new(self.inner.clone(), path, args); + Ok((RpRead::new(), CompleteReader::AlreadyComplete(r))) + } (true, false) => { - let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedStreamable(r))) + let r = FileReader::new(self.inner.clone(), path, args); + Ok((RpRead::new(), CompleteReader::NeedStreamable(r))) } _ => { - let (offset, size) = match (range.offset(), range.size()) { - (Some(offset), _) => (offset, content_length), - (None, None) => (0, content_length), - (None, Some(size)) => { - // TODO: we can read content range to calculate - // the total content length. - let om = self - .inner - .blocking_stat(path, OpStat::new())? 
- .into_metadata(); - let total_size = om.content_length(); - let (offset, size) = if size > total_size { - (0, total_size) - } else { - (total_size - size, size) - }; - - (offset, size) - } - }; - let r = oio::into_seekable_read_by_range(self.inner.clone(), path, r, offset, size); + let r = RangeReader::new(self.inner.clone(), path, args); if streamable { - Ok((rp, CompleteReader::NeedSeekable(r))) + Ok((RpRead::new(), CompleteReader::NeedSeekable(r))) } else { let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedBoth(r))) + Ok((RpRead::new(), CompleteReader::NeedBoth(r))) } } } @@ -351,7 +313,7 @@ impl CompleteReaderAccessor { } #[async_trait] -impl LayeredAccessor for CompleteReaderAccessor { +impl LayeredAccessor for CompleteAccessor { type Inner = A; type Reader = CompleteReader; type BlockingReader = CompleteReader; @@ -587,10 +549,10 @@ impl LayeredAccessor for CompleteReaderAccessor { } pub enum CompleteReader { - AlreadyComplete(R), - NeedSeekable(ByRangeSeekableReader), - NeedStreamable(StreamableReader), - NeedBoth(StreamableReader>), + AlreadyComplete(LazyReader), + NeedSeekable(RangeReader), + NeedStreamable(FileReader), + NeedBoth(StreamableReader>), } impl oio::Read for CompleteReader @@ -829,7 +791,7 @@ mod tests { } async fn read(&self, _: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { - Ok((RpRead::new(0), Box::new(()))) + Ok((RpRead::new(), Box::new(oio::Cursor::new()))) } async fn write(&self, _: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/layers/logging.rs b/core/src/layers/logging.rs index 457fc2612725..78ebc8d4ccc1 100644 --- a/core/src/layers/logging.rs +++ b/core/src/layers/logging.rs @@ -991,17 +991,20 @@ impl Drop for LoggingReader { impl oio::Read for LoggingReader { fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + let buf_size = buf.len(); + match self.inner.poll_read(cx, buf) { Poll::Ready(res) => match res { Ok(n) => { self.read += n as u64; 
trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read {}B ", + "service={} operation={} path={} read={} -> buf size: {}B, read {}B ", self.ctx.scheme, ReadOperation::Read, self.path, self.read, + buf_size, n ); Poll::Ready(Ok(n)) @@ -1011,7 +1014,7 @@ impl oio::Read for LoggingReader { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> data read failed: {}", + "service={} operation={} path={} read={} -> read failed: {}", self.ctx.scheme, ReadOperation::Read, self.path, @@ -1025,11 +1028,12 @@ impl oio::Read for LoggingReader { Poll::Pending => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read pending", + "service={} operation={} path={} read={} -> buf size: {}B, read pending", self.ctx.scheme, ReadOperation::Read, self.path, - self.read + self.read, + buf_size ); Poll::Pending } @@ -1042,7 +1046,7 @@ impl oio::Read for LoggingReader { Ok(n) => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data seek to offset {n}", + "service={} operation={} path={} read={} -> seek to {pos:?}, current offset {n}", self.ctx.scheme, ReadOperation::Seek, self.path, @@ -1055,7 +1059,7 @@ impl oio::Read for LoggingReader { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> data read failed: {}", + "service={} operation={} path={} read={} -> seek to {pos:?} failed: {}", self.ctx.scheme, ReadOperation::Seek, self.path, @@ -1069,7 +1073,7 @@ impl oio::Read for LoggingReader { Poll::Pending => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data seek pending", + "service={} operation={} path={} read={} -> seek to {pos:?} pending", self.ctx.scheme, ReadOperation::Seek, self.path, @@ -1087,7 +1091,7 @@ impl oio::Read for LoggingReader { self.read += bs.len() as u64; trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read {}B", + "service={} operation={} path={} 
read={} -> next returns {}B", self.ctx.scheme, ReadOperation::Next, self.path, @@ -1101,7 +1105,7 @@ impl oio::Read for LoggingReader { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> data read failed: {}", + "service={} operation={} path={} read={} -> next failed: {}", self.ctx.scheme, ReadOperation::Next, self.path, @@ -1111,12 +1115,22 @@ impl oio::Read for LoggingReader { } Poll::Ready(Some(Err(err))) } - None => Poll::Ready(None), + None => { + trace!( + target: LOGGING_TARGET, + "service={} operation={} path={} read={} -> next returns None", + self.ctx.scheme, + ReadOperation::Next, + self.path, + self.read, + ); + Poll::Ready(None) + } }, Poll::Pending => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read pending", + "service={} operation={} path={} read={} -> next returns pending", self.ctx.scheme, ReadOperation::Next, self.path, diff --git a/core/src/layers/madsim.rs b/core/src/layers/madsim.rs index d10451774ac2..1d1253bafd3d 100644 --- a/core/src/layers/madsim.rs +++ b/core/src/layers/madsim.rs @@ -191,10 +191,7 @@ impl LayeredAccessor for MadsimAccessor { .downcast::() .expect("fail to downcast response to ReadResponse"); let content_length = resp.data.as_ref().map(|b| b.len()).unwrap_or(0); - Ok(( - RpRead::new(content_length as u64), - MadsimReader { data: resp.data }, - )) + Ok((RpRead::new(), MadsimReader { data: resp.data })) } #[cfg(not(madsim))] { @@ -346,10 +343,6 @@ impl oio::Page for MadsimPager { } } -fn parse_io_error(e: std::io::Error) -> Error { - Error::new(ErrorKind::Unexpected, "madsim error") -} - /// A simulated server.This an experimental feature, docs are not ready yet. 
#[derive(Default, Clone)] pub struct MadsimServer; diff --git a/core/src/layers/prometheus.rs b/core/src/layers/prometheus.rs index 4692da0a68c7..f26a4af2bf3b 100644 --- a/core/src/layers/prometheus.rs +++ b/core/src/layers/prometheus.rs @@ -320,28 +320,18 @@ impl LayeredAccessor for PrometheusAccessor { .with_label_values(&labels) .start_timer(); - let read_res = self - .inner - .read(path, args) - .map(|v| { - v.map(|(rp, r)| { - self.stats - .bytes_total - .with_label_values(&labels) - .observe(rp.metadata().content_length() as f64); - ( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Read, - self.stats.clone(), - self.scheme, - &path.to_string(), - ), - ) - }) - }) - .await; + let read_res = self.inner.read(path, args).await.map(|(rp, r)| { + ( + rp, + PrometheusMetricWrapper::new( + r, + Operation::Read, + self.stats.clone(), + self.scheme, + &path.to_string(), + ), + ) + }); timer.observe_duration(); read_res.map_err(|e| { self.stats.increment_errors_total(Operation::Read, e.kind()); @@ -546,10 +536,6 @@ impl LayeredAccessor for PrometheusAccessor { .with_label_values(&labels) .start_timer(); let result = self.inner.blocking_read(path, args).map(|(rp, r)| { - self.stats - .bytes_total - .with_label_values(&labels) - .observe(rp.metadata().content_length() as f64); ( rp, PrometheusMetricWrapper::new( diff --git a/core/src/layers/retry.rs b/core/src/layers/retry.rs index 67239d3fd5ee..9ae6c877d7de 100644 --- a/core/src/layers/retry.rs +++ b/core/src/layers/retry.rs @@ -1164,7 +1164,7 @@ mod tests { async fn read(&self, _: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { Ok(( - RpRead::new(13), + RpRead::new(), MockReader { attempt: self.attempt.clone(), pos: 0, diff --git a/core/src/raw/adapters/kv/backend.rs b/core/src/raw/adapters/kv/backend.rs index 94db4de8dc5e..799dd1be5415 100644 --- a/core/src/raw/adapters/kv/backend.rs +++ b/core/src/raw/adapters/kv/backend.rs @@ -129,8 +129,7 @@ impl Accessor for Backend { let bs = self.apply_range(bs, 
args.range()); - let length = bs.len(); - Ok((RpRead::new(length as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { @@ -142,7 +141,7 @@ impl Accessor for Backend { }; let bs = self.apply_range(bs, args.range()); - Ok((RpRead::new(bs.len() as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/raw/adapters/typed_kv/backend.rs b/core/src/raw/adapters/typed_kv/backend.rs index d5313b8e0097..ca872346a7a8 100644 --- a/core/src/raw/adapters/typed_kv/backend.rs +++ b/core/src/raw/adapters/typed_kv/backend.rs @@ -135,8 +135,7 @@ impl Accessor for Backend { let bs = self.apply_range(bs, args.range()); - let length = bs.len(); - Ok((RpRead::new(length as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { @@ -149,7 +148,7 @@ impl Accessor for Backend { }; let bs = self.apply_range(bs, args.range()); - Ok((RpRead::new(bs.len() as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/raw/http_util/body.rs b/core/src/raw/http_util/body.rs index 474f7489780d..8b7d5da5af09 100644 --- a/core/src/raw/http_util/body.rs +++ b/core/src/raw/http_util/body.rs @@ -78,6 +78,16 @@ impl IncomingAsyncBody { } } + /// Create an empty IncomingAsyncBody. + pub(crate) fn empty() -> Self { + Self { + inner: Box::new(()), + size: Some(0), + consumed: 0, + chunk: None, + } + } + /// Consume the entire body. 
pub async fn consume(mut self) -> Result<()> { use oio::ReadExt; @@ -145,7 +155,7 @@ impl IncomingAsyncBody { impl oio::Read for IncomingAsyncBody { fn poll_read(&mut self, cx: &mut Context<'_>, mut buf: &mut [u8]) -> Poll> { - if buf.is_empty() { + if buf.is_empty() || self.size == Some(0) { return Poll::Ready(Ok(0)); } @@ -179,6 +189,10 @@ impl oio::Read for IncomingAsyncBody { } fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + if self.size == Some(0) { + return Poll::Ready(None); + } + if let Some(bs) = self.chunk.take() { return Poll::Ready(Some(Ok(bs))); } diff --git a/core/src/raw/mod.rs b/core/src/raw/mod.rs index 12fe913056da..3150791ec8b2 100644 --- a/core/src/raw/mod.rs +++ b/core/src/raw/mod.rs @@ -59,6 +59,9 @@ pub use chrono_util::*; mod tokio_util; pub use tokio_util::*; +mod std_io_util; +pub use std_io_util::*; + // Expose as a pub mod to avoid confusing. pub mod adapters; pub mod oio; diff --git a/core/src/raw/oio/buf/adaptive.rs b/core/src/raw/oio/buf/adaptive.rs new file mode 100644 index 000000000000..abebcb36f6c3 --- /dev/null +++ b/core/src/raw/oio/buf/adaptive.rs @@ -0,0 +1,146 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use bytes::{Bytes, BytesMut}; +use std::cmp; +use tokio::io::ReadBuf; + +/// The default minimum adaptive buffer size is 8 KiB. +const DEFAULT_MIN_BUFFER_SIZE: usize = 8192; + +/// The default maximum adaptive buffer size is 4 MiB. +/// +/// We will not grow the buffer beyond this size. +const DEFAULT_MAX_BUFFER_SIZE: usize = 4 * 1024 * 1024; + +/// AdaptiveBuf is inspired by hyper [ReadStrategy](https://github.com/hyperium/hyper/blob/master/src/proto/h1/io.rs#L26). +/// +/// We build this adaptive buf to make our internal buf grow and shrink automatically based on IO +/// throughput. +pub struct AdaptiveBuf { + /// The underlying buffer. + buffer: BytesMut, + + next: usize, + decrease_now: bool, +} + +impl Default for AdaptiveBuf { + fn default() -> Self { + Self { + buffer: BytesMut::default(), + next: DEFAULT_MIN_BUFFER_SIZE, + decrease_now: false, + } + } +} + +impl AdaptiveBuf { + /// reserve will reserve the buffer to the next size. + pub fn reserve(&mut self) { + if self.buffer.capacity() < self.next { + self.buffer.reserve(self.next); + } + } + + /// Returning the initialized part of the buffer. + pub fn initialized_mut(&mut self) -> ReadBuf { + assert_eq!( + self.buffer.len(), + 0, + "buffer must be empty before initialized_mut" + ); + + let dst = self.buffer.spare_capacity_mut(); + let length = dst.len(); + let mut buf = ReadBuf::uninit(dst); + + // Safety: we make sure that we only return the initialized part of the buffer. + unsafe { + buf.assume_init(length); + } + buf + } + + /// Records the number of bytes read from the underlying IO. + pub fn record(&mut self, read: usize) { + if read >= self.next { + // Growing if we uses the whole buffer. + self.next = cmp::min(self.next.saturating_mul(2), DEFAULT_MAX_BUFFER_SIZE); + self.decrease_now = false; + } else { + // Shrinking if we uses less than half of the buffer. 
+ let decr_to = self.next.saturating_div(2); + if read < decr_to { + if self.decrease_now { + self.next = cmp::max(decr_to, DEFAULT_MIN_BUFFER_SIZE); + self.decrease_now = false; + } else { + // Mark decrease_now as true to shrink the buffer next time. + self.decrease_now = true; + } + } else { + // Mark decrease_now as false to keep current buffer size. + self.decrease_now = false; + } + } + } + + /// Splits the buffer into two at the given index. + /// + /// # Safety + /// + /// It's required that buffer has been filled with given bytes. + pub fn split(&mut self, n: usize) -> Bytes { + unsafe { self.buffer.set_len(n) } + self.buffer.split().freeze() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn read_strategy_adaptive_decrements() { + let mut huf = AdaptiveBuf::default(); + huf.record(8192); + assert_eq!(huf.next, 16384); + + huf.record(1); + assert_eq!( + huf.next, 16384, + "first smaller record doesn't decrement yet" + ); + huf.record(8192); + assert_eq!(huf.next, 16384, "record was with range"); + + huf.record(1); + assert_eq!( + huf.next, 16384, + "in-range record should make this the 'first' again" + ); + + huf.record(1); + assert_eq!(huf.next, 8192, "second smaller record decrements"); + + huf.record(1); + assert_eq!(huf.next, 8192, "first doesn't decrement"); + huf.record(1); + assert_eq!(huf.next, 8192, "doesn't decrement under minimum"); + } +} diff --git a/core/src/raw/oio/buf/mod.rs b/core/src/raw/oio/buf/mod.rs index dfd3663e56f8..abc8bf3286de 100644 --- a/core/src/raw/oio/buf/mod.rs +++ b/core/src/raw/oio/buf/mod.rs @@ -20,3 +20,6 @@ pub use chunked_bytes::ChunkedBytes; mod write_buf; pub use write_buf::WriteBuf; + +mod adaptive; +pub use adaptive::AdaptiveBuf; diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index da32259301e8..0b9aaf7703cf 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -17,10 +17,10 @@ use std::fmt::Display; use std::fmt::Formatter; -use std::io; use 
std::pin::Pin; -use std::task::Context; use std::task::Poll; +use std::task::{ready, Context}; +use std::{cmp, io}; use bytes::Bytes; use futures::Future; @@ -198,6 +198,18 @@ pub trait ReadExt: Read { fn next(&mut self) -> NextFuture<'_, Self> { NextFuture { reader: self } } + + /// Build a future for `read_to_end`. + fn read_to_end<'a>(&'a mut self, buf: &'a mut Vec) -> ReadToEndFuture<'a, Self> { + let start = buf.len(); + ReadToEndFuture { + reader: self, + buf, + start, + length: start, + next: MIN_READ_TO_END_GROW_SIZE, + } + } } /// Make this future `!Unpin` for compatibility with async trait methods. @@ -256,6 +268,70 @@ where } } +/// The MIN read to end grow size. +const MIN_READ_TO_END_GROW_SIZE: usize = 8 * 1024; +/// The MAX read to end grow size. +const MAX_READ_TO_END_GROW_SIZE: usize = 4 * 1024 * 1024; + +/// Make this future `!Unpin` for compatibility with async trait methods. +#[pin_project(!Unpin)] +pub struct ReadToEndFuture<'a, R: Read + Unpin + ?Sized> { + reader: &'a mut R, + buf: &'a mut Vec, + start: usize, + length: usize, + next: usize, +} + +impl Future for ReadToEndFuture<'_, R> +where + R: Read + Unpin + ?Sized, +{ + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + + loop { + if this.buf.capacity() == *this.length { + this.buf.reserve(*this.next); + // # Safety + // + // We make sure that the length of buf is maintained correctly. 
+ #[allow(clippy::uninit_vec)] + unsafe { + this.buf.set_len(this.buf.capacity()); + } + } + + let buf = &mut this.buf[*this.length..]; + match ready!(this.reader.poll_read(cx, buf)) { + Ok(0) => { + unsafe { + this.buf.set_len(*this.length); + } + return Poll::Ready(Ok(*this.length - *this.start)); + } + Ok(n) => { + *this.next = if n >= *this.next { + cmp::min((*this.next).saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) + } else if n >= *this.next / 2 { + *this.next + } else { + cmp::max((*this.next).saturating_div(2), MIN_READ_TO_END_GROW_SIZE) + }; + // We can't allow bogus values from read. If it is too large, the returned vec could have its length + // set past its capacity, or if it overflows the vec could be shortened which could create an invalid + // string if this is called via read_to_string. + assert!(n <= buf.len()); + *this.length += n; + } + Err(e) => return Poll::Ready(Err(e)), + } + } + } +} + /// BlockingReader is a boxed dyn `BlockingRead`. pub type BlockingReader = Box; @@ -269,7 +345,7 @@ pub type BlockingReader = Box; /// /// `Read` is required to be implemented, `Seek` and `Iterator` /// is optional. We use `Read` to make users life easier. -pub trait BlockingRead: Send + Sync + 'static { +pub trait BlockingRead: Send + Sync { /// Read synchronously. fn read(&mut self, buf: &mut [u8]) -> Result; @@ -278,6 +354,52 @@ pub trait BlockingRead: Send + Sync + 'static { /// Iterating [`Bytes`] from underlying reader. fn next(&mut self) -> Option>; + + /// Read all data of current reader to the end of buf. + fn read_to_end(&mut self, buf: &mut Vec) -> Result { + let start = buf.len(); + let mut next = MAX_READ_TO_END_GROW_SIZE; + let mut length = start; + + loop { + if buf.capacity() == length { + buf.reserve(next); + // # Safety + // + // We make sure that the length of buf is maintained correctly. 
+ #[allow(clippy::uninit_vec)] + unsafe { + buf.set_len(buf.capacity()); + } + } + + let bs = &mut buf[length..]; + match self.read(bs) { + Ok(0) => { + unsafe { + buf.set_len(length); + } + return Ok(length - start); + } + Ok(n) => { + next = if n >= next { + cmp::min(next.saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) + } else if n >= next / 2 { + next + } else { + cmp::max(next.saturating_div(2), MIN_READ_TO_END_GROW_SIZE) + }; + + // We can't allow bogus values from read. If it is too large, the returned vec could have its length + // set past its capacity, or if it overflows the vec could be shortened which could create an invalid + // string if this is called via read_to_string. + assert!(n <= buf.len()); + length += n; + } + Err(e) => return Err(e), + } + } + } } impl BlockingRead for () { diff --git a/core/src/raw/oio/read/cloneable_read.rs b/core/src/raw/oio/read/cloneable_read.rs deleted file mode 100644 index a5fe921847dd..000000000000 --- a/core/src/raw/oio/read/cloneable_read.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::io::SeekFrom; -use std::sync::Arc; -use std::task::Context; -use std::task::Poll; - -use bytes::Bytes; - -use crate::raw::*; -use crate::*; - -/// Convert given reader into a wrapper with `std::sync::Mutex` for `Send + Sync + Clone`. -pub fn into_cloneable_reader_within_std(reader: R) -> CloneableReaderWithinStd { - CloneableReaderWithinStd(Arc::new(std::sync::Mutex::new(reader))) -} - -/// CloneableReaderWithinStd is a Send + Sync + Clone with `std::sync::Mutex` wrapper of input -/// reader. -/// -/// Caller can clone this reader but only one thread can calling `oio::Read` API at the -/// same time, otherwise, we will return error if lock block happened. -pub struct CloneableReaderWithinStd(Arc>); - -impl CloneableReaderWithinStd { - /// Consume self to get inner reader. - pub fn into_inner(self) -> Arc> { - self.0 - } -} - -impl Clone for CloneableReaderWithinStd { - fn clone(&self) -> Self { - Self(self.0.clone()) - } -} - -impl oio::Read for CloneableReaderWithinStd { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_read(cx, buf), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_seek(cx, pos), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match self.0.try_lock() { - Ok(mut this) => this.poll_next(cx), - Err(_) => Poll::Ready(Some(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - )))), - } - } -} - -/// Convert given reader into a wrapper with `tokio::sync::Mutex` for `Send + Sync + Clone`. 
-pub fn into_cloneable_reader_within_tokio(reader: R) -> CloneableReaderWithinTokio { - CloneableReaderWithinTokio(Arc::new(tokio::sync::Mutex::new(reader))) -} - -/// CloneableReaderWithinTokio is a Send + Sync + Clone with `tokio::sync::Mutex` wrapper of input -/// reader. -/// -/// Caller can clone this reader but only one thread can calling `oio::Read` API at the -/// same time, otherwise, we will return error if lock block happened. -pub struct CloneableReaderWithinTokio(Arc>); - -impl CloneableReaderWithinTokio { - /// Consume self to get inner reader. - pub fn into_inner(self) -> Arc> { - self.0 - } -} - -impl Clone for CloneableReaderWithinTokio { - fn clone(&self) -> Self { - Self(self.0.clone()) - } -} - -impl oio::Read for CloneableReaderWithinTokio { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_read(cx, buf), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_seek(cx, pos), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match self.0.try_lock() { - Ok(mut this) => this.poll_next(cx), - Err(_) => Poll::Ready(Some(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - )))), - } - } -} diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs new file mode 100644 index 000000000000..2c403f4a688d --- /dev/null +++ b/core/src/raw/oio/read/file_read.rs @@ -0,0 +1,539 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::cmp; + +use std::io::SeekFrom; +use std::pin::Pin; +use std::sync::Arc; +use std::task::ready; +use std::task::Context; +use std::task::Poll; + +use bytes::Bytes; +use futures::future::BoxFuture; +use futures::Future; + +use crate::raw::*; +use crate::*; + +/// FileReader that implement range read and streamable read on seekable reader. +/// +/// `oio::Reader` requires the underlying reader to handle range correctly and have streamable support. +/// But some services like `fs`, `hdfs` only have seek support. FileReader implements range and stream +/// support based on `seek`. We will maintain the correct range for give file and implement streamable +/// operations based on [`oio::AdaptiveBuf`]. +pub struct FileReader { + acc: Arc, + path: Arc, + op: OpRead, + + offset: Option, + size: Option, + cur: u64, + + buf: oio::AdaptiveBuf, + state: State, + /// Do we need to reset our cursor? + seek_dirty: bool, +} + +enum State { + Idle, + Send(BoxFuture<'static, Result<(RpRead, R)>>), + Read(R), +} + +/// Safety: State will only be accessed under &mut. +unsafe impl Sync for State {} + +impl FileReader +where + A: Accessor, +{ + /// Create a new FileReader. + /// + /// # Notes + /// + /// It's required that input reader's cursor is at the input `start` of the file. 
+ pub fn new(acc: Arc, path: &str, op: OpRead) -> FileReader { + FileReader { + acc, + path: Arc::new(path.to_string()), + op, + + offset: None, + size: None, + cur: 0, + buf: oio::AdaptiveBuf::default(), + state: State::::Idle, + seek_dirty: false, + } + } +} + +impl FileReader +where + A: Accessor, + R: oio::Read, +{ + fn read_future(&self) -> BoxFuture<'static, Result<(RpRead, R)>> { + let acc = self.acc.clone(); + let path = self.path.clone(); + + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + + Box::pin(async move { acc.read(&path, op).await }) + } + + /// calculate_offset will make sure that the offset has been set. + fn poll_offset( + cx: &mut Context<'_>, + r: &mut R, + range: BytesRange, + ) -> Poll, Option)>> { + let (offset, size) = match (range.offset(), range.size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::End(-(size as i64))))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, Some(size)) + } + }; + + Poll::Ready(Ok((Some(offset), size))) + } + + fn poll_seek_inner( + cx: &mut Context<'_>, + r: &mut R, + offset: Option, + size: Option, + cur: u64, + pos: SeekFrom, + ) -> Poll> { + let offset = offset.expect("offset should be set for calculate_position"); + + match pos { + SeekFrom::Start(n) => { + // It's valid for user to seek outsides end of the file. 
+ r.poll_seek(cx, SeekFrom::Start(offset + n)) + } + SeekFrom::End(n) => { + let size = + size.expect("size should be set for calculate_position when seek with end"); + if size as i64 + n < 0 { + return Poll::Ready(Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}")))); + } + // size is known, we can convert SeekFrom::End into SeekFrom::Start. + let pos = SeekFrom::Start(offset + (size as i64 + n) as u64); + r.poll_seek(cx, pos) + } + SeekFrom::Current(n) => { + if cur as i64 + n < 0 { + return Poll::Ready(Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}")))); + } + let pos = SeekFrom::Start(offset + (cur as i64 + n) as u64); + r.poll_seek(cx, pos) + } + } + } +} + +impl FileReader +where + A: Accessor, + R: oio::BlockingRead, +{ + /// calculate_offset will make sure that the offset has been set. + fn calculate_offset(r: &mut R, range: BytesRange) -> Result<(Option, Option)> { + let (offset, size) = match (range.offset(), range.size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = r.seek(SeekFrom::End(-(size as i64)))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, Some(size)) + } + }; + + Ok((Some(offset), size)) + } + + fn seek_inner( + r: &mut R, + offset: Option, + size: Option, + cur: u64, + pos: SeekFrom, + ) -> Result { + let offset = offset.expect("offset should be set for calculate_position"); + + match pos { + SeekFrom::Start(n) => { + // It's valid for user to seek outsides end of the file. 
+ r.seek(SeekFrom::Start(offset + n)) + } + SeekFrom::End(n) => { + let size = + size.expect("size should be set for calculate_position when seek with end"); + if size as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + // size is known, we can convert SeekFrom::End into SeekFrom::Start. + let pos = SeekFrom::Start(offset + (size as i64 + n) as u64); + r.seek(pos) + } + SeekFrom::Current(n) => { + if cur as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + let pos = SeekFrom::Start(offset + (cur as i64 + n) as u64); + r.seek(pos) + } + } + } +} + +impl oio::Read for FileReader +where + A: Accessor, + R: oio::Read, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_read(cx, buf) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If send future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_read(cx, buf) + } + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; + } + + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return Poll::Ready(Ok(0)); + } + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match ready!(r.poll_read(cx, &mut buf[..size])) { + Ok(0) => Poll::Ready(Ok(0)), + Ok(n) => { + self.cur += n as u64; + Poll::Ready(Ok(n)) + } + // We don't need to reset state here since it's ok to poll the same reader. 
+ Err(err) => Poll::Ready(Err(err)), + } + } + } + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_seek(cx, pos) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If send future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_seek(cx, pos) + } + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; + } + + // Fetch size when seek end. + let current_offset = self.offset.unwrap() + self.cur; + if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { + let size = ready!(r.poll_seek(cx, SeekFrom::End(0)))?; + self.size = Some(size - self.offset.unwrap()); + self.seek_dirty = true; + } + if self.seek_dirty { + // Reset cursor. + ready!(r.poll_seek(cx, SeekFrom::Start(current_offset)))?; + self.seek_dirty = false; + } + + let pos = ready!(Self::poll_seek_inner( + cx, + r, + self.offset, + self.size, + self.cur, + pos + ))?; + self.cur = pos - self.offset.unwrap(); + Poll::Ready(Ok(self.cur)) + } + } + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_next(cx) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If send future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_next(cx) + } + State::Read(r) => { + // We should know where to start read the data. 
+ if self.offset.is_none() { + (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; + } + + self.buf.reserve(); + + let mut buf = self.buf.initialized_mut(); + let buf = buf.initialized_mut(); + + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return Poll::Ready(None); + } + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match ready!(r.poll_read(cx, &mut buf[..size])) { + Ok(0) => Poll::Ready(None), + Ok(n) => { + self.cur += n as u64; + self.buf.record(n); + Poll::Ready(Some(Ok(self.buf.split(n)))) + } + // We don't need to reset state here since it's ok to poll the same reader. + Err(err) => Poll::Ready(Some(Err(err))), + } + } + } + } +} + +impl oio::BlockingRead for FileReader +where + A: Accessor, + R: oio::BlockingRead, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + match &mut self.state { + State::Idle => { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + + let (_, r) = self.acc.blocking_read(&self.path, op)?; + self.state = State::Read(r); + self.read(buf) + } + + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; + } + + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return Ok(0); + } + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match r.read(&mut buf[..size]) { + Ok(0) => Ok(0), + Ok(n) => { + self.cur += n as u64; + Ok(n) + } + // We don't need to reset state here since it's ok to poll the same reader. 
+ Err(err) => Err(err), + } + } + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } + + fn seek(&mut self, pos: SeekFrom) -> Result { + match &mut self.state { + State::Idle => { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + + let (_, r) = self.acc.blocking_read(&self.path, op)?; + self.state = State::Read(r); + self.seek(pos) + } + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; + } + // Fetch size when seek end. + let current_offset = self.offset.unwrap() + self.cur; + if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { + let size = r.seek(SeekFrom::End(0))?; + self.size = Some(size - self.offset.unwrap()); + self.seek_dirty = true; + } + if self.seek_dirty { + // Reset cursor. + r.seek(SeekFrom::Start(current_offset))?; + self.seek_dirty = false; + } + + let pos = Self::seek_inner(r, self.offset, self.size, self.cur, pos)?; + self.cur = pos - self.offset.unwrap(); + Ok(self.cur) + } + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } + + fn next(&mut self) -> Option> { + match &mut self.state { + State::Idle => { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + + let r = match self.acc.blocking_read(&self.path, op) { + Ok((_, r)) => r, + Err(err) => return Some(Err(err)), + }; + self.state = State::Read(r); + self.next() + } + + State::Read(r) => { + // We should know where to start read the data. 
+ if self.offset.is_none() { + (self.offset, self.size) = match Self::calculate_offset(r, self.op.range()) { + Ok(v) => v, + Err(err) => return Some(Err(err)), + } + } + + self.buf.reserve(); + + let mut buf = self.buf.initialized_mut(); + let buf = buf.initialized_mut(); + + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return None; + } + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match r.read(&mut buf[..size]) { + Ok(0) => None, + Ok(n) => { + self.cur += n as u64; + self.buf.record(n); + Some(Ok(self.buf.split(n))) + } + // We don't need to reset state here since it's ok to poll the same reader. + Err(err) => Some(Err(err)), + } + } + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } +} diff --git a/core/src/raw/oio/read/futures_read.rs b/core/src/raw/oio/read/futures_read.rs new file mode 100644 index 000000000000..03b52bbf74ee --- /dev/null +++ b/core/src/raw/oio/read/futures_read.rs @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use futures::AsyncRead; +use futures::AsyncSeek; +use std::io::SeekFrom; +use std::pin::Pin; +use std::task::{Context, Poll}; + +/// FuturesReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. +pub struct FuturesReader { + inner: R, +} + +impl FuturesReader { + /// Create a new futures reader. + pub fn new(inner: R) -> Self { + Self { inner } + } +} + +impl oio::Read for FuturesReader +where + R: AsyncRead + AsyncSeek + Unpin + Send + Sync, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + Pin::new(&mut self.inner).poll_read(cx, buf).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Read) + .with_context("source", "FuturesReader") + }) + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + Pin::new(&mut self.inner).poll_seek(cx, pos).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Seek) + .with_context("source", "FuturesReader") + }) + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + let _ = cx; + + Poll::Ready(Some(Err(Error::new( + ErrorKind::Unsupported, + "FuturesReader doesn't support poll_next", + )))) + } +} diff --git a/core/src/raw/oio/read/into_read_from_file.rs b/core/src/raw/oio/read/into_read_from_file.rs deleted file mode 100644 index f005ac73721d..000000000000 --- a/core/src/raw/oio/read/into_read_from_file.rs +++ /dev/null @@ -1,192 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cmp; -use std::io::Read; -use std::io::Seek; -use std::io::SeekFrom; -use std::pin::Pin; -use std::task::ready; -use std::task::Context; -use std::task::Poll; - -use bytes::Bytes; -use futures::AsyncRead; -use futures::AsyncSeek; - -use crate::raw::*; -use crate::*; - -/// Convert given file into [`oio::Reader`]. -pub fn into_read_from_file(fd: R, start: u64, end: u64) -> FromFileReader { - FromFileReader { - inner: fd, - start, - end, - offset: 0, - } -} - -/// FromFileReader is a wrapper of input fd to implement [`oio::Read`]. -pub struct FromFileReader { - inner: R, - - start: u64, - end: u64, - offset: u64, -} - -impl FromFileReader { - pub(crate) fn current_size(&self) -> i64 { - debug_assert!(self.offset >= self.start, "offset must in range"); - self.end as i64 - self.offset as i64 - } -} - -impl oio::Read for FromFileReader -where - R: AsyncRead + AsyncSeek + Unpin + Send + Sync, -{ - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - if self.current_size() <= 0 { - return Poll::Ready(Ok(0)); - } - - let max = cmp::min(buf.len() as u64, self.current_size() as u64) as usize; - // TODO: we can use pread instead. - let n = - ready!(Pin::new(&mut self.inner).poll_read(cx, &mut buf[..max])).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - self.offset += n as u64; - Poll::Ready(Ok(n)) - } - - /// TODO: maybe we don't need to do seek really, just call pread instead. 
- /// - /// We need to wait for tokio's pread support. - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let (base, offset) = match pos { - SeekFrom::Start(n) => (self.start as i64, n as i64), - SeekFrom::End(n) => (self.end as i64, n), - SeekFrom::Current(n) => (self.offset as i64, n), - }; - - match base.checked_add(offset) { - // Seek to position like `-123` is invalid. - Some(n) if n < 0 => Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative or overflowing position is invalid", - ) - .with_context("position", n.to_string()))), - // Seek to position before the start of current file is invalid. - Some(n) if n < self.start as i64 => Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "seek to a position before start of file is invalid", - ) - .with_context("position", n.to_string()) - .with_context("start", self.start.to_string()))), - Some(n) => { - let cur = - ready!(Pin::new(&mut self.inner).poll_seek(cx, SeekFrom::Start(n as u64))) - .map_err(|err| { - Error::new(ErrorKind::Unexpected, "seek data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - - self.offset = cur; - Poll::Ready(Ok(self.offset - self.start)) - } - None => Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - ))), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; - - Poll::Ready(Some(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support next", - )))) - } -} - -impl oio::BlockingRead for FromFileReader -where - R: Read + Seek + Send + Sync + 'static, -{ - fn read(&mut self, buf: &mut [u8]) -> Result { - if self.current_size() <= 0 { - return Ok(0); - } - - let max = cmp::min(buf.len() as u64, self.current_size() as u64) as usize; - // TODO: we can use pread instead. 
- let n = self.inner.read(&mut buf[..max]).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - self.offset += n as u64; - Ok(n) - } - - /// TODO: maybe we don't need to do seek really, just call pread instead. - /// - /// We need to wait for tokio's pread support. - fn seek(&mut self, pos: SeekFrom) -> Result { - let (base, offset) = match pos { - SeekFrom::Start(n) => (self.start as i64, n as i64), - SeekFrom::End(n) => (self.end as i64, n), - SeekFrom::Current(n) => (self.offset as i64, n), - }; - - match base.checked_add(offset) { - Some(n) if n < 0 => Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )), - Some(n) => { - let cur = self.inner.seek(SeekFrom::Start(n as u64)).map_err(|err| { - Error::new(ErrorKind::Unexpected, "seek data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - - self.offset = cur; - Ok(self.offset - self.start) - } - None => Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )), - } - } - - fn next(&mut self) -> Option> { - Some(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support iterating", - ))) - } -} diff --git a/core/src/raw/oio/read/into_seekable_read_by_range.rs b/core/src/raw/oio/read/into_seekable_read_by_range.rs deleted file mode 100644 index 26ac62ba79d9..000000000000 --- a/core/src/raw/oio/read/into_seekable_read_by_range.rs +++ /dev/null @@ -1,569 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::future::Future; -use std::io::SeekFrom; -use std::pin::Pin; -use std::sync::Arc; -use std::task::ready; -use std::task::Context; -use std::task::Poll; - -use bytes::Bytes; -use futures::future::BoxFuture; - -use crate::raw::*; -use crate::*; - -/// Convert given reader into [`oio::Reader`] by range. -/// -/// # Input -/// -/// The input is an Accessor will may return a non-seekable reader. -/// -/// # Output -/// -/// The output is a reader that can be seek by range. -/// -/// # Notes -/// -/// This operation is not zero cost. If the accessor already returns a -/// seekable reader, please don't use this. -pub fn into_seekable_read_by_range( - acc: Arc, - path: &str, - reader: R, - offset: u64, - size: u64, -) -> ByRangeSeekableReader { - ByRangeSeekableReader { - acc, - path: path.to_string(), - offset, - size, - cur: 0, - state: State::Reading(reader), - last_seek_pos: None, - } -} - -/// ByRangeReader that can do seek on non-seekable reader. -pub struct ByRangeSeekableReader { - acc: Arc, - path: String, - - offset: u64, - size: u64, - cur: u64, - state: State, - - /// Seek operation could return Pending which may lead - /// `SeekFrom::Current(off)` been input multiple times. - /// - /// So we need to store the last seek pos to make sure - /// we always seek to the right position. - last_seek_pos: Option, -} - -enum State { - Idle, - Sending(BoxFuture<'static, Result<(RpRead, R)>>), - Reading(R), -} - -/// Safety: State will only be accessed under &mut. 
-unsafe impl Sync for State {} - -impl ByRangeSeekableReader -where - A: Accessor, -{ - /// calculate the seek position. - /// - /// This operation will not update the `self.cur`. - fn seek_pos(&self, pos: SeekFrom) -> Result { - if let Some(last_pos) = self.last_seek_pos { - return Ok(last_pos); - } - - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::End(n) => (self.size as i64, n), - SeekFrom::Current(n) => (self.cur as i64, n), - }; - - let n = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )) - } - }; - Ok(n) - } -} - -impl ByRangeSeekableReader -where - A: Accessor, - R: oio::Read, -{ - fn read_future(&self) -> BoxFuture<'static, Result<(RpRead, R)>> { - let acc = self.acc.clone(); - let path = self.path.clone(); - let op = OpRead::default().with_range(BytesRange::new( - Some(self.offset + self.cur), - Some(self.size - self.cur), - )); - - Box::pin(async move { acc.read(&path, op).await }) - } -} - -impl ByRangeSeekableReader -where - A: Accessor, - R: oio::BlockingRead, -{ - fn read_action(&self) -> Result<(RpRead, R)> { - let acc = self.acc.clone(); - let path = self.path.clone(); - let op = OpRead::default().with_range(BytesRange::new( - Some(self.offset + self.cur), - Some(self.size - self.cur), - )); - - acc.blocking_read(&path, op) - } -} - -impl oio::Read for ByRangeSeekableReader -where - A: Accessor, - R: oio::Read, -{ - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match &mut self.state { - State::Idle => { - if self.cur >= self.size { - return Poll::Ready(Ok(0)); - } - - self.state = State::Sending(self.read_future()); - self.poll_read(cx, buf) - } - State::Sending(fut) => { - // TODO - // - // we can use RpRead returned here to correct size. 
- let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If read future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - - self.state = State::Reading(r); - self.poll_read(cx, buf) - } - State::Reading(r) => match ready!(Pin::new(r).poll_read(cx, buf)) { - Ok(0) => { - // Reset state to Idle after all data has been consumed. - self.state = State::Idle; - Poll::Ready(Ok(0)) - } - Ok(n) => { - self.cur += n as u64; - Poll::Ready(Ok(n)) - } - Err(e) => { - self.state = State::Idle; - Poll::Ready(Err(e)) - } - }, - } - } - - fn poll_seek(&mut self, _: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let seek_pos = self.seek_pos(pos)?; - self.last_seek_pos = Some(seek_pos); - - match &mut self.state { - State::Idle => { - self.cur = seek_pos; - self.last_seek_pos = None; - Poll::Ready(Ok(self.cur)) - } - State::Sending(_) => { - // It's impossible for us to go into this state while - // poll_seek. We can just drop this future and check state. - self.state = State::Idle; - - self.cur = seek_pos; - self.last_seek_pos = None; - Poll::Ready(Ok(self.cur)) - } - State::Reading(_) => { - if seek_pos == self.cur { - self.last_seek_pos = None; - return Poll::Ready(Ok(self.cur)); - } - - self.state = State::Idle; - self.cur = seek_pos; - self.last_seek_pos = None; - Poll::Ready(Ok(self.cur)) - } - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match &mut self.state { - State::Idle => { - if self.cur >= self.size { - return Poll::Ready(None); - } - - self.state = State::Sending(self.read_future()); - self.poll_next(cx) - } - State::Sending(fut) => { - // TODO - // - // we can use RpRead returned here to correct size. - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If read future returns an error, we should reset - // state to Idle so that we can retry it. 
- self.state = State::Idle; - err - })?; - - self.state = State::Reading(r); - self.poll_next(cx) - } - State::Reading(r) => match ready!(Pin::new(r).poll_next(cx)) { - Some(Ok(bs)) => { - self.cur += bs.len() as u64; - Poll::Ready(Some(Ok(bs))) - } - Some(Err(err)) => { - self.state = State::Idle; - Poll::Ready(Some(Err(err))) - } - None => { - self.state = State::Idle; - Poll::Ready(None) - } - }, - } - } -} - -impl oio::BlockingRead for ByRangeSeekableReader -where - A: Accessor, - R: oio::BlockingRead, -{ - fn read(&mut self, buf: &mut [u8]) -> Result { - match &mut self.state { - State::Idle => { - if self.cur >= self.size { - return Ok(0); - } - - let (_, r) = self.read_action()?; - self.state = State::Reading(r); - self.read(buf) - } - State::Reading(r) => { - match r.read(buf) { - Ok(0) => { - // Reset state to Idle after all data has been consumed. - self.state = State::Idle; - Ok(0) - } - Ok(n) => { - self.cur += n as u64; - Ok(n) - } - Err(e) => { - self.state = State::Idle; - Err(e) - } - } - } - State::Sending(_) => { - unreachable!("It's invalid to go into State::Sending for BlockingRead, please report this bug") - } - } - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - let seek_pos = self.seek_pos(pos)?; - - match &mut self.state { - State::Idle => { - self.cur = seek_pos; - Ok(self.cur) - } - State::Reading(_) => { - if seek_pos == self.cur { - return Ok(self.cur); - } - - self.state = State::Idle; - self.cur = seek_pos; - Ok(self.cur) - } - State::Sending(_) => { - unreachable!("It's invalid to go into State::Sending for BlockingRead, please report this bug") - } - } - } - - fn next(&mut self) -> Option> { - match &mut self.state { - State::Idle => { - if self.cur >= self.size { - return None; - } - - let r = match self.read_action() { - Ok((_, r)) => r, - Err(err) => return Some(Err(err)), - }; - self.state = State::Reading(r); - self.next() - } - State::Reading(r) => match r.next() { - Some(Ok(bs)) => { - self.cur += bs.len() as u64; - 
Some(Ok(bs)) - } - Some(Err(err)) => { - self.state = State::Idle; - Some(Err(err)) - } - None => { - self.state = State::Idle; - None - } - }, - State::Sending(_) => { - unreachable!("It's invalid to go into State::Sending for BlockingRead, please report this bug") - } - } - } -} - -#[cfg(test)] -mod tests { - use std::io::SeekFrom; - - use async_trait::async_trait; - use bytes::Bytes; - use futures::AsyncRead; - use futures::AsyncReadExt; - use futures::AsyncSeekExt; - use rand::prelude::*; - use sha2::Digest; - use sha2::Sha256; - - use super::*; - - // Generate bytes between [4MiB, 16MiB) - fn gen_bytes() -> (Bytes, usize) { - let mut rng = thread_rng(); - - let size = rng.gen_range(4 * 1024 * 1024..16 * 1024 * 1024); - let mut content = vec![0; size]; - rng.fill_bytes(&mut content); - - (Bytes::from(content), size) - } - - #[derive(Debug, Clone, Default)] - struct MockReadService { - data: Bytes, - } - - impl MockReadService { - fn new(data: Bytes) -> Self { - Self { data } - } - } - - #[async_trait] - impl Accessor for MockReadService { - type Reader = MockReader; - type BlockingReader = (); - type Writer = (); - type BlockingWriter = (); - type Pager = (); - type BlockingPager = (); - - fn info(&self) -> AccessorInfo { - let mut am = AccessorInfo::default(); - am.set_native_capability(Capability { - read: true, - ..Default::default() - }); - - am - } - - async fn read(&self, _: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let bs = args.range().apply_on_bytes(self.data.clone()); - - Ok(( - RpRead::new(bs.len() as u64), - MockReader { - inner: futures::io::Cursor::new(bs.into()), - }, - )) - } - } - - #[derive(Debug, Clone, Default)] - struct MockReader { - inner: futures::io::Cursor>, - } - - impl oio::Read for MockReader { - fn poll_read(&mut self, cx: &mut Context, buf: &mut [u8]) -> Poll> { - Pin::new(&mut self.inner).poll_read(cx, buf).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from mock").set_source(err) - }) - } - - fn 
poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let (_, _) = (cx, pos); - - Poll::Ready(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - ))) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let mut bs = vec![0; 4 * 1024]; - let n = ready!(Pin::new(&mut self.inner) - .poll_read(cx, &mut bs) - .map_err( - |err| Error::new(ErrorKind::Unexpected, "read data from mock").set_source(err) - )?); - if n == 0 { - Poll::Ready(None) - } else { - Poll::Ready(Some(Ok(Bytes::from(bs[..n].to_vec())))) - } - } - } - - #[tokio::test] - async fn test_read_all() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let acc = Arc::new(MockReadService::new(bs.clone())); - - let r = MockReader { - inner: futures::io::Cursor::new(bs.to_vec()), - }; - let mut r = - Box::new(into_seekable_read_by_range(acc, "x", r, 0, bs.len() as u64)) as oio::Reader; - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(bs.len(), buf.len(), "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0)).await?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(bs.len(), buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&buf)), - "read twice content" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_read_part() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let acc = Arc::new(MockReadService::new(bs.clone())); - - let r = MockReader { - inner: futures::io::Cursor::new(bs[4096..4096 + 4096].to_vec()), - }; - let mut r = Box::new(into_seekable_read_by_range(acc, "x", r, 4096, 4096)) as oio::Reader; - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(4096, buf.len(), "read size"); - assert_eq!( - format!("{:x}", 
Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0)).await?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(4096, buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read twice content" - ); - - let n = r.seek(SeekFrom::Start(1024)).await?; - assert_eq!(1024, n, "seek to 1024"); - - let mut buf = vec![0; 1024]; - r.read_exact(&mut buf).await?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 1024..4096 + 2048])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek 1024" - ); - - let n = r.seek(SeekFrom::Current(1024)).await?; - assert_eq!(3072, n, "seek to 3072"); - - let mut buf = vec![0; 1024]; - r.read_exact(&mut buf).await?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 3072..4096 + 3072 + 1024])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek to 3072" - ); - - Ok(()) - } -} diff --git a/core/src/raw/oio/read/lazy_read.rs b/core/src/raw/oio/read/lazy_read.rs new file mode 100644 index 000000000000..89705deff534 --- /dev/null +++ b/core/src/raw/oio/read/lazy_read.rs @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use futures::future::BoxFuture; +use futures::Future; +use std::io::SeekFrom; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{ready, Context, Poll}; + +/// LazyReader implements [`oio::Read`] in a lazy way. +/// +/// The real requests are send when users calling read or seek. +pub struct LazyReader { + acc: Arc, + path: Arc, + op: OpRead, + state: State, +} + +enum State { + Idle, + Send(BoxFuture<'static, Result<(RpRead, R)>>), + Read(R), +} + +/// Safety: State will only be accessed under &mut. +unsafe impl Sync for State {} + +impl LazyReader +where + A: Accessor, +{ + /// Create a new [`oio::Reader`] with lazy support. 
+ pub fn new(acc: Arc, path: &str, op: OpRead) -> LazyReader { + LazyReader { + acc, + path: Arc::new(path.to_string()), + op, + + state: State::::Idle, + } + } +} + +impl LazyReader +where + A: Accessor, + R: oio::Read, +{ + fn read_future(&self) -> BoxFuture<'static, Result<(RpRead, R)>> { + let acc = self.acc.clone(); + let path = self.path.clone(); + let op = self.op.clone(); + + Box::pin(async move { acc.read(&path, op).await }) + } +} + +impl oio::Read for LazyReader +where + A: Accessor, + R: oio::Read, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_read(cx, buf) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_read(cx, buf) + } + State::Read(r) => r.poll_read(cx, buf), + } + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_seek(cx, pos) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_seek(cx, pos) + } + State::Read(r) => r.poll_seek(cx, pos), + } + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_next(cx) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. 
+ self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_next(cx) + } + State::Read(r) => r.poll_next(cx), + } + } +} + +impl oio::BlockingRead for LazyReader +where + A: Accessor, + R: oio::BlockingRead, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + match &mut self.state { + State::Idle => { + let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; + self.state = State::Read(r); + self.read(buf) + } + State::Read(r) => r.read(buf), + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } + + fn seek(&mut self, pos: SeekFrom) -> Result { + match &mut self.state { + State::Idle => { + let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; + self.state = State::Read(r); + self.seek(pos) + } + State::Read(r) => r.seek(pos), + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } + + fn next(&mut self) -> Option> { + match &mut self.state { + State::Idle => { + let r = match self.acc.blocking_read(&self.path, self.op.clone()) { + Ok((_, r)) => r, + Err(err) => return Some(Err(err)), + }; + self.state = State::Read(r); + self.next() + } + State::Read(r) => r.next(), + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } +} diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index 841dfdd1aa52..5f7d5d93a163 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -27,20 +27,24 @@ mod into_streamable_read; pub use into_streamable_read::into_streamable_read; pub use into_streamable_read::StreamableReader; -mod into_seekable_read_by_range; -pub use into_seekable_read_by_range::into_seekable_read_by_range; -pub use into_seekable_read_by_range::ByRangeSeekableReader; +mod range_read; +pub use range_read::RangeReader; -mod into_read_from_file; 
-pub use into_read_from_file::into_read_from_file; -pub use into_read_from_file::FromFileReader; +mod file_read; +pub use file_read::FileReader; mod into_read_from_stream; pub use into_read_from_stream::into_read_from_stream; pub use into_read_from_stream::FromStreamReader; -mod cloneable_read; -pub use cloneable_read::into_cloneable_reader_within_std; -pub use cloneable_read::into_cloneable_reader_within_tokio; -pub use cloneable_read::CloneableReaderWithinStd; -pub use cloneable_read::CloneableReaderWithinTokio; +mod futures_read; +pub use futures_read::FuturesReader; + +mod tokio_read; +pub use tokio_read::TokioReader; + +mod std_read; +pub use std_read::StdReader; + +mod lazy_read; +pub use lazy_read::LazyReader; diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs new file mode 100644 index 000000000000..0447aba8f4a8 --- /dev/null +++ b/core/src/raw/oio/read/range_read.rs @@ -0,0 +1,795 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::future::Future; +use std::io::SeekFrom; +use std::pin::Pin; +use std::sync::Arc; +use std::task::ready; +use std::task::Context; +use std::task::Poll; + +use bytes::Bytes; +use futures::future::BoxFuture; + +use crate::raw::*; +use crate::*; + +/// RangeReader that can do seek on non-seekable reader. +/// +/// `oio::Reader` requires the underlying reader to be seekable, but some services like s3, gcs +/// doesn't support seek natively. RangeReader implement seek by read_with_range. We will start +/// a new read request with the correct range when seek is called. +/// +/// The `seek` operation on `RangeReader` is zero cost and purely in-memory. But calling `seek` +/// while there is a pending read request will cancel the request and start a new one. This could +/// add extra cost to the read operation. +pub struct RangeReader { + acc: Arc, + path: Arc, + op: OpRead, + + offset: Option, + size: Option, + cur: u64, + state: State, +} + +enum State { + Idle, + SendStat(BoxFuture<'static, Result>), + SendRead(BoxFuture<'static, Result<(RpRead, R)>>), + Read(R), +} + +/// Safety: State will only be accessed under &mut. +unsafe impl Sync for State {} + +impl RangeReader +where + A: Accessor, +{ + /// Create a new [`oio::Reader`] by range support. + /// + /// # Input + /// + /// The input is an Accessor will may return a non-seekable reader. + /// + /// # Output + /// + /// The output is a reader that can be seek by range. + /// + /// # Notes + /// + /// This operation is not zero cost. If the accessor already returns a + /// seekable reader, please don't use this. + pub fn new(acc: Arc, path: &str, op: OpRead) -> RangeReader { + // Normalize range like `..` into `0..` to make sure offset is valid. 
+ let (offset, size) = match (op.range().offset(), op.range().size()) { + (None, None) => (Some(0), None), + v => v, + }; + + RangeReader { + acc, + path: Arc::new(path.to_string()), + op, + + offset, + size, + cur: 0, + state: State::::Idle, + } + } + + /// Fill current reader's range by total_size. + fn fill_range(&mut self, total_size: u64) -> Result<()> { + (self.offset, self.size) = match (self.offset, self.size) { + (None, Some(size)) => { + if size > total_size { + return Err(Error::new( + ErrorKind::InvalidInput, + "read to a negative or overflowing position is invalid", + )); + } + + (Some(total_size - size), Some(size)) + } + (Some(offset), None) => { + // It's valid for reader to seek to a position that out of the content length. + // We should return `Ok(0)` instead of an error at this case to align fs behavior. + let size = total_size.checked_sub(offset).unwrap_or_default(); + + (Some(offset), Some(size)) + } + (Some(offset), Some(size)) => (Some(offset), Some(size)), + (None, None) => { + unreachable!("fill_range should not reach this case after normalization") + } + }; + + Ok(()) + } + + /// Calculate the current range, maybe sent as next read request. + /// + /// # Panics + /// + /// Offset must be normalized before calling this function. + /// + /// - `..` should be transformed into `0..` + /// - `..size` should be transformed into `(total-size)..total`. + fn calculate_range(&self) -> BytesRange { + let offset = self + .offset + .expect("offset must be set before calculating range"); + + BytesRange::new(Some(offset + self.cur), self.size.map(|v| v - self.cur)) + } +} + +impl RangeReader +where + A: Accessor, + R: oio::Read, +{ + fn read_future(&self) -> BoxFuture<'static, Result<(RpRead, R)>> { + let acc = self.acc.clone(); + let path = self.path.clone(); + + let mut op = self.op.clone(); + // cur != 0 means we have read some data out, we should convert + // the op into deterministic to avoid ETag changes. 
+ if self.cur != 0 { + op = op.into_deterministic(); + } + // Alter OpRead with correct calculated range. + op = op.with_range(self.calculate_range()); + + Box::pin(async move { acc.read(&path, op).await }) + } + + fn stat_future(&self) -> BoxFuture<'static, Result> { + let acc = self.acc.clone(); + let path = self.path.clone(); + + // Handle if-match and if-none-match correctly. + let mut args = OpStat::default(); + // TODO: stat should support range to check if ETag matches. + if self.op.range().is_full() { + if let Some(v) = self.op.if_match() { + args = args.with_if_match(v); + } + if let Some(v) = self.op.if_none_match() { + args = args.with_if_none_match(v); + } + } + + Box::pin(async move { acc.stat(&path, args).await }) + } +} + +impl RangeReader +where + A: Accessor, + R: oio::BlockingRead, +{ + fn read_action(&self) -> Result<(RpRead, R)> { + let acc = self.acc.clone(); + let path = self.path.clone(); + + let mut op = self.op.clone(); + // cur != 0 means we have read some data out, we should convert + // the op into deterministic to avoid ETag changes. + if self.cur != 0 { + op = op.into_deterministic(); + } + // Alter OpRead with correct calculated range. + op = op.with_range(self.calculate_range()); + + acc.blocking_read(&path, op) + } + + fn stat_action(&self) -> Result { + let acc = self.acc.clone(); + let path = self.path.clone(); + + // Handle if-match and if-none-match correctly. + let mut args = OpStat::default(); + // TODO: stat should support range to check if ETag matches. + if self.op.range().is_full() { + if let Some(v) = self.op.if_match() { + args = args.with_if_match(v); + } + if let Some(v) = self.op.if_none_match() { + args = args.with_if_none_match(v); + } + } + + acc.blocking_stat(&path, args) + } +} + +impl oio::Read for RangeReader +where + A: Accessor, + R: oio::Read, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + // Sanity check for normal cases. 
+ if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { + return Poll::Ready(Ok(0)); + } + + match &mut self.state { + State::Idle => { + self.state = if self.offset.is_none() { + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + State::SendStat(self.stat_future()) + } else { + State::SendRead(self.read_future()) + }; + + self.poll_read(cx, buf) + } + State::SendStat(fut) => { + let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + self.state = State::Idle; + self.poll_read(cx, buf) + } + State::SendRead(fut) => { + let (rp, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + // Set size if read returns size hint. + if let Some(size) = rp.size() { + if size != 0 && self.size.is_none() { + self.size = Some(size + self.cur); + } + } + self.state = State::Read(r); + self.poll_read(cx, buf) + } + State::Read(r) => match ready!(Pin::new(r).poll_read(cx, buf)) { + Ok(0) => { + // Reset state to Idle after all data has been consumed. 
+ self.state = State::Idle; + Poll::Ready(Ok(0)) + } + Ok(n) => { + self.cur += n as u64; + Poll::Ready(Ok(n)) + } + Err(e) => { + self.state = State::Idle; + Poll::Ready(Err(e)) + } + }, + } + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + match &mut self.state { + State::Idle => { + let (base, amt) = match pos { + SeekFrom::Start(n) => (0, n as i64), + SeekFrom::Current(n) => (self.cur as i64, n), + SeekFrom::End(n) => { + if let Some(size) = self.size { + (size as i64, n) + } else { + self.state = State::SendStat(self.stat_future()); + return self.poll_seek(cx, pos); + } + } + }; + + let seek_pos = match base.checked_add(amt) { + Some(n) if n >= 0 => n as u64, + _ => { + return Poll::Ready(Err(Error::new( + ErrorKind::InvalidInput, + "invalid seek to a negative or overflowing position", + ))) + } + }; + + self.cur = seek_pos; + Poll::Ready(Ok(self.cur)) + } + State::SendStat(fut) => { + let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + + self.state = State::Idle; + self.poll_seek(cx, pos) + } + State::SendRead(_) => { + // It's impossible for us to go into this state while + // poll_seek. We can just drop this future and check state. + self.state = State::Idle; + self.poll_seek(cx, pos) + } + State::Read(_) => { + // There is an optimization here that we can calculate if users trying to seek + // the same position, for example, `reader.seek(SeekFrom::Current(0))`. + // In this case, we can just return current position without dropping reader. 
+ if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { + return Poll::Ready(Ok(self.cur)); + } + + self.state = State::Idle; + self.poll_seek(cx, pos) + } + } + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + // Sanity check for normal cases. + if self.cur >= self.size.unwrap_or(u64::MAX) { + return Poll::Ready(None); + } + + match &mut self.state { + State::Idle => { + self.state = if self.offset.is_none() { + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + State::SendStat(self.stat_future()) + } else { + State::SendRead(self.read_future()) + }; + + self.poll_next(cx) + } + State::SendStat(fut) => { + let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + + self.state = State::Idle; + self.poll_next(cx) + } + State::SendRead(fut) => { + let (rp, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + // Set size if read returns size hint. + if let Some(size) = rp.size() { + if size != 0 && self.size.is_none() { + self.size = Some(size + self.cur); + } + } + self.state = State::Read(r); + self.poll_next(cx) + } + State::Read(r) => match ready!(Pin::new(r).poll_next(cx)) { + Some(Ok(bs)) => { + self.cur += bs.len() as u64; + Poll::Ready(Some(Ok(bs))) + } + Some(Err(err)) => { + self.state = State::Idle; + Poll::Ready(Some(Err(err))) + } + None => { + self.state = State::Idle; + Poll::Ready(None) + } + }, + } + } +} + +impl oio::BlockingRead for RangeReader +where + A: Accessor, + R: oio::BlockingRead, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + // Sanity check for normal cases. 
+ if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { + return Ok(0); + } + + match &mut self.state { + State::Idle => { + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + if self.offset.is_none() { + let rp = self.stat_action()?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + } + + let (rp, r) = self.read_action()?; + + // Set size if read returns size hint. + if let Some(size) = rp.size() { + if size != 0 && self.size.is_none() { + self.size = Some(size + self.cur); + } + } + + self.state = State::Read(r); + self.read(buf) + } + State::Read(r) => { + match r.read(buf) { + Ok(0) => { + // Reset state to Idle after all data has been consumed. + self.state = State::Idle; + Ok(0) + } + Ok(n) => { + self.cur += n as u64; + Ok(n) + } + Err(e) => { + self.state = State::Idle; + Err(e) + } + } + } + State::SendStat(_) => { + unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + } + State::SendRead(_) => { + unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") + } + } + } + + fn seek(&mut self, pos: SeekFrom) -> Result { + match &mut self.state { + State::Idle => { + let (base, amt) = match pos { + SeekFrom::Start(n) => (0, n as i64), + SeekFrom::End(n) => { + if let Some(size) = self.size { + (size as i64, n) + } else { + let rp = self.stat_action()?; + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + + let size = self.size.expect("size must be valid after fill_range"); + (size as i64, n) + } + } + SeekFrom::Current(n) => (self.cur as i64, n), + }; + + let seek_pos = match base.checked_add(amt) { + Some(n) if n >= 0 => n as u64, + _ => { + return Err(Error::new( + ErrorKind::InvalidInput, + "invalid seek to a negative or overflowing position", + )); + } + }; + + self.cur = seek_pos; + Ok(self.cur) + } + State::Read(_) => { + // There is an 
optimization here that we can calculate if users trying to seek + // the same position, for example, `reader.seek(SeekFrom::Current(0))`. + // In this case, we can just return current position without dropping reader. + if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { + return Ok(self.cur); + } + + self.state = State::Idle; + self.seek(pos) + } + State::SendStat(_) => { + unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + } + State::SendRead(_) => { + unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") + } + } + } + + fn next(&mut self) -> Option> { + match &mut self.state { + State::Idle => { + // Sanity check for normal cases. + if self.cur >= self.size.unwrap_or(u64::MAX) { + return None; + } + + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + if self.offset.is_none() { + let rp = match self.stat_action() { + Ok(rp) => rp, + Err(err) => return Some(Err(err)), + }; + + let length = rp.into_metadata().content_length(); + if let Err(err) = self.fill_range(length) { + return Some(Err(err)); + } + } + + let r = match self.read_action() { + Ok((_, r)) => r, + Err(err) => return Some(Err(err)), + }; + self.state = State::Read(r); + self.next() + } + State::Read(r) => match r.next() { + Some(Ok(bs)) => { + self.cur += bs.len() as u64; + Some(Ok(bs)) + } + Some(Err(err)) => { + self.state = State::Idle; + Some(Err(err)) + } + None => { + self.state = State::Idle; + None + } + }, + State::SendStat(_) => { + unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + } + State::SendRead(_) => { + unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") + } + } + } +} + +#[cfg(test)] +mod tests { + use std::io::SeekFrom; + + use async_trait::async_trait; + use bytes::Bytes; + use futures::AsyncRead; + use 
futures::AsyncReadExt; + use futures::AsyncSeekExt; + use rand::prelude::*; + use sha2::Digest; + use sha2::Sha256; + + use super::*; + + // Generate bytes between [4MiB, 16MiB) + fn gen_bytes() -> (Bytes, usize) { + let mut rng = thread_rng(); + + let size = rng.gen_range(4 * 1024 * 1024..16 * 1024 * 1024); + let mut content = vec![0; size]; + rng.fill_bytes(&mut content); + + (Bytes::from(content), size) + } + + #[derive(Debug, Clone, Default)] + struct MockReadService { + data: Bytes, + } + + impl MockReadService { + fn new(data: Bytes) -> Self { + Self { data } + } + } + + #[async_trait] + impl Accessor for MockReadService { + type Reader = MockReader; + type BlockingReader = (); + type Writer = (); + type BlockingWriter = (); + type Pager = (); + type BlockingPager = (); + + fn info(&self) -> AccessorInfo { + let mut am = AccessorInfo::default(); + am.set_native_capability(Capability { + read: true, + ..Default::default() + }); + + am + } + + async fn read(&self, _: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + let bs = args.range().apply_on_bytes(self.data.clone()); + + Ok(( + RpRead::new(), + MockReader { + inner: futures::io::Cursor::new(bs.into()), + }, + )) + } + } + + #[derive(Debug, Clone, Default)] + struct MockReader { + inner: futures::io::Cursor>, + } + + impl oio::Read for MockReader { + fn poll_read(&mut self, cx: &mut Context, buf: &mut [u8]) -> Poll> { + Pin::new(&mut self.inner).poll_read(cx, buf).map_err(|err| { + Error::new(ErrorKind::Unexpected, "read data from mock").set_source(err) + }) + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + let (_, _) = (cx, pos); + + Poll::Ready(Err(Error::new( + ErrorKind::Unsupported, + "output reader doesn't support seeking", + ))) + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + let mut bs = vec![0; 4 * 1024]; + let n = ready!(Pin::new(&mut self.inner) + .poll_read(cx, &mut bs) + .map_err( + |err| Error::new(ErrorKind::Unexpected, "read data 
from mock").set_source(err) + )?); + if n == 0 { + Poll::Ready(None) + } else { + Poll::Ready(Some(Ok(Bytes::from(bs[..n].to_vec())))) + } + } + } + + #[tokio::test] + async fn test_read_all() -> anyhow::Result<()> { + let (bs, _) = gen_bytes(); + let acc = Arc::new(MockReadService::new(bs.clone())); + + let mut r = Box::new(RangeReader::new( + acc, + "x", + OpRead::default().with_range(BytesRange::from(..)), + )) as oio::Reader; + + let mut buf = Vec::new(); + r.read_to_end(&mut buf).await?; + assert_eq!(bs.len(), buf.len(), "read size"); + assert_eq!( + format!("{:x}", Sha256::digest(&bs)), + format!("{:x}", Sha256::digest(&buf)), + "read content" + ); + + let n = r.seek(SeekFrom::Start(0)).await?; + assert_eq!(n, 0, "seek position must be 0"); + + let mut buf = Vec::new(); + r.read_to_end(&mut buf).await?; + assert_eq!(bs.len(), buf.len(), "read twice size"); + assert_eq!( + format!("{:x}", Sha256::digest(&bs)), + format!("{:x}", Sha256::digest(&buf)), + "read twice content" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_read_part() -> anyhow::Result<()> { + let (bs, _) = gen_bytes(); + let acc = Arc::new(MockReadService::new(bs.clone())); + + let mut r = Box::new(RangeReader::new( + acc, + "x", + OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), + )) as oio::Reader; + + let mut buf = Vec::new(); + r.read_to_end(&mut buf).await?; + assert_eq!(4096, buf.len(), "read size"); + assert_eq!( + format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), + format!("{:x}", Sha256::digest(&buf)), + "read content" + ); + + let n = r.seek(SeekFrom::Start(0)).await?; + assert_eq!(n, 0, "seek position must be 0"); + + let mut buf = Vec::new(); + r.read_to_end(&mut buf).await?; + assert_eq!(4096, buf.len(), "read twice size"); + assert_eq!( + format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), + format!("{:x}", Sha256::digest(&buf)), + "read twice content" + ); + + let n = r.seek(SeekFrom::Start(1024)).await?; + assert_eq!(1024, n, "seek to 1024"); 
+ + let mut buf = vec![0; 1024]; + r.read_exact(&mut buf).await?; + assert_eq!( + format!("{:x}", Sha256::digest(&bs[4096 + 1024..4096 + 2048])), + format!("{:x}", Sha256::digest(&buf)), + "read after seek 1024" + ); + + let n = r.seek(SeekFrom::Current(1024)).await?; + assert_eq!(3072, n, "seek to 3072"); + + let mut buf = vec![0; 1024]; + r.read_exact(&mut buf).await?; + assert_eq!( + format!("{:x}", Sha256::digest(&bs[4096 + 3072..4096 + 3072 + 1024])), + format!("{:x}", Sha256::digest(&buf)), + "read after seek to 3072" + ); + + Ok(()) + } +} diff --git a/core/src/raw/oio/read/std_read.rs b/core/src/raw/oio/read/std_read.rs new file mode 100644 index 000000000000..926b9abb358c --- /dev/null +++ b/core/src/raw/oio/read/std_read.rs @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use std::io::Seek; +use std::io::{Read, SeekFrom}; + +/// StdReader implements [`oio::BlockingRead`] via [`Read`] + [`Seek`]. +pub struct StdReader { + inner: R, +} + +impl StdReader { + /// Create a new std reader.
+ pub fn new(inner: R) -> Self { + Self { inner } + } +} + +impl oio::BlockingRead for StdReader +where + R: Read + Seek + Send + Sync, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + self.inner.read(buf).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::BlockingRead) + .with_context("source", "StdReader") + }) + } + + fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::BlockingSeek) + .with_context("source", "StdReader") + }) + } + + fn next(&mut self) -> Option> { + Some(Err(Error::new( + ErrorKind::Unsupported, + "StdReader doesn't support poll_next", + ))) + } +} diff --git a/core/src/raw/oio/read/tokio_read.rs b/core/src/raw/oio/read/tokio_read.rs new file mode 100644 index 000000000000..966973bdc3c8 --- /dev/null +++ b/core/src/raw/oio/read/tokio_read.rs @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use std::io::SeekFrom; +use std::pin::Pin; +use std::task::{ready, Context, Poll}; +use tokio::io::AsyncSeek; +use tokio::io::{AsyncRead, ReadBuf}; + +/// TokioReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. +pub struct TokioReader { + inner: R, + + seek_pos: Option, +} + +impl TokioReader { + /// Create a new tokio reader. + pub fn new(inner: R) -> Self { + Self { + inner, + seek_pos: None, + } + } +} + +impl oio::Read for TokioReader +where + R: AsyncRead + AsyncSeek + Unpin + Send + Sync, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + let mut buf = ReadBuf::new(buf); + + ready!(Pin::new(&mut self.inner).poll_read(cx, &mut buf)).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Read) + .with_context("source", "TokioReader") + })?; + + Poll::Ready(Ok(buf.filled().len())) + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + if self.seek_pos != Some(pos) { + Pin::new(&mut self.inner).start_seek(pos).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Seek) + .with_context("source", "TokioReader") + })?; + self.seek_pos = Some(pos) + } + + // NOTE: don't return error by `?` here, we need to reset seek_pos.
+ let pos = ready!(Pin::new(&mut self.inner).poll_complete(cx).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Seek) + .with_context("source", "TokioReader") + })); + self.seek_pos = None; + Poll::Ready(pos) + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + let _ = cx; + + Poll::Ready(Some(Err(Error::new( + ErrorKind::Unsupported, + "TokioReader doesn't support poll_next", + )))) + } +} diff --git a/core/src/raw/ops.rs b/core/src/raw/ops.rs index 1e60d329eb91..0eb3937c6a8c 100644 --- a/core/src/raw/ops.rs +++ b/core/src/raw/ops.rs @@ -274,6 +274,23 @@ impl OpRead { Self::default() } + /// The into_deterministic function transforms the OpRead into a deterministic version. + /// + /// This API is utilized because it allows for internal optimizations such as dividing read + /// ranges or retrying the read request from where it failed. In these scenarios, the expected + /// `ETag` value differs from what users specify in `If-Match` or `If-None-Match`. Therefore, + /// we need to eliminate these conditional headers to ensure that the read operation is + /// deterministic. + /// + /// This API is not intended to be used by users and should never be exposed. + pub(crate) fn into_deterministic(self) -> Self { + Self { + if_match: None, + if_none_match: None, + ..self + } + } + /// Create a new OpRead with range. pub fn with_range(mut self, range: BytesRange) -> Self { self.br = range; diff --git a/core/src/raw/rps.rs b/core/src/raw/rps.rs index bc45d1457b6e..4cba4b8cc8b5 100644 --- a/core/src/raw/rps.rs +++ b/core/src/raw/rps.rs @@ -97,32 +97,36 @@ impl From for Request { } /// Reply for `read` operation. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct RpRead { - meta: Metadata, + /// Size is the size of the reader returned by this read operation. + /// + /// - `Some(size)` means the reader has at most size bytes. + /// - `None` means the reader has unknown size.
+ /// + /// It's ok to leave size as empty, but it's recommended to set size if possible. We will use + /// this size as hint to do some optimization like avoid an extra stat or read. + size: Option, } impl RpRead { /// Create a new reply for `read`. - pub fn new(content_length: u64) -> Self { - RpRead { - meta: Metadata::new(EntryMode::FILE).with_content_length(content_length), - } - } - - /// Create reply read with existing metadata. - pub fn with_metadata(meta: Metadata) -> Self { - RpRead { meta } + pub fn new() -> Self { + RpRead::default() } - /// Get a ref of metadata. - pub fn metadata(&self) -> &Metadata { - &self.meta + /// Get the size of the reader returned by this read operation. + /// + /// - `Some(size)` means the reader has at most size bytes. + /// - `None` means the reader has unknown size. + pub fn size(&self) -> Option { + self.size } - /// Consume reply to get the meta. - pub fn into_metadata(self) -> Metadata { - self.meta + /// Set the size of the reader returned by this read operation. + pub fn with_size(mut self, size: Option) -> Self { + self.size = size; + self } } diff --git a/core/src/services/hdfs/error.rs b/core/src/raw/std_io_util.rs similarity index 73% rename from core/src/services/hdfs/error.rs rename to core/src/raw/std_io_util.rs index f97fada23a2b..a36e1e47f6cc 100644 --- a/core/src/services/hdfs/error.rs +++ b/core/src/raw/std_io_util.rs @@ -15,22 +15,25 @@ // specific language governing permissions and limitations // under the License. -use std::io; +use crate::*; -use crate::Error; -use crate::ErrorKind; - -/// Parse all path related errors. +/// Parse std io error into opendal::Error. +/// +/// # TODO /// -/// ## Notes +/// Add `NotADirectory` and `IsADirectory` once they are stable. /// -/// Skip utf-8 check to allow invalid path input.
-pub fn parse_io_error(err: io::Error) -> Error { - use io::ErrorKind::*; +/// ref: +pub fn new_std_io_error(err: std::io::Error) -> Error { + use std::io::ErrorKind::*; let (kind, retryable) = match err.kind() { NotFound => (ErrorKind::NotFound, false), PermissionDenied => (ErrorKind::PermissionDenied, false), + AlreadyExists => (ErrorKind::AlreadyExists, false), + InvalidInput => (ErrorKind::InvalidInput, false), + Unsupported => (ErrorKind::Unsupported, false), + Interrupted | UnexpectedEof | TimedOut | WouldBlock => (ErrorKind::Unexpected, true), _ => (ErrorKind::Unexpected, true), }; diff --git a/core/src/services/azblob/backend.rs b/core/src/services/azblob/backend.rs index de6d5e3e943a..6e46e1b35771 100644 --- a/core/src/services/azblob/backend.rs +++ b/core/src/services/azblob/backend.rs @@ -587,10 +587,10 @@ impl Accessor for AzblobBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azdls/backend.rs b/core/src/services/azdls/backend.rs index 9351a25221e3..ad03274a2c11 100644 --- a/core/src/services/azdls/backend.rs +++ b/core/src/services/azdls/backend.rs @@ -293,9 +293,10 @@ impl Accessor for AzdlsBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git 
a/core/src/services/azfile/backend.rs b/core/src/services/azfile/backend.rs index 75e475699e97..106ec405d2a8 100644 --- a/core/src/services/azfile/backend.rs +++ b/core/src/services/azfile/backend.rs @@ -317,9 +317,10 @@ impl Accessor for AzfileBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/cos/backend.rs b/core/src/services/cos/backend.rs index d376abcb7ab6..e5a52171803e 100644 --- a/core/src/services/cos/backend.rs +++ b/core/src/services/cos/backend.rs @@ -333,9 +333,10 @@ impl Accessor for CosBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/dbfs/backend.rs b/core/src/services/dbfs/backend.rs index c01acb043ae6..be4822309840 100644 --- a/core/src/services/dbfs/backend.rs +++ b/core/src/services/dbfs/backend.rs @@ -200,32 +200,9 @@ impl Accessor for DbfsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let mut meta = Metadata::new(EntryMode::FILE); - - if let Some(length) = args.range().size() { - meta.set_content_length(length); - } else { - let stat_resp = self.core.dbfs_get_status(path).await?; - meta = parse_into_metadata(path, stat_resp.headers())?; - let decoded_response = - 
serde_json::from_slice::(&stat_resp.into_body().bytes().await?) - .map_err(new_json_deserialize_error)?; - meta.set_last_modified(parse_datetime_from_from_timestamp_millis( - decoded_response.modification_time, - )?); - meta.set_mode(if decoded_response.is_dir { - EntryMode::DIR - } else { - EntryMode::FILE - }); - if !decoded_response.is_dir { - meta.set_content_length(decoded_response.file_size as u64); - } - } - let op = DbfsReader::new(self.core.clone(), args, path.to_string()); - Ok((RpRead::with_metadata(meta), op)) + Ok((RpRead::new(), op)) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/dropbox/backend.rs b/core/src/services/dropbox/backend.rs index 8400b3300cb1..e250ea76c0c8 100644 --- a/core/src/services/dropbox/backend.rs +++ b/core/src/services/dropbox/backend.rs @@ -97,10 +97,8 @@ impl Accessor for DropboxBackend { let resp = self.core.dropbox_get(path, args).await?; let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs index 40ada10f2194..4b0d14207b1d 100644 --- a/core/src/services/fs/backend.rs +++ b/core/src/services/fs/backend.rs @@ -15,19 +15,15 @@ // specific language governing permissions and limitations // under the License. 
-use std::cmp::min; use std::collections::HashMap; -use std::io::SeekFrom; use std::path::Path; use std::path::PathBuf; -use async_compat::Compat; use async_trait::async_trait; use chrono::DateTime; use log::debug; use uuid::Uuid; -use super::error::parse_io_error; use super::pager::FsPager; use super::writer::FsWriter; use crate::raw::*; @@ -212,7 +208,7 @@ impl FsBackend { })? .to_path_buf(); - std::fs::create_dir_all(parent).map_err(parse_io_error)?; + std::fs::create_dir_all(parent).map_err(new_std_io_error)?; Ok(p) } @@ -240,7 +236,7 @@ impl FsBackend { tokio::fs::create_dir_all(&parent) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok(p) } @@ -248,8 +244,8 @@ impl FsBackend { #[async_trait] impl Accessor for FsBackend { - type Reader = oio::FromFileReader>; - type BlockingReader = oio::FromFileReader; + type Reader = oio::TokioReader; + type BlockingReader = oio::StdReader; type Writer = FsWriter; type BlockingWriter = FsWriter; type Pager = Option>; @@ -264,7 +260,6 @@ impl Accessor for FsBackend { read: true, read_can_seek: true, - read_with_range: true, write: true, write_can_empty: true, @@ -291,7 +286,7 @@ impl Accessor for FsBackend { tokio::fs::create_dir_all(&p) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } @@ -305,20 +300,18 @@ impl Accessor for FsBackend { /// - open file first, and than use `seek`. 
(100ns) /// /// Benchmark could be found [here](https://gist.github.com/Xuanwo/48f9cfbc3022ea5f865388bb62e1a70f) - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - use oio::ReadExt; - + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let p = self.root.join(path.trim_end_matches('/')); - let mut f = tokio::fs::OpenOptions::new() + let f = tokio::fs::OpenOptions::new() .read(true) .open(&p) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; - let total_length = if self.enable_path_check { + if self.enable_path_check { // Get fs metadata of file at given path, ensuring it is not a false-positive due to slash normalization. - let meta = f.metadata().await.map_err(parse_io_error)?; + let meta = f.metadata().await.map_err(new_std_io_error)?; if meta.is_dir() != path.ends_with('/') { return Err(Error::new( ErrorKind::NotFound, @@ -331,41 +324,10 @@ impl Accessor for FsBackend { "given path is a directory", )); } + } - meta.len() - } else { - use tokio::io::AsyncSeekExt; - - f.seek(SeekFrom::End(0)).await.map_err(parse_io_error)? - }; - - let f = Compat::new(f); - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, total_length)), - // Read from offset. - (Some(offset), None) => (offset, total_length), - // Read the last size bytes. - (None, Some(size)) => ( - if total_length > size { - total_length - size - } else { - 0 - }, - total_length, - ), - // Read the whole file. - (None, None) => (0, total_length), - }; - - let mut r = oio::into_read_from_file(f, start, end); - - // Rewind to make sure we are on the correct offset. 
- r.seek(SeekFrom::Start(0)).await?; - - Ok((RpRead::new(end - start), r)) + let r = oio::TokioReader::new(f); + Ok((RpRead::new(), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -378,7 +340,7 @@ impl Accessor for FsBackend { if op.append() && tokio::fs::try_exists(&target_path) .await - .map_err(parse_io_error)? + .map_err(new_std_io_error)? { (target_path, None) } else { @@ -401,7 +363,7 @@ impl Accessor for FsBackend { let f = open_options .open(tmp_path.as_ref().unwrap_or(&target_path)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok((RpWrite::new(), FsWriter::new(target_path, tmp_path, f))) } @@ -410,11 +372,11 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - tokio::fs::metadata(&from).await.map_err(parse_io_error)?; + tokio::fs::metadata(&from).await.map_err(new_std_io_error)?; let to = Self::ensure_write_abs_path(&self.root, to.trim_end_matches('/')).await?; - tokio::fs::copy(from, to).await.map_err(parse_io_error)?; + tokio::fs::copy(from, to).await.map_err(new_std_io_error)?; Ok(RpCopy::default()) } @@ -423,11 +385,13 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - tokio::fs::metadata(&from).await.map_err(parse_io_error)?; + tokio::fs::metadata(&from).await.map_err(new_std_io_error)?; let to = Self::ensure_write_abs_path(&self.root, to.trim_end_matches('/')).await?; - tokio::fs::rename(from, to).await.map_err(parse_io_error)?; + tokio::fs::rename(from, to) + .await + .map_err(new_std_io_error)?; Ok(RpRename::default()) } @@ -435,7 +399,7 @@ impl Accessor for FsBackend { async fn stat(&self, path: &str, _: OpStat) -> Result { let p = self.root.join(path.trim_end_matches('/')); - let meta = tokio::fs::metadata(&p).await.map_err(parse_io_error)?; + let meta = 
tokio::fs::metadata(&p).await.map_err(new_std_io_error)?; if self.enable_path_check && meta.is_dir() != path.ends_with('/') { return Err(Error::new( @@ -456,7 +420,7 @@ impl Accessor for FsBackend { .with_last_modified( meta.modified() .map(DateTime::from) - .map_err(parse_io_error)?, + .map_err(new_std_io_error)?, ); Ok(RpStat::new(m)) @@ -470,15 +434,15 @@ impl Accessor for FsBackend { match meta { Ok(meta) => { if meta.is_dir() { - tokio::fs::remove_dir(&p).await.map_err(parse_io_error)?; + tokio::fs::remove_dir(&p).await.map_err(new_std_io_error)?; } else { - tokio::fs::remove_file(&p).await.map_err(parse_io_error)?; + tokio::fs::remove_file(&p).await.map_err(new_std_io_error)?; } Ok(RpDelete::default()) } Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(RpDelete::default()), - Err(err) => Err(parse_io_error(err)), + Err(err) => Err(new_std_io_error(err)), } } @@ -491,7 +455,7 @@ impl Accessor for FsBackend { return if e.kind() == std::io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) }; } }; @@ -504,24 +468,22 @@ impl Accessor for FsBackend { fn blocking_create_dir(&self, path: &str, _: OpCreateDir) -> Result { let p = self.root.join(path.trim_end_matches('/')); - std::fs::create_dir_all(p).map_err(parse_io_error)?; + std::fs::create_dir_all(p).map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - use oio::BlockingRead; - + fn blocking_read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::BlockingReader)> { let p = self.root.join(path.trim_end_matches('/')); - let mut f = std::fs::OpenOptions::new() + let f = std::fs::OpenOptions::new() .read(true) .open(p) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; - let total_length = if self.enable_path_check { + if self.enable_path_check { // Get fs metadata of file at given path, ensuring it is not a false-positive due to 
slash normalization. - let meta = f.metadata().map_err(parse_io_error)?; + let meta = f.metadata().map_err(new_std_io_error)?; if meta.is_dir() != path.ends_with('/') { return Err(Error::new( ErrorKind::NotFound, @@ -534,39 +496,11 @@ impl Accessor for FsBackend { "given path is a directory", )); } + } - meta.len() - } else { - use std::io::Seek; - - f.seek(SeekFrom::End(0)).map_err(parse_io_error)? - }; - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, total_length)), - // Read from offset. - (Some(offset), None) => (offset, total_length), - // Read the last size bytes. - (None, Some(size)) => ( - if total_length > size { - total_length - size - } else { - 0 - }, - total_length, - ), - // Read the whole file. - (None, None) => (0, total_length), - }; - - let mut r: oio::FromFileReader = oio::into_read_from_file(f, start, end); - - // Rewind to make sure we are on the correct offset. - r.seek(SeekFrom::Start(0))?; + let r = oio::StdReader::new(f); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(), r)) } fn blocking_write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { @@ -579,7 +513,7 @@ impl Accessor for FsBackend { if op.append() && Path::new(&target_path) .try_exists() - .map_err(parse_io_error)? + .map_err(new_std_io_error)? 
{ (target_path, None) } else { @@ -602,7 +536,7 @@ impl Accessor for FsBackend { let f = f .open(tmp_path.as_ref().unwrap_or(&target_path)) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok((RpWrite::new(), FsWriter::new(target_path, tmp_path, f))) } @@ -611,11 +545,11 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - std::fs::metadata(&from).map_err(parse_io_error)?; + std::fs::metadata(&from).map_err(new_std_io_error)?; let to = Self::blocking_ensure_write_abs_path(&self.root, to.trim_end_matches('/'))?; - std::fs::copy(from, to).map_err(parse_io_error)?; + std::fs::copy(from, to).map_err(new_std_io_error)?; Ok(RpCopy::default()) } @@ -624,11 +558,11 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - std::fs::metadata(&from).map_err(parse_io_error)?; + std::fs::metadata(&from).map_err(new_std_io_error)?; let to = Self::blocking_ensure_write_abs_path(&self.root, to.trim_end_matches('/'))?; - std::fs::rename(from, to).map_err(parse_io_error)?; + std::fs::rename(from, to).map_err(new_std_io_error)?; Ok(RpRename::default()) } @@ -636,7 +570,7 @@ impl Accessor for FsBackend { fn blocking_stat(&self, path: &str, _: OpStat) -> Result { let p = self.root.join(path.trim_end_matches('/')); - let meta = std::fs::metadata(p).map_err(parse_io_error)?; + let meta = std::fs::metadata(p).map_err(new_std_io_error)?; if self.enable_path_check && meta.is_dir() != path.ends_with('/') { return Err(Error::new( @@ -657,7 +591,7 @@ impl Accessor for FsBackend { .with_last_modified( meta.modified() .map(DateTime::from) - .map_err(parse_io_error)?, + .map_err(new_std_io_error)?, ); Ok(RpStat::new(m)) @@ -671,15 +605,15 @@ impl Accessor for FsBackend { match meta { Ok(meta) => { if meta.is_dir() { - std::fs::remove_dir(&p).map_err(parse_io_error)?; + 
std::fs::remove_dir(&p).map_err(new_std_io_error)?; } else { - std::fs::remove_file(&p).map_err(parse_io_error)?; + std::fs::remove_file(&p).map_err(new_std_io_error)?; } Ok(RpDelete::default()) } Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(RpDelete::default()), - Err(err) => Err(parse_io_error(err)), + Err(err) => Err(new_std_io_error(err)), } } @@ -692,7 +626,7 @@ impl Accessor for FsBackend { return if e.kind() == std::io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) }; } }; diff --git a/core/src/services/fs/error.rs b/core/src/services/fs/error.rs deleted file mode 100644 index de9f710ce773..000000000000 --- a/core/src/services/fs/error.rs +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io; - -use crate::Error; -use crate::ErrorKind; - -/// Parse all io related errors. 
-pub fn parse_io_error(err: io::Error) -> Error { - use io::ErrorKind::*; - - let (kind, retryable) = match err.kind() { - NotFound => (ErrorKind::NotFound, false), - PermissionDenied => (ErrorKind::PermissionDenied, false), - Interrupted | UnexpectedEof | TimedOut | WouldBlock => (ErrorKind::Unexpected, true), - _ => (ErrorKind::Unexpected, true), - }; - - let mut err = Error::new(kind, &err.kind().to_string()).set_source(err); - - if retryable { - err = err.set_temporary(); - } - - err -} diff --git a/core/src/services/fs/mod.rs b/core/src/services/fs/mod.rs index aa2a5fca1f65..28aae0814c1c 100644 --- a/core/src/services/fs/mod.rs +++ b/core/src/services/fs/mod.rs @@ -18,6 +18,5 @@ mod backend; pub use backend::FsBuilder as Fs; -mod error; mod pager; mod writer; diff --git a/core/src/services/fs/pager.rs b/core/src/services/fs/pager.rs index f15ac8eaa9cb..1c1e1fcce9ac 100644 --- a/core/src/services/fs/pager.rs +++ b/core/src/services/fs/pager.rs @@ -20,7 +20,6 @@ use std::path::PathBuf; use async_trait::async_trait; -use super::error::parse_io_error; use crate::raw::*; use crate::EntryMode; use crate::Metadata; @@ -49,7 +48,7 @@ impl oio::Page for FsPager { let mut oes: Vec = Vec::with_capacity(self.size); for _ in 0..self.size { - let de = match self.rd.next_entry().await.map_err(parse_io_error)? { + let de = match self.rd.next_entry().await.map_err(new_std_io_error)? { Some(de) => de, None => break, }; @@ -67,7 +66,7 @@ impl oio::Page for FsPager { // (no extra system calls needed), but some Unix platforms may // require the equivalent call to symlink_metadata to learn about // the target file type. 
- let file_type = de.file_type().await.map_err(parse_io_error)?; + let file_type = de.file_type().await.map_err(new_std_io_error)?; let d = if file_type.is_file() { oio::Entry::new(&rel_path, Metadata::new(EntryMode::FILE)) @@ -91,7 +90,7 @@ impl oio::BlockingPage for FsPager { for _ in 0..self.size { let de = match self.rd.next() { - Some(de) => de.map_err(parse_io_error)?, + Some(de) => de.map_err(new_std_io_error)?, None => break, }; @@ -108,7 +107,7 @@ impl oio::BlockingPage for FsPager { // (no extra system calls needed), but some Unix platforms may // require the equivalent call to symlink_metadata to learn about // the target file type. - let file_type = de.file_type().map_err(parse_io_error)?; + let file_type = de.file_type().map_err(new_std_io_error)?; let d = if file_type.is_file() { oio::Entry::new(&rel_path, Metadata::new(EntryMode::FILE)) diff --git a/core/src/services/fs/writer.rs b/core/src/services/fs/writer.rs index d1283d4ca8c1..bd41f5611338 100644 --- a/core/src/services/fs/writer.rs +++ b/core/src/services/fs/writer.rs @@ -28,7 +28,6 @@ use futures::FutureExt; use tokio::io::AsyncWrite; use tokio::io::AsyncWriteExt; -use super::error::parse_io_error; use crate::raw::*; use crate::*; @@ -64,7 +63,7 @@ impl oio::Write for FsWriter { Pin::new(f) .poll_write_vectored(cx, &bs.vectored_chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn poll_close(&mut self, cx: &mut Context<'_>) -> Poll> { @@ -79,13 +78,13 @@ impl oio::Write for FsWriter { let tmp_path = self.tmp_path.clone(); let target_path = self.target_path.clone(); self.fut = Some(Box::pin(async move { - f.flush().await.map_err(parse_io_error)?; - f.sync_all().await.map_err(parse_io_error)?; + f.flush().await.map_err(new_std_io_error)?; + f.sync_all().await.map_err(new_std_io_error)?; if let Some(tmp_path) = &tmp_path { tokio::fs::rename(tmp_path, &target_path) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; } Ok(()) @@ -107,7 +106,7 @@ impl oio::Write for 
FsWriter { if let Some(tmp_path) = &tmp_path { tokio::fs::remove_file(tmp_path) .await - .map_err(parse_io_error) + .map_err(new_std_io_error) } else { Err(Error::new( ErrorKind::Unsupported, @@ -124,15 +123,15 @@ impl oio::BlockingWrite for FsWriter { let f = self.f.as_mut().expect("FsWriter must be initialized"); f.write_vectored(&bs.vectored_chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn close(&mut self) -> Result<()> { if let Some(f) = self.f.take() { - f.sync_all().map_err(parse_io_error)?; + f.sync_all().map_err(new_std_io_error)?; if let Some(tmp_path) = &self.tmp_path { - std::fs::rename(tmp_path, &self.target_path).map_err(parse_io_error)?; + std::fs::rename(tmp_path, &self.target_path).map_err(new_std_io_error)?; } } diff --git a/core/src/services/ftp/backend.rs b/core/src/services/ftp/backend.rs index 2fe8ffe9a8c9..0fe5a17784b2 100644 --- a/core/src/services/ftp/backend.rs +++ b/core/src/services/ftp/backend.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::cmp::min; use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; @@ -319,37 +318,38 @@ impl Accessor for FtpBackend { return Ok(RpCreateDir::default()); } + /// TODO: migrate to FileReader maybe? 
async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { let mut ftp_stream = self.ftp_connect(Operation::Read).await?; let meta = self.ftp_stat(path).await?; let br = args.range(); - let (r, size): (Box, _) = match (br.offset(), br.size()) { + let r: Box = match (br.offset(), br.size()) { (Some(offset), Some(size)) => { ftp_stream.resume_transfer(offset as usize).await?; let ds = ftp_stream.retr_as_stream(path).await?.take(size); - (Box::new(ds), min(size, meta.size() as u64 - offset)) + Box::new(ds) } (Some(offset), None) => { ftp_stream.resume_transfer(offset as usize).await?; let ds = ftp_stream.retr_as_stream(path).await?; - (Box::new(ds), meta.size() as u64 - offset) + Box::new(ds) } (None, Some(size)) => { ftp_stream .resume_transfer((meta.size() as u64 - size) as usize) .await?; let ds = ftp_stream.retr_as_stream(path).await?; - (Box::new(ds), size) + Box::new(ds) } (None, None) => { let ds = ftp_stream.retr_as_stream(path).await?; - (Box::new(ds), meta.size() as u64) + Box::new(ds) } }; - Ok((RpRead::new(size), FtpReader::new(r, ftp_stream))) + Ok((RpRead::new(), FtpReader::new(r, ftp_stream))) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index e65fb155de91..0522a8d5c143 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -389,8 +389,10 @@ impl Accessor for GcsBackend { let resp = self.core.gcs_get_object(path, &args).await?; if resp.status().is_success() { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } else if resp.status() == StatusCode::RANGE_NOT_SATISFIABLE { + Ok((RpRead::new(), IncomingAsyncBody::empty())) } else { Err(parse_error(resp).await?) 
} diff --git a/core/src/services/gdrive/backend.rs b/core/src/services/gdrive/backend.rs index 879eab7cc27a..4e1274d217e5 100644 --- a/core/src/services/gdrive/backend.rs +++ b/core/src/services/gdrive/backend.rs @@ -118,25 +118,14 @@ impl Accessor for GdriveBackend { } async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - // We need to request for metadata and body separately here. - // Request for metadata first to check if the file exists. - let resp = self.core.gdrive_stat(path).await?; + let resp = self.core.gdrive_get(path).await?; let status = resp.status(); match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; - let meta = self.parse_metadata(body)?; - - let resp = self.core.gdrive_get(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => Ok((RpRead::with_metadata(meta), resp.into_body())), - _ => Err(parse_error(resp).await?), - } + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/ghac/backend.rs b/core/src/services/ghac/backend.rs index ea1545db4d28..b163a8f9028d 100644 --- a/core/src/services/ghac/backend.rs +++ b/core/src/services/ghac/backend.rs @@ -327,9 +327,10 @@ impl Accessor for GhacBackend { let status = resp.status(); match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/hdfs/backend.rs b/core/src/services/hdfs/backend.rs index a6078c89ebe3..a52d9c2842f6 100644 --- a/core/src/services/hdfs/backend.rs +++ b/core/src/services/hdfs/backend.rs @@ 
-15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. -use std::cmp::min; use std::collections::HashMap; use std::fmt::Debug; use std::io; -use std::io::SeekFrom; use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; use log::debug; -use super::error::parse_io_error; use super::pager::HdfsPager; use super::writer::HdfsWriter; use crate::raw::*; @@ -128,14 +125,14 @@ impl Builder for HdfsBuilder { builder = builder.with_user(user.as_str()); } - let client = builder.connect().map_err(parse_io_error)?; + let client = builder.connect().map_err(new_std_io_error)?; // Create root dir if not exist. if let Err(e) = client.metadata(&root) { if e.kind() == io::ErrorKind::NotFound { debug!("root {} is not exist, creating now", root); - client.create_dir(&root).map_err(parse_io_error)? + client.create_dir(&root).map_err(new_std_io_error)? } } @@ -160,8 +157,8 @@ unsafe impl Sync for HdfsBackend {} #[async_trait] impl Accessor for HdfsBackend { - type Reader = oio::FromFileReader; - type BlockingReader = oio::FromFileReader; + type Reader = oio::FuturesReader; + type BlockingReader = oio::StdReader; type Writer = HdfsWriter; type BlockingWriter = HdfsWriter; type Pager = Option; @@ -176,7 +173,6 @@ impl Accessor for HdfsBackend { read: true, read_can_seek: true, - read_with_range: true, write: true, // TODO: wait for https://github.com/apache/incubator-opendal/pull/2715 @@ -199,44 +195,25 @@ impl Accessor for HdfsBackend { async fn create_dir(&self, path: &str, _: OpCreateDir) -> Result { let p = build_rooted_abs_path(&self.root, path); - self.client.create_dir(&p).map_err(parse_io_error)?; + self.client.create_dir(&p).map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - use oio::ReadExt; - + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let p = build_rooted_abs_path(&self.root, path); - // 
This will be addressed by https://github.com/apache/incubator-opendal/issues/506 - let meta = self.client.metadata(&p).map_err(parse_io_error)?; - let f = self .client .open_file() .read(true) .async_open(&p) .await - .map_err(parse_io_error)?; - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, meta.len())), - // Read from offset. - (Some(offset), None) => (offset, meta.len()), - // Read the last size bytes. - (None, Some(size)) => (meta.len() - size, meta.len()), - // Read the whole file. - (None, None) => (0, meta.len()), - }; + .map_err(new_std_io_error)?; - let mut r = oio::into_read_from_file(f, start, end); - // Rewind to make sure we are on the correct offset. - r.seek(SeekFrom::Start(0)).await?; + let r = oio::FuturesReader::new(f); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -255,7 +232,7 @@ impl Accessor for HdfsBackend { self.client .create_dir(&parent.to_string_lossy()) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; let mut open_options = self.client.open_file(); open_options.create(true); @@ -265,7 +242,10 @@ impl Accessor for HdfsBackend { open_options.write(true); } - let f = open_options.async_open(&p).await.map_err(parse_io_error)?; + let f = open_options + .async_open(&p) + .await + .map_err(new_std_io_error)?; Ok((RpWrite::new(), HdfsWriter::new(f))) } @@ -273,7 +253,7 @@ impl Accessor for HdfsBackend { async fn stat(&self, path: &str, _: OpStat) -> Result { let p = build_rooted_abs_path(&self.root, path); - let meta = self.client.metadata(&p).map_err(parse_io_error)?; + let meta = self.client.metadata(&p).map_err(new_std_io_error)?; let mode = if meta.is_dir() { EntryMode::DIR @@ -298,7 +278,7 @@ impl Accessor for HdfsBackend { return if err.kind() == io::ErrorKind::NotFound { Ok(RpDelete::default()) } else { - 
Err(parse_io_error(err)) + Err(new_std_io_error(err)) }; } @@ -311,7 +291,7 @@ impl Accessor for HdfsBackend { self.client.remove_file(&p) }; - result.map_err(parse_io_error)?; + result.map_err(new_std_io_error)?; Ok(RpDelete::default()) } @@ -325,7 +305,7 @@ impl Accessor for HdfsBackend { return if e.kind() == io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) } } }; @@ -338,43 +318,24 @@ impl Accessor for HdfsBackend { fn blocking_create_dir(&self, path: &str, _: OpCreateDir) -> Result { let p = build_rooted_abs_path(&self.root, path); - self.client.create_dir(&p).map_err(parse_io_error)?; + self.client.create_dir(&p).map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - use oio::BlockingRead; - + fn blocking_read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::BlockingReader)> { let p = build_rooted_abs_path(&self.root, path); - // This will be addressed by https://github.com/apache/incubator-opendal/issues/506 - let meta = self.client.metadata(&p).map_err(parse_io_error)?; - let f = self .client .open_file() .read(true) .open(&p) - .map_err(parse_io_error)?; - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, meta.len())), - // Read from offset. - (Some(offset), None) => (offset, meta.len()), - // Read the last size bytes. - (None, Some(size)) => (meta.len() - size, meta.len()), - // Read the whole file. - (None, None) => (0, meta.len()), - }; + .map_err(new_std_io_error)?; - let mut r = oio::into_read_from_file(f, start, end); - // Rewind to make sure we are on the correct offset. 
- r.seek(SeekFrom::Start(0))?; + let r = oio::StdReader::new(f); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(), r)) } fn blocking_write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { @@ -393,7 +354,7 @@ impl Accessor for HdfsBackend { self.client .create_dir(&parent.to_string_lossy()) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; let f = self .client @@ -401,7 +362,7 @@ impl Accessor for HdfsBackend { .create(true) .write(true) .open(&p) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok((RpWrite::new(), HdfsWriter::new(f))) } @@ -409,7 +370,7 @@ impl Accessor for HdfsBackend { fn blocking_stat(&self, path: &str, _: OpStat) -> Result { let p = build_rooted_abs_path(&self.root, path); - let meta = self.client.metadata(&p).map_err(parse_io_error)?; + let meta = self.client.metadata(&p).map_err(new_std_io_error)?; let mode = if meta.is_dir() { EntryMode::DIR @@ -434,7 +395,7 @@ impl Accessor for HdfsBackend { return if err.kind() == io::ErrorKind::NotFound { Ok(RpDelete::default()) } else { - Err(parse_io_error(err)) + Err(new_std_io_error(err)) }; } @@ -447,7 +408,7 @@ impl Accessor for HdfsBackend { self.client.remove_file(&p) }; - result.map_err(parse_io_error)?; + result.map_err(new_std_io_error)?; Ok(RpDelete::default()) } @@ -461,7 +422,7 @@ impl Accessor for HdfsBackend { return if e.kind() == io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) } } }; diff --git a/core/src/services/hdfs/mod.rs b/core/src/services/hdfs/mod.rs index c1e98a3b6288..996a654fac3d 100644 --- a/core/src/services/hdfs/mod.rs +++ b/core/src/services/hdfs/mod.rs @@ -18,6 +18,5 @@ mod backend; pub use backend::HdfsBuilder as Hdfs; -mod error; mod pager; mod writer; diff --git a/core/src/services/hdfs/writer.rs b/core/src/services/hdfs/writer.rs index a436a8416e4d..4990df40a99e 100644 --- a/core/src/services/hdfs/writer.rs +++ b/core/src/services/hdfs/writer.rs 
@@ -23,7 +23,6 @@ use std::task::Poll; use async_trait::async_trait; use futures::AsyncWrite; -use super::error::parse_io_error; use crate::raw::*; use crate::*; @@ -42,7 +41,7 @@ impl oio::Write for HdfsWriter { fn poll_write(&mut self, cx: &mut Context<'_>, bs: &dyn oio::WriteBuf) -> Poll> { Pin::new(&mut self.f) .poll_write(cx, bs.chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn poll_abort(&mut self, _: &mut Context<'_>) -> Poll> { @@ -53,17 +52,19 @@ impl oio::Write for HdfsWriter { } fn poll_close(&mut self, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.f).poll_close(cx).map_err(parse_io_error) + Pin::new(&mut self.f) + .poll_close(cx) + .map_err(new_std_io_error) } } impl oio::BlockingWrite for HdfsWriter { fn write(&mut self, bs: &dyn oio::WriteBuf) -> Result { - self.f.write(bs.chunk()).map_err(parse_io_error) + self.f.write(bs.chunk()).map_err(new_std_io_error) } fn close(&mut self) -> Result<()> { - self.f.flush().map_err(parse_io_error)?; + self.f.flush().map_err(new_std_io_error)?; Ok(()) } diff --git a/core/src/services/http/backend.rs b/core/src/services/http/backend.rs index af50631cca47..233237f8721f 100644 --- a/core/src/services/http/backend.rs +++ b/core/src/services/http/backend.rs @@ -238,9 +238,10 @@ impl Accessor for HttpBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/ipfs/backend.rs b/core/src/services/ipfs/backend.rs index ebf1e327e881..6a557d6a476e 100644 --- a/core/src/services/ipfs/backend.rs +++ b/core/src/services/ipfs/backend.rs @@ -194,10 +194,7 @@ impl Accessor for IpfsBackend { let status = 
resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/ipmfs/backend.rs b/core/src/services/ipmfs/backend.rs index e7999767b93d..303b1db8df3c 100644 --- a/core/src/services/ipmfs/backend.rs +++ b/core/src/services/ipmfs/backend.rs @@ -112,10 +112,7 @@ impl Accessor for IpmfsBackend { let status = resp.status(); match status { - StatusCode::OK => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/memcached/ascii.rs b/core/src/services/memcached/ascii.rs index 12ba7589d70c..6a790889396c 100644 --- a/core/src/services/memcached/ascii.rs +++ b/core/src/services/memcached/ascii.rs @@ -21,7 +21,7 @@ use tokio::io::AsyncWriteExt; use tokio::io::BufReader; use tokio::net::TcpStream; -use super::backend::parse_io_error; +use crate::raw::*; use crate::*; pub struct Connection { @@ -43,8 +43,8 @@ impl Connection { writer .write_all(&[b"get ", key.as_bytes(), b"\r\n"].concat()) .await - .map_err(parse_io_error)?; - writer.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + writer.flush().await.map_err(new_std_io_error)?; // Read response header let header = self.read_header().await?; @@ -71,7 +71,7 @@ impl Connection { self.io .read_exact(&mut buffer) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; // Read the trailing header self.read_line().await?; // \r\n @@ -85,10 +85,10 @@ impl Connection { self.io .write_all(header.as_bytes()) .await - .map_err(parse_io_error)?; - self.io.write_all(val).await.map_err(parse_io_error)?; - 
self.io.write_all(b"\r\n").await.map_err(parse_io_error)?; - self.io.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + self.io.write_all(val).await.map_err(new_std_io_error)?; + self.io.write_all(b"\r\n").await.map_err(new_std_io_error)?; + self.io.flush().await.map_err(new_std_io_error)?; // Read response header let header = self.read_header().await?; @@ -110,8 +110,8 @@ impl Connection { self.io .write_all(header.as_bytes()) .await - .map_err(parse_io_error)?; - self.io.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + self.io.flush().await.map_err(new_std_io_error)?; // Read response header let header = self.read_header().await?; @@ -132,8 +132,8 @@ impl Connection { self.io .write_all(b"version\r\n") .await - .map_err(parse_io_error)?; - self.io.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + self.io.flush().await.map_err(new_std_io_error)?; // Read response header let header = self.read_header().await?; @@ -151,7 +151,7 @@ impl Connection { async fn read_line(&mut self) -> Result<&[u8]> { let Self { io, buf } = self; buf.clear(); - io.read_until(b'\n', buf).await.map_err(parse_io_error)?; + io.read_until(b'\n', buf).await.map_err(new_std_io_error)?; if buf.last().copied() != Some(b'\n') { return Err(Error::new( ErrorKind::ContentIncomplete, diff --git a/core/src/services/memcached/backend.rs b/core/src/services/memcached/backend.rs index 2731a70a15ee..91c127de8a77 100644 --- a/core/src/services/memcached/backend.rs +++ b/core/src/services/memcached/backend.rs @@ -250,7 +250,7 @@ impl bb8::ManageConnection for MemcacheConnectionManager { async fn connect(&self) -> std::result::Result { let conn = TcpStream::connect(&self.address) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok(ascii::Connection::new(conn)) } @@ -262,7 +262,3 @@ impl bb8::ManageConnection for MemcacheConnectionManager { false } } - -pub fn parse_io_error(err: std::io::Error) -> Error { - 
Error::new(ErrorKind::Unexpected, &err.kind().to_string()).set_source(err) -} diff --git a/core/src/services/obs/backend.rs b/core/src/services/obs/backend.rs index aa5a86864c63..311af30a6a4d 100644 --- a/core/src/services/obs/backend.rs +++ b/core/src/services/obs/backend.rs @@ -360,9 +360,10 @@ impl Accessor for ObsBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/onedrive/backend.rs b/core/src/services/onedrive/backend.rs index 0a7952e4a27f..95e052654184 100644 --- a/core/src/services/onedrive/backend.rs +++ b/core/src/services/onedrive/backend.rs @@ -94,10 +94,9 @@ impl Accessor for OnedriveBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } - _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/oss/backend.rs b/core/src/services/oss/backend.rs index 1bab5f845bf6..93323b3a3156 100644 --- a/core/src/services/oss/backend.rs +++ b/core/src/services/oss/backend.rs @@ -475,9 +475,10 @@ impl Accessor for OssBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => 
Err(parse_error(resp).await?), } } diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs index f9e160502e8b..f2157c5dc6f5 100644 --- a/core/src/services/s3/backend.rs +++ b/core/src/services/s3/backend.rs @@ -970,9 +970,10 @@ impl Accessor for S3Backend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/sftp/backend.rs b/core/src/services/sftp/backend.rs index 0f9b7f3bd6fe..792e27f49644 100644 --- a/core/src/services/sftp/backend.rs +++ b/core/src/services/sftp/backend.rs @@ -15,25 +15,25 @@ // specific language governing permissions and limitations // under the License. -use std::cmp::min; use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; use std::path::Path; use std::path::PathBuf; +use std::pin::Pin; use async_trait::async_trait; use futures::StreamExt; use log::debug; use openssh::KnownHosts; use openssh::SessionBuilder; +use openssh_sftp_client::file::TokioCompatFile; use openssh_sftp_client::Sftp; use openssh_sftp_client::SftpOptions; use super::error::is_not_found; use super::error::is_sftp_protocol_error; use super::pager::SftpPager; -use super::utils::SftpReader; use super::writer::SftpWriter; use crate::raw::*; use crate::*; @@ -224,7 +224,7 @@ impl Debug for SftpBackend { #[async_trait] impl Accessor for SftpBackend { - type Reader = SftpReader; + type Reader = oio::TokioReader>>; type BlockingReader = (); type Writer = SftpWriter; type BlockingWriter = (); @@ -239,7 +239,6 @@ impl Accessor for SftpBackend { stat: true, read: true, - read_with_range: true, read_can_seek: true, write: true, @@ -284,42 +283,24 @@ 
impl Accessor for SftpBackend { return Ok(RpCreateDir::default()); } - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let client = self.connect().await?; let mut fs = client.fs(); fs.set_cwd(&self.root); let path = fs.canonicalize(path).await?; - let mut file = client.open(path.as_path()).await?; - - let total_length = file.metadata().await?.len().ok_or(Error::new( - ErrorKind::NotFound, - format!("file not found: {}", path.to_str().unwrap()).as_str(), - ))?; - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, total_length)), - // Read from offset. - (Some(offset), None) => (offset, total_length), - // Read the last size bytes. - (None, Some(size)) => ( - if total_length > size { - total_length - size - } else { - 0 - }, - total_length, - ), - // Read the whole file. - (None, None) => (0, total_length), - }; + let f = client.open(path.as_path()).await?; - let r = SftpReader::new(file, start, end).await?; + // Sorry for the ugly code... + // + // - `f` is a openssh file. + // - `TokioCompatFile::new(f)` makes it implements tokio AsyncRead + AsyncSeek for openssh File. + // - `Box::pin(x)` to make sure this reader implements `Unpin`, since `TokioCompatFile` is not. + // - `oio::TokioReader::new(x)` makes it a `oio::TokioReader` which implements `oio::Read`. 
+ let r = oio::TokioReader::new(Box::pin(TokioCompatFile::new(f))); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/sftp/error.rs b/core/src/services/sftp/error.rs index 9c0124e77576..adb8d079e924 100644 --- a/core/src/services/sftp/error.rs +++ b/core/src/services/sftp/error.rs @@ -45,6 +45,7 @@ impl From for Error { } } +/// REMOVE ME: it's not allowed to impl for Error. impl From for Error { fn from(e: SshError) -> Self { Error::new(ErrorKind::Unexpected, "ssh error").set_source(e) diff --git a/core/src/services/sftp/utils.rs b/core/src/services/sftp/utils.rs index fc8bb8a6f64c..5e627b89adc1 100644 --- a/core/src/services/sftp/utils.rs +++ b/core/src/services/sftp/utils.rs @@ -15,83 +15,10 @@ // specific language governing permissions and limitations // under the License. -use std::io::SeekFrom; -use std::pin::Pin; -use std::task::Context; -use std::task::Poll; - -use async_compat::Compat; -use futures::AsyncBufRead; -use futures::AsyncRead; -use futures::AsyncSeek; -use openssh_sftp_client::file::File; -use openssh_sftp_client::file::TokioCompatFile; use openssh_sftp_client::metadata::MetaData as SftpMeta; -use crate::raw::oio; -use crate::raw::oio::FromFileReader; -use crate::raw::oio::ReadExt; use crate::EntryMode; use crate::Metadata; -use crate::Result; - -pub struct SftpReaderInner { - file: Pin>>, -} -pub type SftpReader = FromFileReader; - -impl SftpReaderInner { - pub async fn new(file: File) -> Self { - let file = Compat::new(file.into()); - Self { - file: Box::pin(file), - } - } -} - -impl SftpReader { - /// Create a new reader from a file, starting at the given offset and ending at the given offset. 
- pub async fn new(file: File, start: u64, end: u64) -> Result { - let file = SftpReaderInner::new(file).await; - let mut r = oio::into_read_from_file(file, start, end); - r.seek(SeekFrom::Start(0)).await?; - Ok(r) - } -} - -impl AsyncRead for SftpReaderInner { - fn poll_read( - self: Pin<&mut Self>, - cx: &mut Context, - buf: &mut [u8], - ) -> Poll> { - let this = self.get_mut(); - Pin::new(&mut this.file).poll_read(cx, buf) - } -} - -impl AsyncBufRead for SftpReaderInner { - fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - let this = self.get_mut(); - Pin::new(&mut this.file).poll_fill_buf(cx) - } - - fn consume(self: Pin<&mut Self>, amt: usize) { - let this = self.get_mut(); - Pin::new(&mut this.file).consume(amt) - } -} - -impl AsyncSeek for SftpReaderInner { - fn poll_seek( - self: Pin<&mut Self>, - cx: &mut Context, - pos: SeekFrom, - ) -> Poll> { - let this = self.get_mut(); - Pin::new(&mut this.file).poll_seek(cx, pos) - } -} impl From for Metadata { fn from(meta: SftpMeta) -> Self { diff --git a/core/src/services/sftp/writer.rs b/core/src/services/sftp/writer.rs index c6a2aa6bef33..c80d8de9b5c5 100644 --- a/core/src/services/sftp/writer.rs +++ b/core/src/services/sftp/writer.rs @@ -45,11 +45,14 @@ impl oio::Write for SftpWriter { self.file .as_mut() .poll_write(cx, bs.chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn poll_close(&mut self, cx: &mut Context<'_>) -> Poll> { - self.file.as_mut().poll_shutdown(cx).map_err(parse_io_error) + self.file + .as_mut() + .poll_shutdown(cx) + .map_err(new_std_io_error) } fn poll_abort(&mut self, _: &mut Context<'_>) -> Poll> { @@ -60,6 +63,6 @@ impl oio::Write for SftpWriter { } } -fn parse_io_error(err: std::io::Error) -> Error { +fn new_std_io_error(err: std::io::Error) -> Error { Error::new(ErrorKind::Unexpected, "read from sftp").set_source(err) } diff --git a/core/src/services/supabase/backend.rs b/core/src/services/supabase/backend.rs index a5d0d3db1053..b96bbd3efd4a 100644 
--- a/core/src/services/supabase/backend.rs +++ b/core/src/services/supabase/backend.rs @@ -215,10 +215,7 @@ impl Accessor for SupabaseBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/vercel_artifacts/backend.rs b/core/src/services/vercel_artifacts/backend.rs index 9a0ae95cf719..432b82b68fd9 100644 --- a/core/src/services/vercel_artifacts/backend.rs +++ b/core/src/services/vercel_artifacts/backend.rs @@ -74,10 +74,7 @@ impl Accessor for VercelArtifactsBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/wasabi/backend.rs b/core/src/services/wasabi/backend.rs index a7a41042f037..c2f746e77578 100644 --- a/core/src/services/wasabi/backend.rs +++ b/core/src/services/wasabi/backend.rs @@ -738,10 +738,7 @@ impl Accessor for WasabiBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 4c4cf6b834a2..93e0a70ca264 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -268,9 +268,10 @@ impl Accessor for 
WebdavBackend { let status = resp.status(); match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webhdfs/backend.rs b/core/src/services/webhdfs/backend.rs index 4adbc6ab9e93..db72a346736d 100644 --- a/core/src/services/webhdfs/backend.rs +++ b/core/src/services/webhdfs/backend.rs @@ -28,6 +28,7 @@ use log::debug; use tokio::sync::OnceCell; use super::error::parse_error; +use super::error::parse_error_msg; use super::message::BooleanResp; use super::message::DirectoryListingWrapper; use super::message::FileStatusType; @@ -466,9 +467,21 @@ impl Accessor for WebhdfsBackend { let resp = self.webhdfs_read_file(path, range).await?; match resp.status() { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } + // WebHDFS will returns 403 when range is outside of the end. + StatusCode::FORBIDDEN => { + let (parts, body) = resp.into_parts(); + let bs = body.bytes().await?; + let s = String::from_utf8_lossy(&bs); + if s.contains("out of the range") { + Ok((RpRead::new(), IncomingAsyncBody::empty())) + } else { + Err(parse_error_msg(parts, &s)?) 
+ } + } + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webhdfs/error.rs b/core/src/services/webhdfs/error.rs index 449725e632e3..d243b84811d1 100644 --- a/core/src/services/webhdfs/error.rs +++ b/core/src/services/webhdfs/error.rs @@ -46,7 +46,7 @@ pub(super) async fn parse_error(resp: Response) -> Result Result { +pub(super) fn parse_error_msg(parts: Parts, body: &str) -> Result { let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => (ErrorKind::PermissionDenied, false), diff --git a/core/src/types/operator/blocking_operator.rs b/core/src/types/operator/blocking_operator.rs index 3dfa0d121ae9..f55a1c48a82c 100644 --- a/core/src/types/operator/blocking_operator.rs +++ b/core/src/types/operator/blocking_operator.rs @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-use std::io::Read; - use bytes::Bytes; use super::operator_functions::*; +use crate::raw::oio::BlockingRead; use crate::raw::oio::WriteBuf; use crate::raw::*; use crate::*; @@ -339,22 +338,12 @@ impl BlockingOperator { ); } - let (rp, mut s) = inner.blocking_read(&path, args)?; - let mut buffer = Vec::with_capacity(rp.into_metadata().content_length() as usize); + let (_, mut s) = inner.blocking_read(&path, args)?; - match s.read_to_end(&mut buffer) { - Ok(n) => { - buffer.truncate(n); - Ok(buffer) - } - Err(err) => Err( - Error::new(ErrorKind::Unexpected, "blocking read_with failed") - .with_operation("BlockingOperator::read_with") - .with_context("service", inner.info().scheme().into_static()) - .with_context("path", &path) - .set_source(err), - ), - } + let mut buf = Vec::new(); + s.read_to_end(&mut buf)?; + + Ok(buf) }, )) } diff --git a/core/src/types/operator/operator.rs b/core/src/types/operator/operator.rs index 00bb13e10b2e..1abb1f0276f4 100644 --- a/core/src/types/operator/operator.rs +++ b/core/src/types/operator/operator.rs @@ -20,15 +20,13 @@ use std::time::Duration; use bytes::Buf; use bytes::Bytes; use futures::stream; -use futures::AsyncReadExt; use futures::Stream; use futures::StreamExt; use futures::TryStreamExt; -use tokio::io::ReadBuf; use super::BlockingOperator; use crate::operator_futures::*; -use crate::raw::oio::WriteExt; +use crate::raw::oio::{ReadExt, WriteExt}; use crate::raw::*; use crate::*; @@ -368,32 +366,11 @@ impl Operator { .with_context("path", &path)); } - let br = args.range(); - let (rp, mut s) = inner.read(&path, args).await?; + let (_, mut s) = inner.read(&path, args).await?; + let mut buf = Vec::new(); + s.read_to_end(&mut buf).await?; - let length = rp.into_metadata().content_length() as usize; - let mut buffer = Vec::with_capacity(length); - - let dst = buffer.spare_capacity_mut(); - let mut buf = ReadBuf::uninit(dst); - - // Safety: the input buffer is created with_capacity(length). 
- unsafe { buf.assume_init(length) }; - - // TODO: use native read api - s.read_exact(buf.initialized_mut()).await.map_err(|err| { - Error::new(ErrorKind::Unexpected, "read from storage") - .with_operation("read") - .with_context("service", inner.info().scheme().into_static()) - .with_context("path", &path) - .with_context("range", br.to_string()) - .set_source(err) - })?; - - // Safety: read_exact makes sure this buffer has been filled. - unsafe { buffer.set_len(length) } - - Ok(buffer) + Ok(buf) }; Box::pin(fut) diff --git a/core/tests/behavior/fuzz.rs b/core/tests/behavior/fuzz.rs index 6ad261795903..772a2c181a3b 100644 --- a/core/tests/behavior/fuzz.rs +++ b/core/tests/behavior/fuzz.rs @@ -25,7 +25,12 @@ use opendal::raw::BytesRange; use crate::*; pub fn behavior_fuzz_tests(op: &Operator) -> Vec { - async_trials!(op, test_fuzz_issue_2717) + async_trials!( + op, + test_fuzz_issue_2717, + test_fuzz_pr_3395_case_1, + test_fuzz_pr_3395_case_2 + ) } async fn test_fuzz_read( @@ -96,3 +101,81 @@ pub async fn test_fuzz_issue_2717(op: Operator) -> Result<()> { test_fuzz_read(op, 2, .., &actions).await } + +/// This fuzz test is to reproduce bug inside . +/// +/// The simplified cases could be seen as: +/// +/// ``` +/// FuzzInput { +/// path: "06ae5d93-c0e9-43f2-ae5a-225cfaaa40a0", +/// size: 1, +/// range: BytesRange( +/// Some( +/// 0, +/// ), +/// None, +/// ), +/// actions: [ +/// Seek( +/// Current( +/// 1, +/// ), +/// ), +/// Next, +/// Seek( +/// End( +/// -1, +/// ), +/// ), +/// ], +/// } +/// ``` +pub async fn test_fuzz_pr_3395_case_1(op: Operator) -> Result<()> { + let actions = [ + ReadAction::Seek(SeekFrom::Current(1)), + ReadAction::Next, + ReadAction::Seek(SeekFrom::End(-1)), + ]; + test_fuzz_read(op, 1, 0.., &actions).await +} + +/// This fuzz test is to reproduce bug inside . 
+/// +/// The simplified cases could be seen as: +/// +/// ``` +/// FuzzInput { +/// path: "e6056989-7c7c-4075-b975-5ae380884333", +/// size: 1, +/// range: BytesRange( +/// Some( +/// 0, +/// ), +/// None, +/// ), +/// actions: [ +/// Next, +/// Seek( +/// Current( +/// 1, +/// ), +/// ), +/// Next, +/// Seek( +/// End( +/// 0, +/// ), +/// ), +/// ], +/// } +/// ``` +pub async fn test_fuzz_pr_3395_case_2(op: Operator) -> Result<()> { + let actions = [ + ReadAction::Next, + ReadAction::Seek(SeekFrom::Current(1)), + ReadAction::Next, + ReadAction::Seek(SeekFrom::End(0)), + ]; + test_fuzz_read(op, 1, 0.., &actions).await +} diff --git a/integrations/dav-server/Cargo.toml b/integrations/dav-server/Cargo.toml index 5bb32948c749..911812f47214 100644 --- a/integrations/dav-server/Cargo.toml +++ b/integrations/dav-server/Cargo.toml @@ -29,10 +29,10 @@ version.workspace = true [dependencies] anyhow = "1" -chrono = "0.4.28" -dirs = "5.0.0" bytes = { version = "1.4.0" } +chrono = "0.4.28" dav-server = { version = "0.5.5" } +dirs = "5.0.0" futures = "0.3" futures-util = { version = "0.3.16" } opendal.workspace = true @@ -44,4 +44,3 @@ tokio = { version = "1.27", features = [ "rt-multi-thread", "io-std", ] } -