From 657ef3664f231d68241f1d7f52bbf3cc33bf4adb Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 23 Dec 2024 23:59:53 +0800 Subject: [PATCH 1/6] rfc: Operator From Uri Signed-off-by: Xuanwo --- core/src/docs/rfcs/0000_operator_from_uri.md | 144 +++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 core/src/docs/rfcs/0000_operator_from_uri.md diff --git a/core/src/docs/rfcs/0000_operator_from_uri.md b/core/src/docs/rfcs/0000_operator_from_uri.md new file mode 100644 index 000000000000..df7112f9b552 --- /dev/null +++ b/core/src/docs/rfcs/0000_operator_from_uri.md @@ -0,0 +1,144 @@ +- Proposal Name: `operator_from_uri` +- Start Date: 2024-12-23 +- RFC PR: [apache/opendal#0000](https://github.com/apache/opendal/pull/0000) +- Tracking Issue: [apache/opendal#0000](https://github.com/apache/opendal/issues/0000) + +# Summary + +This RFC proposes adding URI-based configuration support to OpenDAL, allowing users to create operators directly from URIs. The proposal introduces a new `from_uri` API in both the `Operator` and `Configurator` traits, along with an `OperatorRegistry` to manage operator factories. + +# Motivation + +Currently, creating an operator in OpenDAL requires explicit configuration through builder patterns. While this approach provides type safety and clear documentation, it can be verbose and inflexible for simple use cases. Many storage systems are naturally identified by URIs (e.g., `s3://bucket/path`, `fs:///path/to/dir`). 
+ +Adding URI-based configuration would: + +- Simplify operator creation for common use cases +- Enable configuration via connection strings (common in many applications) +- Make OpenDAL more approachable for new users +- Allow dynamic operator creation based on runtime configuration + +# Guide-level explanation + +The new API allows creating operators directly from URIs: + +```rust +// Create an operator using URI +let op = Operator::from_uri("s3://my-bucket/path", vec![ + ("access_key_id".to_string(), "xxx".to_string()), + ("secret_key_key".to_string(), "yyy".to_string()), +])?; + +// Create a file system operator +let op = Operator::from_uri("fs:///tmp/test", vec![])?; + +// Using with custom registry +let registry = OperatorRegistry::new(); +registry.register("custom", my_factory); +let op = registry.parse("custom://endpoint", options)?; +``` + +# Reference-level explanation + +The implementation consists of three main components: + +1. The `OperatorRegistry`: + +```rust +type OperatorFactory = fn(http::Uri, HashMap<String, String>) -> Result<Operator>; + +pub struct OperatorRegistry { + register: Arc<Mutex<HashMap<String, OperatorFactory>>>, +} + +impl OperatorRegistry { + fn register(&self, scheme: &str, factory: OperatorFactory) { + ... + } + + fn parse(&self, uri: &str, options: impl IntoIterator<Item = (String, String)>) -> Result<Operator> { + ... + } +} +``` + +2. The `Configurator` trait extension: + +```rust +impl Configurator for S3Config { + fn from_uri(uri: &str, options: impl IntoIterator<Item = (String, String)>) -> Result<Self> { + ... + } +} +``` + +3. The `Operator` factory method: + +```rust +impl Operator { + pub fn from_uri( + uri: &str, + options: impl IntoIterator<Item = (String, String)>, + ) -> Result<Self> { + static REGISTRY: Lazy<OperatorRegistry> = Lazy::new(|| { + let registry = OperatorRegistry::new(); + // Register built-in operators + registry.register("s3", s3_factory); + registry.register("fs", fs_factory); + // ... + registry + }); + + REGISTRY.parse(uri, options) + } +} +``` + +We are intentionally using `&str` instead of `Scheme` here to simplify working with external components outside this crate. 
Additionally, we plan to remove `Scheme` from our public API soon to enable splitting OpenDAL into multiple crates. + +# Drawbacks + +- Increases API surface area +- Less type safety compared to builder patterns +- Potential for confusing error messages with invalid URIs +- Need to maintain backwards compatibility + +# Rationale and alternatives + +Alternatives considered: + +1. Connection string format instead of URIs +2. Builder pattern with URI parsing +3. Macro-based configuration + +URI-based configuration was chosen because: + +- URIs are widely understood +- Natural fit for storage locations +- Extensible through custom schemes +- Common in similar tools + +# Prior art + +Similar patterns exist in: + +- Rust's `url` crate +- Database connection strings (PostgreSQL, MongoDB) +- AWS SDK endpoint configuration +- Python's `urllib` + +# Unresolved questions + +- Should we support custom URI parsing per operator? +- How to handle scheme conflicts? +- Should we support URI validation? +- How to handle complex configurations that don't map well to URIs? 
+ +# Future possibilities + +- Support for connection string format +- URI templates for batch operations +- Custom scheme handlers +- Configuration presets +- URI validation middleware +- Dynamic operator loading based on URI schemes From d4f8b52d64925b36447a67cdc661c3929a29f0ef Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 24 Dec 2024 00:01:50 +0800 Subject: [PATCH 2/6] Assign number Signed-off-by: Xuanwo --- .../{0000_operator_from_uri.md => 5444_operator_from_uri.md} | 0 core/src/docs/rfcs/mod.rs | 4 ++++ 2 files changed, 4 insertions(+) rename core/src/docs/rfcs/{0000_operator_from_uri.md => 5444_operator_from_uri.md} (100%) diff --git a/core/src/docs/rfcs/0000_operator_from_uri.md b/core/src/docs/rfcs/5444_operator_from_uri.md similarity index 100% rename from core/src/docs/rfcs/0000_operator_from_uri.md rename to core/src/docs/rfcs/5444_operator_from_uri.md diff --git a/core/src/docs/rfcs/mod.rs b/core/src/docs/rfcs/mod.rs index a2c042ed598f..8fc19aea113b 100644 --- a/core/src/docs/rfcs/mod.rs +++ b/core/src/docs/rfcs/mod.rs @@ -240,3 +240,7 @@ pub mod rfc_4638_executor {} /// Remove metakey #[doc = include_str!("5314_remove_metakey.md")] pub mod rfc_5314_remove_metakey {} + +/// Operator from uri +#[doc = include_str!("5444_operator_from_uri.md")] +pub mod rfc_5444_operator_from_uri {} From b85c95c98b320e59a875bd801ce67a0565537758 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 24 Dec 2024 00:03:38 +0800 Subject: [PATCH 3/6] Fix link Signed-off-by: Xuanwo --- core/src/docs/rfcs/5444_operator_from_uri.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/docs/rfcs/5444_operator_from_uri.md b/core/src/docs/rfcs/5444_operator_from_uri.md index df7112f9b552..c12e80738665 100644 --- a/core/src/docs/rfcs/5444_operator_from_uri.md +++ b/core/src/docs/rfcs/5444_operator_from_uri.md @@ -1,7 +1,7 @@ - Proposal Name: `operator_from_uri` - Start Date: 2024-12-23 -- RFC PR: 
[apache/opendal#0000](https://github.com/apache/opendal/pull/0000) -- Tracking Issue: [apache/opendal#0000](https://github.com/apache/opendal/issues/0000) +- RFC PR: [apache/opendal#5444](https://github.com/apache/opendal/pull/5444) +- Tracking Issue: [apache/opendal#5445](https://github.com/apache/opendal/issues/5445) # Summary From c3a835967805a17ada71a4f08de48075f427eb7e Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 24 Dec 2024 20:45:48 +0800 Subject: [PATCH 4/6] Polish RFCs Signed-off-by: Xuanwo --- core/src/docs/rfcs/5444_operator_from_uri.md | 51 ++++++++------------ 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/core/src/docs/rfcs/5444_operator_from_uri.md b/core/src/docs/rfcs/5444_operator_from_uri.md index c12e80738665..95cfc29ed756 100644 --- a/core/src/docs/rfcs/5444_operator_from_uri.md +++ b/core/src/docs/rfcs/5444_operator_from_uri.md @@ -25,13 +25,16 @@ The new API allows creating operators directly from URIs: ```rust // Create an operator using URI let op = Operator::from_uri("s3://my-bucket/path", vec![ - ("access_key_id".to_string(), "xxx".to_string()), - ("secret_key_key".to_string(), "yyy".to_string()), + ("endpoint".to_string(), "http://localhost:8080".to_string()), ])?; // Create a file system operator let op = Operator::from_uri("fs:///tmp/test", vec![])?; +``` + +OpenDAL will, by default, register services enabled by features in a global `OperatorRegistry`. Users can also create custom operator registries to support their own schemes or additional options. +``` // Using with custom registry let registry = OperatorRegistry::new(); registry.register("custom", my_factory); @@ -42,14 +45,14 @@ let op = registry.parse("custom://endpoint", options)?; The implementation consists of three main components: -1. The `OperatorRegistry`: +1. The `OperatorFactory` and `OperatorRegistry`: + +`OperatorFactory` is a function type that takes a URI and a map of options and returns an `Operator`. 
`OperatorRegistry` manages operator factories for different schemes. ```rust type OperatorFactory = fn(http::Uri, HashMap<String, String>) -> Result<Operator>; -pub struct OperatorRegistry { - register: Arc<Mutex<HashMap<String, OperatorFactory>>>, -} +pub struct OperatorRegistry { ... } impl OperatorRegistry { fn register(&self, scheme: &str, factory: OperatorFactory) { @@ -64,6 +67,10 @@ impl OperatorRegistry { 2. The `Configurator` trait extension: +`Configurator` will add a new API to create a configuration from a URI and options. OpenDAL will provides default implementations for common configurations. But services can override this method to support their own special needs. + +For example, S3 might need to extract the `bucket` and `region` from the URI when possible. + ```rust impl Configurator for S3Config { fn from_uri(uri: &str, options: impl IntoIterator<Item = (String, String)>) -> Result<Self> { @@ -72,7 +79,9 @@ impl Configurator for S3Config { } ``` -3. The `Operator` factory method: +3. The `Operator` `from_uri` method: + +The `Operator` type will add a new `from_uri` method to create an operator from a URI and options. This method will use the global `OperatorRegistry` to find the appropriate factory for the scheme. ```rust impl Operator { @@ -80,16 +89,7 @@ impl Operator { uri: &str, options: impl IntoIterator<Item = (String, String)>, ) -> Result<Self> { - static REGISTRY: Lazy<OperatorRegistry> = Lazy::new(|| { - let registry = OperatorRegistry::new(); - // Register built-in operators - registry.register("s3", s3_factory); - registry.register("fs", fs_factory); - // ... - registry - }); - - REGISTRY.parse(uri, options) + ... } } ``` @@ -122,23 +122,14 @@ URI-based configuration was chosen because: Similar patterns exist in: -- Rust's `url` crate - Database connection strings (PostgreSQL, MongoDB) -- AWS SDK endpoint configuration -- Python's `urllib` +- [`object_store::parse_url`](https://docs.rs/object_store/latest/object_store/fn.parse_url.html) # Unresolved questions -- Should we support custom URI parsing per operator? -- How to handle scheme conflicts? 
-- Should we support URI validation? -- How to handle complex configurations that don't map well to URIs? +None # Future possibilities -- Support for connection string format -- URI templates for batch operations -- Custom scheme handlers -- Configuration presets -- URI validation middleware -- Dynamic operator loading based on URI schemes +- Support for connection string format. +- Configuration presets like `r2` and `s3` with directory bucket enabled. From 88483f0da0fddce33b93178310031ada9b6ee82c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 24 Dec 2024 21:01:18 +0800 Subject: [PATCH 5/6] Fix typo Signed-off-by: Xuanwo --- core/src/docs/rfcs/5444_operator_from_uri.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/docs/rfcs/5444_operator_from_uri.md b/core/src/docs/rfcs/5444_operator_from_uri.md index 95cfc29ed756..807cbaa26cde 100644 --- a/core/src/docs/rfcs/5444_operator_from_uri.md +++ b/core/src/docs/rfcs/5444_operator_from_uri.md @@ -67,7 +67,7 @@ impl OperatorRegistry { 2. The `Configurator` trait extension: -`Configurator` will add a new API to create a configuration from a URI and options. OpenDAL will provides default implementations for common configurations. But services can override this method to support their own special needs. +`Configurator` will add a new API to create a configuration from a URI and options. OpenDAL will provide default implementations for common configurations. But services can override this method to support their own special needs. For example, S3 might need to extract the `bucket` and `region` from the URI when possible. 
From 9763a6e4ac7c0c203a4dd2ef179fa26f801b9bef Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Dec 2024 23:33:43 +0800 Subject: [PATCH 6/6] Add example Signed-off-by: Xuanwo --- core/src/docs/rfcs/5444_operator_from_uri.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/src/docs/rfcs/5444_operator_from_uri.md b/core/src/docs/rfcs/5444_operator_from_uri.md index 807cbaa26cde..a2c0c9def1d7 100644 --- a/core/src/docs/rfcs/5444_operator_from_uri.md +++ b/core/src/docs/rfcs/5444_operator_from_uri.md @@ -28,6 +28,12 @@ let op = Operator::from_uri("s3://my-bucket/path", vec![ ("endpoint".to_string(), "http://localhost:8080".to_string()), ])?; +// Users can pass options through the URI along with additional key-value pairs +// The extra options will override identical options specified in the URI +let op = Operator::from_uri("s3://my-bucket/path?region=us-east-1", vec![ + ("endpoint".to_string(), "http://localhost:8080".to_string()), +])?; + // Create a file system operator let op = Operator::from_uri("fs:///tmp/test", vec![])?; ```