From 619acce47d08062d33e42bdfb23698acaa80345c Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 3 Nov 2024 16:43:44 +0100 Subject: [PATCH 01/12] feat: gcs user metadata --- core/src/services/gcs/backend.rs | 15 +++++++++++++++ core/src/services/gcs/core.rs | 25 +++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index 4c7bc7eca69a..d7c05ffbd3d1 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; use std::sync::Arc; @@ -363,6 +364,7 @@ impl Access for GcsBackend { write_can_empty: true, write_can_multi: true, write_with_content_type: true, + write_with_user_metadata: true, // The min multipart size of Gcs is 5 MiB. // // ref: @@ -424,6 +426,10 @@ impl Access for GcsBackend { m.set_last_modified(parse_datetime_from_rfc3339(&meta.updated)?); + if let Some(user_metadata) = meta.metadata { + m.with_user_metadata(user_metadata); + } + Ok(RpStat::new(m)) } @@ -593,6 +599,15 @@ struct GetObjectJsonResponse { /// /// For example: `"contentType": "image/png",` content_type: String, + /// Custom metadata of this object. 
+ /// + /// For example: + /// ``` + /// "metadata" : { + /// "my-key": "my-value" + /// } + /// ``` + metadata: Option<HashMap<String, String>>, } #[cfg(test)] diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index 130af0afc460..27d6756b9801 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -24,6 +24,8 @@ use std::time::Duration; use backon::ExponentialBuilder; use backon::Retryable; use bytes::Bytes; +use constants::X_GOOG_ACL; +use constants::X_GOOG_STORAGE_CLASS; use http::header::CONTENT_LENGTH; use http::header::CONTENT_TYPE; use http::header::HOST; @@ -44,6 +46,13 @@ use serde_json::json; use super::uri::percent_encode_path; use crate::raw::*; use crate::*; +use constants::*; + +pub mod constants { + pub const X_GOOG_ACL: &str = "x-goog-acl"; + pub const X_GOOG_STORAGE_CLASS: &str = "x-goog-storage-class"; + pub const X_GOOG_META_PREFIX: &str = "x-goog-meta-"; +} pub struct GcsCore { pub endpoint: String, @@ -267,6 +276,12 @@ impl GcsCore { let mut req = Request::post(&url); + if let Some(user_metadata) = op.user_metadata() { + for (key, value) in user_metadata { + req = req.header(format!("{X_GOOG_META_PREFIX}{key}"), value) + } + } + req = req.header(CONTENT_LENGTH, size.unwrap_or_default()); if metadata.is_empty() { @@ -318,16 +333,22 @@ impl GcsCore { let mut req = Request::put(&url); + if let Some(user_metadata) = args.user_metadata() { + for (key, value) in user_metadata { + req = req.header(format!("{X_GOOG_META_PREFIX}{key}"), value) + } + } + if let Some(content_type) = args.content_type() { req = req.header(CONTENT_TYPE, content_type); } if let Some(acl) = &self.predefined_acl { - req = req.header("x-goog-acl", acl); + req = req.header(X_GOOG_ACL, acl); } if let Some(storage_class) = &self.default_storage_class { - req = req.header("x-goog-storage-class", storage_class); + req = req.header(X_GOOG_STORAGE_CLASS, storage_class); } let req = req.body(body).map_err(new_request_build_error)?; From 
b7a7c7ef651e245f485a716c3400e9991c534443 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 3 Nov 2024 16:56:57 +0100 Subject: [PATCH 02/12] docs: update broken doctest --- core/src/services/gcs/backend.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index d7c05ffbd3d1..b8cfa88970d4 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -601,12 +601,7 @@ struct GetObjectJsonResponse { content_type: String, /// Custom metadata of this object. /// - /// For example: - /// ``` - /// "metadata" : { - /// "my-key": "my-value" - /// } - /// ``` + /// For example: `"metadata" : { "my-key": "my-value" }` metadata: Option<HashMap<String, String>>, } From 330bd74f3ace74f15966dfb2465191aa7c1ae5a6 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 3 Nov 2024 17:02:18 +0100 Subject: [PATCH 03/12] test: include gcs metadata deserialization in unit test --- core/src/services/gcs/backend.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index b8cfa88970d4..96c81aae8244 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -628,7 +628,10 @@ mod tests { "etag": "CKWasoTgyPkCEAE=", "timeCreated": "2022-08-15T11:33:34.866Z", "updated": "2022-08-15T11:33:34.866Z", - "timeStorageClassUpdated": "2022-08-15T11:33:34.866Z" + "timeStorageClassUpdated": "2022-08-15T11:33:34.866Z", + "metadata" : { + "location" : "everywhere" + } }"#; let meta: GetObjectJsonResponse = @@ -639,5 +642,12 @@ mod tests { assert_eq!(meta.md5_hash, "fHcEH1vPwA6eTPqxuasXcg=="); assert_eq!(meta.etag, "CKWasoTgyPkCEAE="); assert_eq!(meta.content_type, "image/png"); + assert_eq!( + meta.metadata, + Some(HashMap::from_iter([( + "location".to_string(), + "everywhere".to_string() + )])) + ); } } From a6478efa70ac655a3e1f0ae796c79a7eac75537b Mon Sep 17 00:00:00 2001 From: Jorge Hermo 
Date: Sun, 3 Nov 2024 18:30:09 +0100 Subject: [PATCH 04/12] feat: remove Optional in gcp stat response metadata --- core/src/services/gcs/backend.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index 96c81aae8244..39f9b882901e 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -426,7 +426,7 @@ impl Access for GcsBackend { m.set_last_modified(parse_datetime_from_rfc3339(&meta.updated)?); - if let Some(user_metadata) = meta.metadata { + if !meta.metadata.is_empty() { m.with_user_metadata(user_metadata); } @@ -602,7 +602,7 @@ struct GetObjectJsonResponse { /// Custom metadata of this object. /// /// For example: `"metadata" : { "my-key": "my-value" }` - metadata: Option<HashMap<String, String>>, + metadata: HashMap<String, String>, } #[cfg(test)] @@ -644,10 +644,7 @@ mod tests { assert_eq!(meta.content_type, "image/png"); assert_eq!( meta.metadata, - Some(HashMap::from_iter([( - "location".to_string(), - "everywhere".to_string() - )])) + HashMap::from_iter([("location".to_string(), "everywhere".to_string())]) ); } } From bcdc518579c70d2b47458b9c692c902b98c4f4f7 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 3 Nov 2024 18:46:21 +0100 Subject: [PATCH 05/12] fix: compilation issue --- core/src/services/gcs/backend.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index 39f9b882901e..7ad6cfc17a5e 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -427,7 +427,7 @@ impl Access for GcsBackend { m.set_last_modified(parse_datetime_from_rfc3339(&meta.updated)?); if !meta.metadata.is_empty() { - m.with_user_metadata(user_metadata); + m.with_user_metadata(meta.metadata); } Ok(RpStat::new(m)) From c2074104d55118645770a48e92a509c07ad5e548 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 4 Nov 2024 23:25:12 +0100 Subject: [PATCH 06/12] feat: use multipart 
metadata --- core/src/services/gcs/core.rs | 91 ++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index 27d6756b9801..9174e2bc4b69 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -41,7 +41,6 @@ use reqsign::GoogleToken; use reqsign::GoogleTokenLoader; use serde::Deserialize; use serde::Serialize; -use serde_json::json; use super::uri::percent_encode_path; use crate::raw::*; @@ -250,19 +249,18 @@ impl GcsCore { ) -> Result> { let p = build_abs_path(&self.root, path); - let mut metadata = HashMap::new(); - if let Some(storage_class) = &self.default_storage_class { - metadata.insert("storageClass", storage_class.as_str()); - } - if let Some(cache_control) = op.cache_control() { - metadata.insert("cacheControl", cache_control); - } + let mut request_metadata = InsertRequestMetadata::default(); + + request_metadata.storage_class = self.default_storage_class.as_ref().map(String::as_str); + request_metadata.cache_control = op.cache_control(); + request_metadata.content_type = op.content_type(); + request_metadata.metadata = op.user_metadata(); let mut url = format!( "{}/upload/storage/v1/b/{}/o?uploadType={}&name={}", self.endpoint, self.bucket, - if metadata.is_empty() { + if request_metadata.is_empty() { "media" } else { "multipart" @@ -276,45 +274,30 @@ impl GcsCore { let mut req = Request::post(&url); - if let Some(user_metadata) = op.user_metadata() { - for (key, value) in user_metadata { - req = req.header(format!("{X_GOOG_META_PREFIX}{key}"), value) - } - } - req = req.header(CONTENT_LENGTH, size.unwrap_or_default()); - if metadata.is_empty() { - if let Some(content_type) = op.content_type() { - req = req.header(CONTENT_TYPE, content_type); - } - + if request_metadata.is_empty() { + // If the metadata is empty, we do not set any `Content-Type` header, + // since if we had it in the `op.content_type()`, it would be alrady 
set in the + // `multipart` metadata body and this branch won't be executed. let req = req.body(body).map_err(new_request_build_error)?; Ok(req) } else { let mut multipart = Multipart::new(); - - multipart = multipart.part( - FormDataPart::new("metadata") - .header( - CONTENT_TYPE, - "application/json; charset=UTF-8".parse().unwrap(), - ) - .content(json!(metadata).to_string()), - ); - - let mut media_part = FormDataPart::new("media").content(body); - - if let Some(content_type) = op.content_type() { - media_part = media_part.header( + let metadata_part = FormDataPart::new("metadata") + .header( CONTENT_TYPE, - content_type - .parse() - .map_err(|_| Error::new(ErrorKind::Unexpected, "invalid header value"))?, + "application/json; charset=UTF-8".parse().unwrap(), + ) + .content( + serde_json::to_string(&request_metadata) + .expect("metadata serialization should success"), ); - } + multipart = multipart.part(metadata_part); + let media_part = FormDataPart::new("media").content(body); multipart = multipart.part(media_part); + let req = multipart.apply(Request::post(url))?; Ok(req) } @@ -629,6 +612,38 @@ impl GcsCore { } } +#[derive(Debug, Serialize)] +#[serde(default, rename_all = "camelCase")] +pub struct InsertRequestMetadata<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + content_type: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + storage_class: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + cache_control: Option<&'a str>, + #[serde(skip_serializing_if = "Option::is_none")] + metadata: Option<&'a HashMap<String, String>>, +} + +impl Default for InsertRequestMetadata<'_> { + fn default() -> Self { + Self { + content_type: None, + storage_class: None, + cache_control: None, + metadata: None, + } + } +} + +impl InsertRequestMetadata<'_> { + pub fn is_empty(&self) -> bool { + self.content_type.is_none() + && self.storage_class.is_none() + && self.cache_control.is_none() + && self.metadata.is_none() + } +} /// Response JSON 
from GCS list objects API. /// /// refer to https://cloud.google.com/storage/docs/json_api/v1/objects/list for details From 5166502b9731518b341967bc5bda1568d82813c5 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 4 Nov 2024 23:31:48 +0100 Subject: [PATCH 07/12] fix: typo --- core/src/services/gcs/core.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index 9174e2bc4b69..48e0b045d1f8 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -278,7 +278,7 @@ impl GcsCore { if request_metadata.is_empty() { // If the metadata is empty, we do not set any `Content-Type` header, - // since if we had it in the `op.content_type()`, it would be alrady set in the + // since if we had it in the `op.content_type()`, it would be already set in the // `multipart` metadata body and this branch won't be executed. let req = req.body(body).map_err(new_request_build_error)?; Ok(req) From d7af0ca339148231439ae43c37e2a69811945f12 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 4 Nov 2024 23:32:41 +0100 Subject: [PATCH 08/12] feat: optimize imports --- core/src/services/gcs/core.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index 48e0b045d1f8..f602694fd8d0 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -24,8 +24,6 @@ use std::time::Duration; use backon::ExponentialBuilder; use backon::Retryable; use bytes::Bytes; -use constants::X_GOOG_ACL; -use constants::X_GOOG_STORAGE_CLASS; use http::header::CONTENT_LENGTH; use http::header::CONTENT_TYPE; use http::header::HOST; From de8e57ad60520360c5ca091d15a7f4c87948a7db Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 4 Nov 2024 23:37:59 +0100 Subject: [PATCH 09/12] refactor: use Default derive --- core/src/services/gcs/core.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git 
a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index f602694fd8d0..a1683c9ed131 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -610,7 +610,7 @@ impl GcsCore { } } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Default)] #[serde(default, rename_all = "camelCase")] pub struct InsertRequestMetadata<'a> { #[serde(skip_serializing_if = "Option::is_none")] @@ -623,17 +623,6 @@ pub struct InsertRequestMetadata<'a> { metadata: Option<&'a HashMap<String, String>>, } -impl Default for InsertRequestMetadata<'_> { - fn default() -> Self { - Self { - content_type: None, - storage_class: None, - cache_control: None, - metadata: None, - } - } -} - impl InsertRequestMetadata<'_> { pub fn is_empty(&self) -> bool { self.content_type.is_none() From 3a919d307ad396569313887e0500127964773199 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Tue, 5 Nov 2024 00:00:50 +0100 Subject: [PATCH 10/12] fix: clippy lints --- core/src/services/gcs/core.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index a1683c9ed131..255c3f906f3f 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -247,12 +247,12 @@ impl GcsCore { ) -> Result> { let p = build_abs_path(&self.root, path); - let mut request_metadata = InsertRequestMetadata::default(); - - request_metadata.storage_class = self.default_storage_class.as_ref().map(String::as_str); - request_metadata.cache_control = op.cache_control(); - request_metadata.content_type = op.content_type(); - request_metadata.metadata = op.user_metadata(); + let mut request_metadata = InsertRequestMetadata { + storage_class: self.default_storage_class.as_deref(), + cache_control: op.cache_control(), + content_type: op.content_type(), + metadata: op.user_metadata(), + }; let mut url = format!( "{}/upload/storage/v1/b/{}/o?uploadType={}&name={}", @@ -610,7 +610,7 @@ impl GcsCore { } } -#[derive(Debug, 
Serialize, Default)] +#[derive(Debug, Serialize)] #[serde(default, rename_all = "camelCase")] pub struct InsertRequestMetadata<'a> { #[serde(skip_serializing_if = "Option::is_none")] From af0cd9bfa2efa0540694a124bb994ca6c8f000ac Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Tue, 5 Nov 2024 00:13:37 +0100 Subject: [PATCH 11/12] fix: clippy lints --- core/src/services/gcs/core.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index 255c3f906f3f..b00c0e19cae1 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -247,7 +247,7 @@ impl GcsCore { ) -> Result> { let p = build_abs_path(&self.root, path); - let mut request_metadata = InsertRequestMetadata { + let request_metadata = InsertRequestMetadata { storage_class: self.default_storage_class.as_deref(), cache_control: op.cache_control(), content_type: op.content_type(), From 5cd78085d24ac96f8faccb412bffdf441ed23de3 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Tue, 5 Nov 2024 18:12:55 +0100 Subject: [PATCH 12/12] feat: use serde_json::to_vec --- core/src/services/gcs/core.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index b00c0e19cae1..86d73bc2e528 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -288,7 +288,7 @@ impl GcsCore { "application/json; charset=UTF-8".parse().unwrap(), ) .content( - serde_json::to_string(&request_metadata) + serde_json::to_vec(&request_metadata) .expect("metadata serialization should success"), ); multipart = multipart.part(metadata_part);