Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(core/services-gcs): support user defined metadata #5276

Merged
merged 14 commits into from
Nov 6, 2024
19 changes: 18 additions & 1 deletion core/src/services/gcs/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use std::collections::HashMap;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::sync::Arc;
Expand Down Expand Up @@ -363,6 +364,7 @@ impl Access for GcsBackend {
write_can_empty: true,
write_can_multi: true,
write_with_content_type: true,
write_with_user_metadata: true,
// The min multipart size of Gcs is 5 MiB.
//
// ref: <https://cloud.google.com/storage/docs/xml-api/put-object-multipart>
Expand Down Expand Up @@ -424,6 +426,10 @@ impl Access for GcsBackend {

m.set_last_modified(parse_datetime_from_rfc3339(&meta.updated)?);

if !meta.metadata.is_empty() {
m.with_user_metadata(meta.metadata);
}

Ok(RpStat::new(m))
}

Expand Down Expand Up @@ -593,6 +599,10 @@ struct GetObjectJsonResponse {
///
/// For example: `"contentType": "image/png",`
content_type: String,
/// Custom metadata of this object.
///
/// For example: `"metadata" : { "my-key": "my-value" }`
metadata: HashMap<String, String>,
}

#[cfg(test)]
Expand All @@ -618,7 +628,10 @@ mod tests {
"etag": "CKWasoTgyPkCEAE=",
"timeCreated": "2022-08-15T11:33:34.866Z",
"updated": "2022-08-15T11:33:34.866Z",
"timeStorageClassUpdated": "2022-08-15T11:33:34.866Z"
"timeStorageClassUpdated": "2022-08-15T11:33:34.866Z",
"metadata" : {
"location" : "everywhere"
}
}"#;

let meta: GetObjectJsonResponse =
Expand All @@ -629,5 +642,9 @@ mod tests {
assert_eq!(meta.md5_hash, "fHcEH1vPwA6eTPqxuasXcg==");
assert_eq!(meta.etag, "CKWasoTgyPkCEAE=");
assert_eq!(meta.content_type, "image/png");
assert_eq!(
meta.metadata,
HashMap::from_iter([("location".to_string(), "everywhere".to_string())])
);
}
}
91 changes: 57 additions & 34 deletions core/src/services/gcs/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,17 @@ use reqsign::GoogleToken;
use reqsign::GoogleTokenLoader;
use serde::Deserialize;
use serde::Serialize;
use serde_json::json;

use super::uri::percent_encode_path;
use crate::raw::*;
use crate::*;
use constants::*;

/// Names (and name prefixes) of the GCS-specific `x-goog-*` HTTP headers
/// used when building requests.
pub mod constants {
    /// Prefix prepended to a user metadata key to form its header name.
    pub const X_GOOG_META_PREFIX: &str = "x-goog-meta-";

    /// Header used to send the storage class of the object.
    pub const X_GOOG_STORAGE_CLASS: &str = "x-goog-storage-class";

    /// Header used to send the predefined ACL of the object.
    pub const X_GOOG_ACL: &str = "x-goog-acl";
}

pub struct GcsCore {
pub endpoint: String,
Expand Down Expand Up @@ -241,19 +247,18 @@ impl GcsCore {
) -> Result<Request<Buffer>> {
let p = build_abs_path(&self.root, path);

let mut metadata = HashMap::new();
if let Some(storage_class) = &self.default_storage_class {
metadata.insert("storageClass", storage_class.as_str());
}
if let Some(cache_control) = op.cache_control() {
metadata.insert("cacheControl", cache_control);
}
let request_metadata = InsertRequestMetadata {
storage_class: self.default_storage_class.as_deref(),
cache_control: op.cache_control(),
content_type: op.content_type(),
metadata: op.user_metadata(),
};

let mut url = format!(
"{}/upload/storage/v1/b/{}/o?uploadType={}&name={}",
self.endpoint,
self.bucket,
if metadata.is_empty() {
if request_metadata.is_empty() {
"media"
} else {
"multipart"
Expand All @@ -269,37 +274,28 @@ impl GcsCore {

req = req.header(CONTENT_LENGTH, size.unwrap_or_default());

if metadata.is_empty() {
if let Some(content_type) = op.content_type() {
req = req.header(CONTENT_TYPE, content_type);
}

if request_metadata.is_empty() {
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
// `request_metadata` is empty here, so no `Content-Type` header is set:
// if `op.content_type()` had been present it would be part of `request_metadata`,
// and the `multipart` branch below would be executed instead of this one.
let req = req.body(body).map_err(new_request_build_error)?;
Ok(req)
} else {
let mut multipart = Multipart::new();

multipart = multipart.part(
FormDataPart::new("metadata")
.header(
CONTENT_TYPE,
"application/json; charset=UTF-8".parse().unwrap(),
)
.content(json!(metadata).to_string()),
);

let mut media_part = FormDataPart::new("media").content(body);

if let Some(content_type) = op.content_type() {
media_part = media_part.header(
let metadata_part = FormDataPart::new("metadata")
.header(
CONTENT_TYPE,
content_type
.parse()
.map_err(|_| Error::new(ErrorKind::Unexpected, "invalid header value"))?,
"application/json; charset=UTF-8".parse().unwrap(),
)
.content(
serde_json::to_vec(&request_metadata)
.expect("metadata serialization should success"),
);
}
multipart = multipart.part(metadata_part);

let media_part = FormDataPart::new("media").content(body);
multipart = multipart.part(media_part);

let req = multipart.apply(Request::post(url))?;
Ok(req)
}
Expand All @@ -318,16 +314,22 @@ impl GcsCore {

let mut req = Request::put(&url);

if let Some(user_metadata) = args.user_metadata() {
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
for (key, value) in user_metadata {
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
req = req.header(format!("{X_GOOG_META_PREFIX}{key}"), value)
}
}

if let Some(content_type) = args.content_type() {
req = req.header(CONTENT_TYPE, content_type);
}

if let Some(acl) = &self.predefined_acl {
req = req.header("x-goog-acl", acl);
req = req.header(X_GOOG_ACL, acl);
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
}

if let Some(storage_class) = &self.default_storage_class {
req = req.header("x-goog-storage-class", storage_class);
req = req.header(X_GOOG_STORAGE_CLASS, storage_class);
}

let req = req.body(body).map_err(new_request_build_error)?;
Expand Down Expand Up @@ -608,6 +610,27 @@ impl GcsCore {
}
}

/// Object metadata serialized to JSON as the `metadata` part of a
/// multipart insert (upload) request.
///
/// Field names are serialized in camelCase (e.g. `contentType`), and any
/// field that is `None` is omitted from the output entirely.
///
/// This type only derives `Serialize`, so no `#[serde(default)]` container
/// attribute is used: that attribute affects deserialization only.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct InsertRequestMetadata<'a> {
    /// Content type of the object, serialized as `contentType`.
    #[serde(skip_serializing_if = "Option::is_none")]
    content_type: Option<&'a str>,
    /// Storage class of the object, serialized as `storageClass`.
    #[serde(skip_serializing_if = "Option::is_none")]
    storage_class: Option<&'a str>,
    /// Cache control of the object, serialized as `cacheControl`.
    #[serde(skip_serializing_if = "Option::is_none")]
    cache_control: Option<&'a str>,
    /// User defined key/value metadata, serialized as `metadata`.
    #[serde(skip_serializing_if = "Option::is_none")]
    metadata: Option<&'a HashMap<String, String>>,
}

impl InsertRequestMetadata<'_> {
    /// Returns `true` when none of the fields is set, i.e. there is nothing
    /// to serialize into the request's metadata.
    ///
    /// A `metadata` of `Some(map)` counts as set even when `map` has no entries.
    pub fn is_empty(&self) -> bool {
        // Destructure exhaustively so that adding a field to the struct
        // forces this check to be revisited.
        let Self {
            content_type,
            storage_class,
            cache_control,
            metadata,
        } = self;

        let any_set = content_type.is_some()
            || storage_class.is_some()
            || cache_control.is_some()
            || metadata.is_some();

        !any_set
    }
}
/// Response JSON from GCS list objects API.
///
/// refer to https://cloud.google.com/storage/docs/json_api/v1/objects/list for details
Expand Down
Loading