Wrap URLs in a newtype that enforces HTTP-ness #181

Merged
merged 1 commit into from Aug 8, 2024
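This PR replaces bare `url::Url` values with a new `HttpUrl` type from `crate::httputil` that can only hold `http://` and `https://` URLs. The `httputil` diff itself isn't shown on this page, but the methods the rest of the diff calls (`extend`, `ensure_dirpath`, `append_query_param`, `as_url`, plus `Display` and `Deserialize`) pin down its rough shape. A minimal sketch, assuming the real implementation differs in details; the error-type names here are hypothetical:

```rust
use std::fmt;
use std::str::FromStr;
use url::Url;

/// A wrapper around `url::Url` that can only hold http:// and https:// URLs
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct HttpUrl(Url);

/// Hypothetical error type; the real name in `src/httputil.rs` may differ
#[derive(Debug)]
pub(crate) enum ParseHttpUrlError {
    Parse(url::ParseError),
    NotHttp,
}

impl FromStr for HttpUrl {
    type Err = ParseHttpUrlError;

    fn from_str(s: &str) -> Result<HttpUrl, ParseHttpUrlError> {
        let url = Url::parse(s).map_err(ParseHttpUrlError::Parse)?;
        match url.scheme() {
            "http" | "https" => Ok(HttpUrl(url)),
            _ => Err(ParseHttpUrlError::NotHttp),
        }
    }
}

impl fmt::Display for HttpUrl {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.fmt(f)
    }
}

impl HttpUrl {
    /// Borrow the inner `url::Url` (the bridge used below for `S3Location::parse_url`)
    pub(crate) fn as_url(&self) -> &Url {
        &self.0
    }

    /// Append path segments to the URL's path
    pub(crate) fn extend<I>(&mut self, segments: I) -> &mut Self
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        // http(s) URLs always have a hierarchical path, so this cannot fail
        self.0
            .path_segments_mut()
            .expect("HTTP(S) URLs are never cannot-be-a-base")
            .pop_if_empty()
            .extend(segments);
        self
    }

    /// Ensure the URL's path ends with a trailing slash
    pub(crate) fn ensure_dirpath(&mut self) -> &mut Self {
        self.0
            .path_segments_mut()
            .expect("HTTP(S) URLs are never cannot-be-a-base")
            .pop_if_empty()
            .push("");
        self
    }

    /// Append a `key=value` pair to the URL's query string
    pub(crate) fn append_query_param(&mut self, key: &str, value: &str) {
        self.0.query_pairs_mut().append_pair(key, value);
    }
}
```

With such a type, the CHANGELOG entry below falls out of the constructor: `"https://api.dandiarchive.org/api".parse::<HttpUrl>()` succeeds, while an `ftp://` URL fails to parse, so a non-HTTP `--api-url` is rejected before `DandiClient::new()` is ever called.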
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -3,6 +3,7 @@ In Development
 - Reduced the sizes of a number of streams & futures
 - Added doc comments to much of the code
 - Return 502 status when a backend returns an invalid response
+- Require `--api-url` (and other URLs retrieved from APIs) to be HTTP(S)
 
 v0.4.0 (2024-07-09)
 -------------------
33 changes: 16 additions & 17 deletions src/dandi/mod.rs
@@ -9,7 +9,7 @@ pub(crate) use self::types::*;
 pub(crate) use self::version_id::*;
 use crate::consts::S3CLIENT_CACHE_SIZE;
 use crate::dav::ErrorClass;
-use crate::httputil::{urljoin_slashed, BuildClientError, Client, HttpError};
+use crate::httputil::{BuildClientError, Client, HttpError, HttpUrl};
 use crate::paths::{ParsePureDirPathError, PureDirPath, PurePath};
 use crate::s3::{
     BucketSpec, GetBucketRegionError, PrefixedS3Client, S3Client, S3Error, S3Location,
@@ -20,7 +20,6 @@ use serde::de::DeserializeOwned;
 use smartstring::alias::CompactString;
 use std::sync::Arc;
 use thiserror::Error;
-use url::Url;
 
 /// A client for fetching data about Dandisets, their versions, and their
 /// assets from a DANDI Archive instance
@@ -30,7 +29,7 @@ pub(crate) struct DandiClient {
     inner: Client,
 
     /// The base API URL of the Archive instance
-    api_url: Url,
+    api_url: HttpUrl,
 
     /// A cache of [`S3Client`] instances that are used for listing Zarr
     /// entries on the Archive's S3 bucket.
@@ -51,7 +50,7 @@ impl DandiClient {
     /// # Errors
     ///
     /// Returns an error if construction of the inner `reqwest::Client` fails
-    pub(crate) fn new(api_url: Url) -> Result<Self, BuildClientError> {
+    pub(crate) fn new(api_url: HttpUrl) -> Result<Self, BuildClientError> {
         let inner = Client::new()?;
         let s3clients = CacheBuilder::new(S3CLIENT_CACHE_SIZE)
             .name("s3clients")
@@ -65,24 +64,26 @@ impl DandiClient {
 
     /// Return the URL formed by appending the given path segments and a
     /// trailing slash to the path of the API base URL
-    fn get_url<I>(&self, segments: I) -> Url
+    fn get_url<I>(&self, segments: I) -> HttpUrl
     where
         I: IntoIterator,
         I::Item: AsRef<str>,
     {
-        urljoin_slashed(&self.api_url, segments)
+        let mut url = self.api_url.clone();
+        url.extend(segments).ensure_dirpath();
+        url
     }
 
     /// Perform a `GET` request to the given URL and return the deserialized
     /// JSON response body
-    async fn get<T: DeserializeOwned>(&self, url: Url) -> Result<T, DandiError> {
+    async fn get<T: DeserializeOwned>(&self, url: HttpUrl) -> Result<T, DandiError> {
         self.inner.get_json(url).await.map_err(Into::into)
     }
 
     /// Return a [`futures_util::Stream`] that makes paginated `GET` requests
    /// to the given URL and its subsequent pages and yields a `Result<T,
    /// DandiError>` value for each item deserialized from the responses
-    fn paginate<T: DeserializeOwned + 'static>(&self, url: Url) -> Paginate<T> {
+    fn paginate<T: DeserializeOwned + 'static>(&self, url: HttpUrl) -> Paginate<T> {
         Paginate::new(self, url)
     }
 
@@ -158,7 +159,7 @@ impl DandiClient {
 
     /// Return the URL for the metadata for the given version of the given
     /// Dandiset
-    fn version_metadata_url(&self, dandiset_id: &DandisetId, version_id: &VersionId) -> Url {
+    fn version_metadata_url(&self, dandiset_id: &DandisetId, version_id: &VersionId) -> HttpUrl {
         self.get_url([
             "dandisets",
             dandiset_id.as_ref(),
@@ -396,7 +397,7 @@ impl<'a> VersionEndpoint<'a> {
     }
 
     /// Return the URL for the version's metadata
-    fn metadata_url(&self) -> Url {
+    fn metadata_url(&self) -> HttpUrl {
         self.client
             .version_metadata_url(&self.dandiset_id, &self.version_id)
     }
@@ -421,7 +422,7 @@ impl<'a> VersionEndpoint<'a> {
 
     /// Return the URL for the metadata of the asset in this version with the
     /// given asset ID
-    fn asset_metadata_url(&self, asset_id: &str) -> Url {
+    fn asset_metadata_url(&self, asset_id: &str) -> HttpUrl {
         self.client.get_url([
             "dandisets",
             self.dandiset_id.as_ref(),
@@ -447,10 +448,9 @@ impl<'a> VersionEndpoint<'a> {
             self.version_id.as_ref(),
             "assets",
         ]);
-        url.query_pairs_mut()
-            .append_pair("path", path.as_ref())
-            .append_pair("metadata", "1")
-            .append_pair("order", "path");
+        url.append_query_param("path", path.as_ref());
+        url.append_query_param("metadata", "1");
+        url.append_query_param("order", "path");
         let dirpath = path.to_dir_path();
         let mut stream = self.client.paginate::<RawAsset>(url.clone());
         while let Some(asset) = stream.try_next().await? {
@@ -480,8 +480,7 @@ impl<'a> VersionEndpoint<'a> {
             "paths",
         ]);
         if let Some(path) = path {
-            url.query_pairs_mut()
-                .append_pair("path_prefix", path.as_ref());
+            url.append_query_param("path_prefix", path.as_ref());
         }
         self.client.paginate(url)
     }
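The rewritten `get_url()` above was the only caller of the removed `urljoin_slashed()` helper. A quick sketch of what the new pipeline produces, reusing the hypothetical `HttpUrl` sketch from the top of this page (the API URL and Dandiset ID are illustrative):

```rust
fn main() {
    let api_url: HttpUrl = "https://api.dandiarchive.org/api".parse().unwrap();
    let mut url = api_url.clone();
    // get_url(["dandisets", "000108"]) would run exactly this:
    url.extend(["dandisets", "000108"]).ensure_dirpath();
    // Like the old urljoin_slashed(), the result is slash-terminated:
    assert_eq!(
        url.to_string(),
        "https://api.dandiarchive.org/api/dandisets/000108/"
    );
}
```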
16 changes: 10 additions & 6 deletions src/dandi/streams.rs
@@ -1,12 +1,10 @@
-use super::types::Page;
 use super::{DandiClient, DandiError};
-use crate::httputil::{Client, HttpError};
+use crate::httputil::{Client, HttpError, HttpUrl};
 use futures_util::{future::BoxFuture, FutureExt, Stream};
 use pin_project::pin_project;
-use serde::de::DeserializeOwned;
+use serde::{de::DeserializeOwned, Deserialize};
 use std::pin::Pin;
 use std::task::{ready, Context, Poll};
-use url::Url;
 
 // Implementing paginate() as a manually-implemented Stream instead of via
 // async_stream lets us save about 4700 bytes on dandidav's top-level Futures.
@@ -21,13 +19,13 @@ enum PaginateState<T> {
     Requesting(BoxFuture<'static, Result<Page<T>, HttpError>>),
     Yielding {
         results: std::vec::IntoIter<T>,
-        next: Option<Url>,
+        next: Option<HttpUrl>,
     },
     Done,
 }
 
 impl<T> Paginate<T> {
-    pub(super) fn new(client: &DandiClient, url: Url) -> Self {
+    pub(super) fn new(client: &DandiClient, url: HttpUrl) -> Self {
         Paginate {
             client: client.inner.clone(),
             state: PaginateState::Yielding {
@@ -78,3 +76,9 @@
         }
     }
 }
+
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
+struct Page<T> {
+    next: Option<HttpUrl>,
+    results: Vec<T>,
+}
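`Page<T>` moves into this module (it was previously in `types.rs`, below), and its `next` field is now an `Option<HttpUrl>`, so a non-HTTP `next` link from the API is rejected at deserialization time rather than requested. A sketch of one page being decoded, assuming the real `HttpUrl` implements `Deserialize` (which the derive on `Page<T>` requires); the response body is illustrative:

```rust
fn main() {
    // Shape of one paginated Archive response (values are made up):
    let body = r#"{
        "next": "https://api.dandiarchive.org/api/dandisets/?page=2",
        "results": [{"identifier": "000108"}]
    }"#;
    let page: Page<serde_json::Value> =
        serde_json::from_str(body).expect("page should deserialize");
    // Paginate yields everything in `results`, then GETs `next` (if any)
    // and repeats until `next` is null.
    assert_eq!(page.results.len(), 1);
    assert!(page.next.is_some());
}
```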
30 changes: 12 additions & 18 deletions src/dandi/types.rs
@@ -1,16 +1,10 @@
 use super::{DandisetId, VersionId};
+use crate::httputil::HttpUrl;
 use crate::paths::{PureDirPath, PurePath};
 use crate::s3::{PrefixedS3Client, S3Entry, S3Folder, S3Location, S3Object};
 use serde::Deserialize;
 use thiserror::Error;
 use time::OffsetDateTime;
-use url::Url;
 
-#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
-pub(super) struct Page<T> {
-    pub(super) next: Option<Url>,
-    pub(super) results: Vec<T>,
-}
-
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
 pub(super) struct RawDandiset {
@@ -67,7 +61,7 @@ pub(super) struct RawDandisetVersion {
 }
 
 impl RawDandisetVersion {
-    pub(super) fn with_metadata_url(self, metadata_url: Url) -> DandisetVersion {
+    pub(super) fn with_metadata_url(self, metadata_url: HttpUrl) -> DandisetVersion {
         DandisetVersion {
             version: self.version,
             size: self.size,
@@ -84,7 +78,7 @@ pub(crate) struct DandisetVersion {
     pub(crate) size: i64,
     pub(crate) created: OffsetDateTime,
     pub(crate) modified: OffsetDateTime,
-    pub(crate) metadata_url: Url,
+    pub(crate) metadata_url: HttpUrl,
 }
 
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -166,7 +160,7 @@ pub(crate) struct BlobAsset {
     pub(crate) created: OffsetDateTime,
     pub(crate) modified: OffsetDateTime,
     pub(crate) metadata: AssetMetadata,
-    pub(crate) metadata_url: Url,
+    pub(crate) metadata_url: HttpUrl,
 }
 
 impl BlobAsset {
@@ -178,18 +172,18 @@ impl BlobAsset {
         self.metadata.digest.dandi_etag.as_deref()
     }
 
-    pub(crate) fn archive_url(&self) -> Option<&Url> {
+    pub(crate) fn archive_url(&self) -> Option<&HttpUrl> {
         self.metadata
             .content_url
             .iter()
-            .find(|url| S3Location::parse_url(url).is_err())
+            .find(|url| S3Location::parse_url(url.as_url()).is_err())
     }
 
-    pub(crate) fn s3_url(&self) -> Option<&Url> {
+    pub(crate) fn s3_url(&self) -> Option<&HttpUrl> {
         self.metadata
             .content_url
             .iter()
-            .find(|url| S3Location::parse_url(url).is_ok())
+            .find(|url| S3Location::parse_url(url.as_url()).is_ok())
     }
 }
 
@@ -202,15 +196,15 @@ pub(crate) struct ZarrAsset {
     pub(crate) created: OffsetDateTime,
     pub(crate) modified: OffsetDateTime,
     pub(crate) metadata: AssetMetadata,
-    pub(crate) metadata_url: Url,
+    pub(crate) metadata_url: HttpUrl,
 }
 
 impl ZarrAsset {
     pub(crate) fn s3location(&self) -> Option<S3Location> {
         self.metadata
             .content_url
             .iter()
-            .find_map(|url| S3Location::parse_url(url).ok())
+            .find_map(|url| S3Location::parse_url(url.as_url()).ok())
     }
 
     pub(crate) fn make_resource(&self, value: S3Entry) -> DandiResource {
@@ -246,7 +240,7 @@ impl ZarrAsset {
 #[serde(rename_all = "camelCase")]
 pub(crate) struct AssetMetadata {
     encoding_format: Option<String>,
-    content_url: Vec<Url>,
+    content_url: Vec<HttpUrl>,
     digest: AssetDigests,
 }
 
@@ -374,7 +368,7 @@ pub(crate) struct ZarrEntry {
     pub(crate) size: i64,
     pub(crate) modified: OffsetDateTime,
     pub(crate) etag: String,
-    pub(crate) url: Url,
+    pub(crate) url: HttpUrl,
 }
 
 #[derive(Clone, Debug)]
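The pattern running through this file: `S3Location::parse_url()` still takes a `&url::Url`, so every call site bridges with `as_url()`, and whether that parse succeeds is what splits `content_url` entries into S3 URLs versus Archive URLs. An illustrative sketch of that split (`classify` is a hypothetical helper, not part of the diff):

```rust
// Hypothetical helper illustrating the s3_url()/archive_url() split above
fn classify(urls: &[HttpUrl]) {
    for url in urls {
        // as_url() borrows the inner url::Url without re-parsing the string
        if S3Location::parse_url(url.as_url()).is_ok() {
            println!("S3 content URL: {url}"); // what s3_url() returns
        } else {
            println!("Archive content URL: {url}"); // what archive_url() returns
        }
    }
}
```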
12 changes: 6 additions & 6 deletions src/dav/types.rs
@@ -3,12 +3,12 @@ use super::xml::{PropValue, Property};
 use super::VersionSpec;
 use crate::consts::{DEFAULT_CONTENT_TYPE, YAML_CONTENT_TYPE};
 use crate::dandi::*;
+use crate::httputil::HttpUrl;
 use crate::paths::{PureDirPath, PurePath};
 use crate::zarrman::*;
 use enum_dispatch::enum_dispatch;
 use serde::{ser::Serializer, Serialize};
 use time::OffsetDateTime;
-use url::Url;
 
 /// Trait for querying the values of WebDAV properties from WebDAV resources
 ///
@@ -282,7 +282,7 @@ pub(super) struct DavCollection {
 
     /// A URL for retrieving the resource's associated metadata (if any) from
     /// the Archive instance
-    pub(super) metadata_url: Option<Url>,
+    pub(super) metadata_url: Option<HttpUrl>,
 }
 
 impl DavCollection {
@@ -552,7 +552,7 @@ pub(super) struct DavItem {
 
     /// A URL for retrieving the resource's associated metadata (if any) from
     /// the Archive instance
-    pub(super) metadata_url: Option<Url>,
+    pub(super) metadata_url: Option<HttpUrl>,
 }
 
 impl DavItem {
@@ -727,19 +727,19 @@ pub(super) enum DavContent {
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub(super) enum Redirect {
     /// A single URL to always redirect to
-    Direct(Url),
+    Direct(HttpUrl),
 
     /// An S3 URL and an Archive instance URL, to be selected between based on
     /// whether `--prefer-s3-redirects` was supplied at program invocation
-    Alt { s3: Url, archive: Url },
+    Alt { s3: HttpUrl, archive: HttpUrl },
 }
 
 impl Redirect {
     /// Resolve to a single URL.
     ///
     /// If `prefer_s3` is `true`, `Alt` variants resolve to their `s3` field;
     /// otherwise, they resolve to their `archive` field.
-    pub(super) fn get_url(&self, prefer_s3: bool) -> &Url {
+    pub(super) fn get_url(&self, prefer_s3: bool) -> &HttpUrl {
         match self {
             Redirect::Direct(u) => u,
             Redirect::Alt { s3, archive } => {
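A short usage sketch of the resolution rule in `get_url()`'s doc comment (the URLs are illustrative; in dandidav the flag comes from `--prefer-s3-redirects`):

```rust
fn main() {
    let s3: HttpUrl = "https://dandiarchive.s3.amazonaws.com/blobs/a1b2c3"
        .parse()
        .unwrap();
    let archive: HttpUrl = "https://api.dandiarchive.org/api/assets/a1b2c3/download/"
        .parse()
        .unwrap();
    let redirect = Redirect::Alt { s3: s3.clone(), archive: archive.clone() };
    assert_eq!(redirect.get_url(true), &s3); // --prefer-s3-redirects supplied
    assert_eq!(redirect.get_url(false), &archive); // default behavior
}
```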
11 changes: 6 additions & 5 deletions src/dav/util.rs
@@ -1,6 +1,7 @@
 use super::VersionSpec;
 use crate::consts::DAV_XML_CONTENT_TYPE;
 use crate::dandi::DandisetId;
+use crate::httputil::HttpUrl;
 use crate::paths::PureDirPath;
 use axum::{
     async_trait,
@@ -129,14 +130,14 @@ impl AsRef<str> for Href {
     }
 }
 
-impl From<url::Url> for Href {
-    fn from(value: url::Url) -> Href {
-        Href(value.into())
+impl From<HttpUrl> for Href {
+    fn from(value: HttpUrl) -> Href {
+        Href(value.to_string())
     }
 }
 
-impl From<&url::Url> for Href {
-    fn from(value: &url::Url) -> Href {
+impl From<&HttpUrl> for Href {
+    fn from(value: &HttpUrl) -> Href {
         Href(value.to_string())
     }
 }
4 changes: 3 additions & 1 deletion src/dav/xml/multistatus.rs
@@ -153,6 +153,7 @@ pub(crate) enum ToXmlError {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::httputil::HttpUrl;
     use indoc::indoc;
     use pretty_assertions::assert_eq;
 
@@ -222,7 +223,8 @@ mod tests {
                 status: "HTTP/1.1 307 TEMPORARY REDIRECT".into(),
             }],
             location: Some(
-                url::Url::parse("https://www.example.com/data/quux.dat")
+                "https://www.example.com/data/quux.dat"
+                    .parse::<HttpUrl>()
                     .unwrap()
                     .into(),
             ),