From b54b4039204a1a49096113a791fb55c6cdde8c4d Mon Sep 17 00:00:00 2001 From: Jessica Black Date: Fri, 18 Oct 2024 20:07:11 -0700 Subject: [PATCH] Simplify parser, fix `ToSchema` types --- Cargo.toml | 3 +- src/lib.rs | 110 +++++++++++------------ src/locator.rs | 200 +++++++++++++++++++++++------------------ src/locator_package.rs | 55 +++++++----- src/locator_strict.rs | 56 +++++++----- 5 files changed, 233 insertions(+), 191 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8ed42ab..3acd657 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,9 +6,7 @@ edition = "2021" [dependencies] alphanumeric-sort = "1.5.3" getset = "0.1.2" -lazy_static = "1.4.0" pretty_assertions = "1.4.0" -regex = "1.6.0" serde = { version = "1.0.140", features = ["derive"] } strum = { version = "0.24.1", features = ["derive"] } thiserror = "1.0.31" @@ -18,6 +16,7 @@ documented = "0.7.1" semver = "1.0.23" bon = "2.3.0" duplicate = "2.0.0" +lazy-regex = { version = "3.3.0", features = ["regex"] } [dev-dependencies] assert_matches = "1.5.0" diff --git a/src/lib.rs b/src/lib.rs index 98333b4..69b68eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,12 +3,10 @@ #![deny(missing_docs)] #![warn(rust_2018_idioms)] -use std::{borrow::Cow, str::FromStr}; +use std::{borrow::Cow, num::ParseIntError, str::FromStr}; use documented::Documented; use duplicate::duplicate; -use lazy_static::lazy_static; -use regex::Regex; use serde::{Deserialize, Serialize}; use strum::{AsRefStr, Display, EnumIter, EnumString}; use utoipa::ToSchema; @@ -198,6 +196,15 @@ impl From for OrgId { } } +impl FromStr for OrgId { + type Err = ParseIntError; + + fn from_str(s: &str) -> Result { + let id = s.parse()?; + Ok(Self(id)) + } +} + duplicate! { [ number; @@ -391,41 +398,34 @@ impl std::cmp::PartialOrd for Revision { } /// Optionally parse an org ID and trimmed package out of a package string. -fn parse_org_package(package: &str) -> Result<(Option, Package), PackageParseError> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^(?:(?P\d+)/)?(?P.+)") - .expect("Package parsing expression must compile"); - } - - let mut captures = RE.captures_iter(package); - let capture = captures.next().ok_or_else(|| PackageParseError::Package { - package: package.to_string(), - })?; - - let trimmed_package = - capture - .name("package") - .map(|m| m.as_str()) - .ok_or_else(|| PackageParseError::Field { - package: package.to_string(), - field: String::from("package"), - })?; - - // If we fail to parse the org_id as a valid number, don't fail the overall parse; - // just don't namespace to org ID and return the input unmodified. - match capture - .name("org_id") - .map(|m| m.as_str()) - .map(OrgId::try_from) - { - // An org ID was provided and validly parsed, use it. - Some(Ok(org_id)) => Ok((Some(org_id), Package::from(trimmed_package))), - - // Otherwise, if we either didn't get an org ID section, - // or it wasn't a valid org ID, - // just use the package as-is. - _ => Ok((None, Package::from(package))), +fn parse_org_package(input: &str) -> (Option, Package) { + macro_rules! construct { + ($org_id:expr, $package:expr) => { + return (Some($org_id), Package::from($package)) + }; + ($package:expr) => { + return (None, Package::from($package)) + }; } + + // No `/`? This must not be namespaced. + let Some((org_id, package)) = input.split_once('/') else { + construct!(input); + }; + + // Nothing before or after the `/`? Still not namespaced. + if org_id.is_empty() || package.is_empty() { + construct!(input); + }; + + // If the part before the `/` isn't a number, it can't be a namespaced org id. + let Ok(org_id) = org_id.parse() else { + construct!(input) + }; + + // Ok, there was text before and after the `/`, and the content before was a number. + // Finally, we've got a namespaced package. + construct!(org_id, package) } #[cfg(test)] @@ -449,27 +449,23 @@ mod tests { }; } - #[test_case(Some(OrgId(0)), Package::new("name"); "0/name")] - #[test_case(Some(OrgId(1)), Package::new("name"); "1/name")] - #[test_case(Some(OrgId(1)), Package::new("name/foo"); "1/name/foo")] - #[test_case(Some(OrgId(9809572)), Package::new("name/foo"); "9809572/name/foo")] - #[test_case(None, Package::new("name/foo"); "name/foo")] - #[test_case(None, Package::new("name"); "name")] - #[test_case(None, Package::new("/name/foo"); "/name/foo")] - #[test_case(None, Package::new("/name"); "/name")] - #[test_case(None, Package::new("abcd/1234/name"); "abcd/1234/name")] - #[test_case(None, Package::new("1abc2/name"); "1abc2/name")] + #[test_case("0/name", Some(OrgId(0)), Package::new("name"); "0/name")] + #[test_case("1/name", Some(OrgId(1)), Package::new("name"); "1/name")] + #[test_case("1/name/foo", Some(OrgId(1)), Package::new("name/foo"); "1/name/foo")] + #[test_case("1//name/foo", Some(OrgId(1)), Package::new("/name/foo"); "doubleslash_1/name/foo")] + #[test_case("9809572/name/foo", Some(OrgId(9809572)), Package::new("name/foo"); "9809572/name/foo")] + #[test_case("name/foo", None, Package::new("name/foo"); "name/foo")] + #[test_case("name", None, Package::new("name"); "name")] + #[test_case("/name/foo", None, Package::new("/name/foo"); "/name/foo")] + #[test_case("/name", None, Package::new("/name"); "/name")] + #[test_case("abcd/1234/name", None, Package::new("abcd/1234/name"); "abcd/1234/name")] + #[test_case("1abc2/name", None, Package::new("1abc2/name"); "1abc2/name")] + #[test_case("name/1234", None, Package::new("name/1234"); "name/1234")] #[test] - fn parse_org_package(org: Option, package: Package) { - let test = match org { - Some(id) => format!("{id}/{package}"), - None => format!("{package}"), - }; - let Ok((org_id, name)) = parse_org_package(&test) else { - panic!("must parse '{test}'") - }; - assert_eq!(org_id, org, "'org_id' must match in '{test}'"); - assert_eq!(package, name, "'package' must match in '{test}"); + fn parse_org_package(input: &str, org: Option, package: Package) { + let (org_id, name) = parse_org_package(input); + assert_eq!(org_id, org, "'org_id' must match in '{input}'"); + assert_eq!(package, name, "'package' must match in '{input}"); } #[test_case(r#""rpm-generic""#, Fetcher::LinuxRpm; "rpm-generic")] diff --git a/src/locator.rs b/src/locator.rs index d40ab9d..1d72828 100644 --- a/src/locator.rs +++ b/src/locator.rs @@ -1,12 +1,14 @@ -use std::{fmt::Display, str::FromStr}; +use std::{borrow::Cow, fmt::Display, str::FromStr}; use bon::Builder; use documented::Documented; use getset::{CopyGetters, Getters}; -use lazy_static::lazy_static; -use regex::Regex; use serde::{Deserialize, Serialize}; -use utoipa::ToSchema; +use serde_json::json; +use utoipa::{ + openapi::{schema, ObjectBuilder, RefOr, Schema}, + PartialSchema, ToSchema, +}; use crate::{ parse_org_package, Error, Fetcher, OrgId, Package, PackageLocator, ParseError, Revision, @@ -61,6 +63,29 @@ macro_rules! locator { }; } +/// The regular expression used to parse the locator. +/// +/// ``` +/// # use locator::locator_regex; +/// +/// // Get the raw string used for the expression. +/// let expression = locator_regex!(); +/// +/// // Parse the regular expression. +/// // The expression is compiled once per callsite. +/// let parsed = locator_regex!(parse => "git+github.com/fossas/locator-rs$v2.2.0"); +/// ``` +#[macro_export] +#[doc(hidden)] +macro_rules! locator_regex { + () => { + r"^(?:([a-z-]+)\+|)([^$]+)(?:\$|)(.+|)$" + }; + (parse => $input:expr) => { + lazy_regex::regex_captures!(r"^(?:([a-z-]+)\+|)([^$]+)(?:\$|)(.+|)$", $input) + }; +} + /// Core, and most services that interact with Core, /// refer to open source packages via the `Locator` type. /// @@ -111,30 +136,11 @@ macro_rules! locator { /// /// This parse function is based on the function used in FOSSA Core for maximal compatibility. #[derive( - Clone, - Eq, - PartialEq, - Ord, - PartialOrd, - Hash, - Debug, - Builder, - Getters, - CopyGetters, - Documented, - ToSchema, -)] -#[schema( - examples( - json!("git+github.com/fossas/example$1234"), - json!("npm+lodash$1.0.0"), - json!("npm+lodash"), - json!("mvn+1234/org.custom.mylib:mylib$1.0.0:jar")), + Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Builder, Getters, CopyGetters, Documented, )] pub struct Locator { /// Determines which fetcher is used to download this package. #[getset(get_copy = "pub")] - #[schema(ignore)] fetcher: Fetcher, /// Specifies the organization ID to which this package is namespaced. @@ -157,7 +163,6 @@ pub struct Locator { /// - A private NPM package that is hosted on NPM but requires credentials is namespaced. #[builder(into)] #[getset(get_copy = "pub")] - #[schema(ignore)] org_id: Option, /// Specifies the unique identifier for the package by fetcher. @@ -166,7 +171,6 @@ pub struct Locator { /// uses a value in the form of `{user_name}/{package_name}`. #[builder(into)] #[getset(get = "pub")] - #[schema(ignore)] package: Package, /// Specifies the version for the package by fetcher. @@ -176,76 +180,77 @@ pub struct Locator { /// and the fetcher disambiguates. #[builder(into)] #[getset(get = "pub")] - #[schema(ignore)] revision: Option, } impl Locator { + /// The regular expression used to parse locators. + pub const REGEX: &'static str = locator_regex!(); + /// Parse a `Locator`. /// For details, see the parsing section on [`Locator`]. pub fn parse(locator: &str) -> Result { - lazy_static! { - static ref RE: Regex = Regex::new( - r"^(?:(?P[a-z-]+)\+|)(?P[^$]+)(?:\$|)(?P.+|)$" - ) - .expect("Locator parsing expression must compile"); + macro_rules! fatal { + (syntax; $inner:expr) => { + ParseError::Syntax { + input: $inner.into(), + } + }; + (field => $name:expr; $inner:expr) => { + ParseError::Field { + input: $inner.into(), + field: $name.into(), + } + }; + (fetcher => $fetcher:expr, error => $err:expr; $inner:expr) => { + ParseError::Fetcher { + input: $inner.into(), + fetcher: $fetcher.into(), + error: $err.into(), + } + }; + (package => $package:expr, error => $err:expr; $inner:expr) => { + ParseError::Package { + input: $inner.into(), + package: $package.into(), + error: $err.into(), + } + }; } - let mut captures = RE.captures_iter(locator); - let capture = captures.next().ok_or_else(|| ParseError::Syntax { - input: locator.to_string(), - })?; - - let fetcher = - capture - .name("fetcher") - .map(|m| m.as_str()) - .ok_or_else(|| ParseError::Field { - input: locator.to_owned(), - field: "fetcher".to_string(), - })?; - - let fetcher = Fetcher::try_from(fetcher).map_err(|error| ParseError::Fetcher { - input: locator.to_owned(), - fetcher: fetcher.to_string(), - error, - })?; - - let package = capture - .name("package") - .map(|m| m.as_str().to_owned()) - .ok_or_else(|| ParseError::Field { - input: locator.to_owned(), - field: "package".to_string(), - })?; - - let revision = capture.name("revision").map(|m| m.as_str()).and_then(|s| { - if s.is_empty() { - None - } else { - Some(Revision::from(s)) - } - }); + macro_rules! bail { + ($($tt:tt)*) => { + return Err(Error::from(fatal!($($tt)*))) + }; + } - match parse_org_package(&package) { - Ok((org_id @ Some(_), package)) => Ok(Locator { - fetcher, - org_id, - package, - revision, - }), - Ok((org_id @ None, _)) => Ok(Locator { - fetcher, - org_id, - package: Package::from(package.as_str()), - revision, - }), - Err(error) => Err(Error::Parse(ParseError::Package { - input: locator.to_owned(), - package, - error, - })), + let Some((_, fetcher, package, revision)) = locator_regex!(parse => locator) else { + bail!(syntax; locator); + }; + + if fetcher.is_empty() { + bail!(field => "fetcher"; locator); } + let fetcher = Fetcher::try_from(fetcher) + .map_err(|err| fatal!(fetcher => fetcher, error => err; locator))?; + + if package.is_empty() { + bail!(field => "package"; locator); + } + + let revision = if revision.is_empty() { + None + } else { + Some(Revision::from(revision)) + }; + + let (org_id, package) = parse_org_package(package); + Ok(Locator { + fetcher, + org_id, + package, + revision, + }) } /// Promote a `Locator` to a [`StrictLocator`] by providing the default value to use @@ -394,6 +399,31 @@ impl FromStr for Locator { } } +impl ToSchema for Locator { + fn name() -> Cow<'static, str> { + Cow::Borrowed("Locator") + } + + fn schemas(schemas: &mut Vec<(String, RefOr)>) { + schemas.push((Self::name().into(), Self::schema())); + } +} + +impl PartialSchema for Locator { + fn schema() -> RefOr { + ObjectBuilder::new() + .schema_type(schema::Type::String) + .description(Some(Self::DOCS)) + .pattern(Some(Locator::REGEX)) + .examples([ + json!("git+github.com/fossas/some-repo$abcd1234"), + json!("npm+lodash"), + json!("mvn+123/org.internal.MyProject:MyProject$1.1.3:jar"), + ]) + .into() + } +} + #[cfg(test)] mod tests { use std::borrow::Cow; diff --git a/src/locator_package.rs b/src/locator_package.rs index 72decdd..1ae615e 100644 --- a/src/locator_package.rs +++ b/src/locator_package.rs @@ -1,10 +1,14 @@ -use std::{fmt::Display, str::FromStr}; +use std::{borrow::Cow, fmt::Display, str::FromStr}; use bon::Builder; use documented::Documented; use getset::{CopyGetters, Getters}; use serde::{Deserialize, Serialize}; -use utoipa::ToSchema; +use serde_json::json; +use utoipa::{ + openapi::{schema, ObjectBuilder, RefOr, Schema}, + PartialSchema, ToSchema, +}; use crate::{Error, Fetcher, Locator, OrgId, Package, StrictLocator}; @@ -68,29 +72,11 @@ macro_rules! package { /// /// This implementation ignores the `revision` segment if it exists. If this is not preferred, use [`Locator`] instead. #[derive( - Clone, - Eq, - PartialEq, - Ord, - PartialOrd, - Hash, - Debug, - Builder, - Getters, - CopyGetters, - Documented, - ToSchema, -)] -#[schema( - examples( - json!("git+github.com/fossas/example"), - json!("npm+lodash"), - json!("mvn+1234/org.custom.mylib:mylib")), + Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Builder, Getters, CopyGetters, Documented, )] pub struct PackageLocator { /// Determines which fetcher is used to download this package. #[getset(get_copy = "pub")] - #[schema(ignore)] fetcher: Fetcher, /// Specifies the organization ID to which this package is namespaced. @@ -113,7 +99,6 @@ pub struct PackageLocator { /// - A private NPM package that is hosted on NPM but requires credentials is namespaced. #[builder(into)] #[getset(get_copy = "pub")] - #[schema(ignore)] org_id: Option, /// Specifies the unique identifier for the package by fetcher. @@ -122,7 +107,6 @@ pub struct PackageLocator { /// uses a value in the form of `{user_name}/{package_name}`. #[builder(into)] #[getset(get = "pub")] - #[schema(ignore)] package: Package, } @@ -250,6 +234,31 @@ impl FromStr for PackageLocator { } } +impl ToSchema for PackageLocator { + fn name() -> Cow<'static, str> { + Cow::Borrowed("Locator") + } + + fn schemas(schemas: &mut Vec<(String, RefOr)>) { + schemas.push((Self::name().into(), Self::schema())); + } +} + +impl PartialSchema for PackageLocator { + fn schema() -> RefOr { + ObjectBuilder::new() + .schema_type(schema::Type::String) + .description(Some(Self::DOCS)) + .pattern(Some(Locator::REGEX)) + .examples([ + json!("git+github.com/fossas/some-repo"), + json!("npm+lodash"), + json!("mvn+123/org.internal.MyProject:MyProject"), + ]) + .into() + } +} + #[cfg(test)] mod tests { use assert_matches::assert_matches; diff --git a/src/locator_strict.rs b/src/locator_strict.rs index c27d60a..e223bb1 100644 --- a/src/locator_strict.rs +++ b/src/locator_strict.rs @@ -1,10 +1,14 @@ -use std::{fmt::Display, str::FromStr}; +use std::{borrow::Cow, fmt::Display, str::FromStr}; use bon::Builder; use documented::Documented; use getset::{CopyGetters, Getters}; use serde::{Deserialize, Serialize}; -use utoipa::ToSchema; +use serde_json::json; +use utoipa::{ + openapi::{schema, ObjectBuilder, RefOr, Schema}, + PartialSchema, ToSchema, +}; use crate::{Error, Fetcher, Locator, OrgId, Package, PackageLocator, ParseError, Revision}; @@ -68,29 +72,11 @@ macro_rules! strict { /// {fetcher}+{org_id}/{package}${revision} /// ``` #[derive( - Clone, - Eq, - PartialEq, - Ord, - PartialOrd, - Hash, - Debug, - Builder, - Getters, - CopyGetters, - Documented, - ToSchema, -)] -#[schema( - examples( - json!("git+github.com/fossas/example$1234"), - json!("npm+lodash$1.0.0"), - json!("mvn+1234/org.custom.mylib:mylib$1.0.0:jar")), + Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Builder, Getters, CopyGetters, Documented, )] pub struct StrictLocator { /// Determines which fetcher is used to download this package. #[getset(get_copy = "pub")] - #[schema(ignore)] fetcher: Fetcher, /// Specifies the organization ID to which this package is namespaced. @@ -113,7 +99,6 @@ pub struct StrictLocator { /// - A private NPM package that is hosted on NPM but requires credentials is namespaced. #[builder(into)] #[getset(get_copy = "pub")] - #[schema(ignore)] org_id: Option, /// Specifies the unique identifier for the package by fetcher. @@ -122,7 +107,6 @@ pub struct StrictLocator { /// uses a value in the form of `{user_name}/{package_name}`. #[builder(into)] #[getset(get = "pub")] - #[schema(ignore)] package: Package, /// Specifies the version for the package by fetcher. @@ -132,7 +116,6 @@ pub struct StrictLocator { /// and the fetcher disambiguates. #[builder(into)] #[getset(get = "pub")] - #[schema(ignore)] revision: Revision, } @@ -226,6 +209,31 @@ impl FromStr for StrictLocator { } } +impl ToSchema for StrictLocator { + fn name() -> Cow<'static, str> { + Cow::Borrowed("StrictLocator") + } + + fn schemas(schemas: &mut Vec<(String, RefOr)>) { + schemas.push((Self::name().into(), Self::schema())); + } +} + +impl PartialSchema for StrictLocator { + fn schema() -> RefOr { + ObjectBuilder::new() + .schema_type(schema::Type::String) + .description(Some(Self::DOCS)) + .pattern(Some(Locator::REGEX)) + .examples([ + json!("git+github.com/fossas/some-repo$abcd1234"), + json!("npm+lodash$1.0.0"), + json!("mvn+123/org.internal.MyProject:MyProject$1.1.3:jar"), + ]) + .into() + } +} + #[cfg(test)] mod tests { use assert_matches::assert_matches;