Skip to content

Commit

Permalink
Support filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
jssblck committed Dec 12, 2024
1 parent 7595d37 commit d4bad05
Show file tree
Hide file tree
Showing 6 changed files with 345 additions and 43 deletions.
2 changes: 1 addition & 1 deletion bin/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "circe"
version = "0.1.0"
version = "0.2.0"
edition = "2021"
authors = ["Jessica Black <[email protected]>", "FOSSA Inc. <[email protected]>"]
description = "Extracts and examines the contents of containers"
Expand Down
60 changes: 59 additions & 1 deletion bin/src/extract.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use circe_lib::{registry::Registry, Authentication, LayerDescriptor, Platform, Reference};
use circe_lib::{
registry::Registry, Authentication, Filters, LayerDescriptor, Platform, Reference,
};
use clap::{Parser, ValueEnum};
use color_eyre::eyre::{bail, Context, Result};
use derive_more::Debug;
Expand Down Expand Up @@ -38,6 +40,55 @@ pub struct Options {
#[arg(long, default_value = "squash")]
layers: Mode,

/// Glob filters for layers to extract
///
/// Filters are unix-style glob patterns, for example `sha256:1234*`
/// matches any layer with a sha256 digest starting with `1234`.
///
/// You can provide this multiple times to provide multiple filters.
/// If filters are provided, only layers whose digest matches any filter are extracted.
#[arg(long, alias = "lg")]
layer_glob: Option<Vec<String>>,

/// Glob filters for files to extract
///
/// Filters are unix-style glob patterns, for example `*.txt`
/// matches any file whose path ends with `.txt`.
/// Note that if you want to match regardless of directory depth
/// you must use `**` in the pattern, for example `**/*.txt` matches
/// any file with a `.txt` extension in any directory.
///
/// Non-unicode paths are lossily parsed as unicode for the purpose of glob comparison;
/// invalid unicode segments are replaced with `U+FFFD` (�).
///
/// You can provide this multiple times to provide multiple filters.
/// If filters are provided, only files whose path matches any filter are extracted.
#[arg(long, alias = "fg")]
file_glob: Option<Vec<String>>,

/// Regex filters for layers to extract
///
/// Filters are regex patterns, for example `sha256:1234.*`
/// matches any layer with a sha256 digest starting with `1234`.
///
/// You can provide this multiple times to provide multiple filters.
/// If filters are provided, only layers whose digest matches any filter are extracted.
#[arg(long, alias = "lr")]
layer_regex: Option<Vec<String>>,

/// Regex filters for files to extract
///
/// Filters are regex patterns, for example `.*\.txt$`
/// matches any file whose path ends with `.txt`.
///
/// Non-unicode paths are lossily parsed as unicode for the purpose of regex comparison;
/// invalid unicode segments are replaced with `U+FFFD` (�).
///
/// You can provide this multiple times to provide multiple filters.
/// If filters are provided, only files whose path matches any filter are extracted.
#[arg(long, alias = "fr")]
file_regex: Option<Vec<String>>,

/// The username to use for authenticating to the registry
#[arg(long, requires = "password")]
username: Option<String>,
Expand Down Expand Up @@ -74,11 +125,18 @@ pub async fn main(opts: Options) -> Result<()> {
_ => Authentication::default(),
};

let layer_globs = Filters::parse_glob(opts.layer_glob.into_iter().flatten())?;
let file_globs = Filters::parse_glob(opts.file_glob.into_iter().flatten())?;
let layer_regexes = Filters::parse_regex(opts.layer_regex.into_iter().flatten())?;
let file_regexes = Filters::parse_regex(opts.file_regex.into_iter().flatten())?;

let output = canonicalize_output_dir(&opts.output_dir, opts.overwrite)?;
let registry = Registry::builder()
.maybe_platform(opts.platform)
.reference(opts.image)
.auth(auth)
.layer_filters(layer_globs + layer_regexes)
.file_filters(file_globs + file_regexes)
.build()
.await
.context("configure remote registry")?;
Expand Down
7 changes: 5 additions & 2 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "circe_lib"
version = "0.1.0"
version = "0.2.0"
edition = "2021"
authors = ["Jessica Black <[email protected]>", "FOSSA Inc. <[email protected]>"]
description = "Extracts and examines the contents of containers"
Expand All @@ -18,14 +18,16 @@ async-tempfile = "0.6.0"
bon = "3.3.0"
bytes = "1.9.0"
color-eyre = "0.6.3"
derive_more = { version = "1.0.0", features = ["debug", "display"] }
derive_more = { version = "1.0.0", features = ["debug", "display", "from"] }
enum-assoc = "1.2.4"
futures-lite = "2.5.0"
glob-match = "0.2.1"
hex = "0.4.3"
hex-magic = "0.0.2"
itertools = "0.13.0"
oci-client = { version = "0.14.0", features = ["rustls-tls"], default-features = false }
os_str_bytes = "7.0.0"
regex = "1.11.1"
static_assertions = "1.1.0"
strum = { version = "0.26.3", features = ["derive"] }
tap = "1.0.1"
Expand All @@ -35,6 +37,7 @@ tokio-util = { version = "0.7.13", features = ["io"] }
tracing = "0.1.41"

[dev-dependencies]
async-walkdir = "2.0.0"
pretty_assertions = "1.4.1"
proptest = "1.5.0"
simple_test_case = "1.2.0"
Expand Down
155 changes: 152 additions & 3 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ use color_eyre::{
eyre::{self, bail, eyre, Context},
Result, Section, SectionExt,
};
use derive_more::derive::{Debug, Display};
use derive_more::derive::{Debug, Display, From};
use enum_assoc::Assoc;
use itertools::Itertools;
use std::str::FromStr;
use std::{borrow::Cow, ops::Add, str::FromStr};
use strum::{AsRefStr, EnumIter, IntoEnumIterator};
use tap::Pipe;
use tap::{Pipe, Tap};
use tracing::debug;

mod ext;
Expand Down Expand Up @@ -687,3 +687,152 @@ impl std::fmt::Display for LayerMediaTypeFlag {
write!(f, "{}", self.as_ref())
}
}

/// Common interface for anything that can accept or reject a value.
///
/// `T` is the type of the value being tested. It is taken by value, so each
/// implementation decides whether it consumes an owned value or only borrows
/// one (for example by implementing the trait for `&str`).
pub trait FilterMatch<T> {
/// Report whether the filter matches the given value.
///
/// Values that match are included in program operation.
fn matches(&self, value: T) -> bool;
}

/// A set of filters; if any filter in the set matches, the value is considered matched.
/// As a special case, if no filters are provided, the value is also considered matched.
///
/// The `Default` value is the empty set, which therefore matches every value.
/// Sets can be combined with `+`, either with another set or with a single [`Filter`].
#[derive(Debug, Clone, From, Default)]
pub struct Filters(Vec<Filter>);

impl Filters {
    /// Build a filter set from glob pattern strings.
    ///
    /// Patterns are parsed in iteration order and the resulting set keeps that order.
    ///
    /// # Errors
    ///
    /// Returns the error for the first pattern that fails to parse;
    /// patterns after it are not examined.
    pub fn parse_glob(globs: impl IntoIterator<Item = impl AsRef<str>>) -> Result<Self> {
        let mut parsed = Vec::new();
        for pattern in globs {
            parsed.push(Filter::parse_glob(pattern.as_ref())?);
        }
        Ok(Self(parsed))
    }

    /// Build a filter set from regular expression strings.
    ///
    /// Expressions are parsed in iteration order and the resulting set keeps that order.
    ///
    /// # Errors
    ///
    /// Returns the error for the first expression that fails to parse;
    /// expressions after it are not examined.
    pub fn parse_regex(regexes: impl IntoIterator<Item = impl AsRef<str>>) -> Result<Self> {
        let mut parsed = Vec::new();
        for expr in regexes {
            parsed.push(Filter::parse_regex(expr.as_ref())?);
        }
        Ok(Self(parsed))
    }
}

impl Add<Filter> for Filters {
type Output = Self;

fn add(mut self, filter: Filter) -> Self {
self.0.push(filter);
self
}
}

impl Add<Filters> for Filters {
type Output = Filters;

fn add(mut self, filters: Filters) -> Filters {
self.0.extend(filters.0);
self
}
}

/// Match a borrowed value against the whole filter set.
///
/// An empty set matches everything. Otherwise the value matches as soon as
/// any single filter accepts it; filters after the first hit are not consulted.
///
/// `T` may be unsized, so string slices (`&str`) are accepted directly in
/// addition to references to sized types such as `&String`. The only
/// requirement is that [`Filter`] itself knows how to match a `&T`.
impl<'a, T> FilterMatch<&'a T> for Filters
where
    T: ?Sized,
    Filter: FilterMatch<&'a T>,
{
    fn matches(&self, value: &'a T) -> bool {
        // No filters configured means "no restriction", not "match nothing".
        self.0.is_empty() || self.0.iter().any(|filter| filter.matches(value))
    }
}

/// A single filter, backed by either a regular expression or a glob pattern.
///
/// Matching is delegated to the wrapped [`Regex`] or [`Glob`].
// NOTE(review): the derived `From` presumably provides `From<Regex>` and `From<Glob>` — confirm against derive_more.
#[derive(Debug, Clone, From)]
pub enum Filter {
/// Filter values using a regular expression.
Regex(Regex),

/// Filter values using a glob pattern.
Glob(Glob),
}

impl Filter {
/// Create a glob filter from the given string.
pub fn parse_glob(s: &str) -> Result<Self> {
Glob::from_str(s).map(Self::Glob)
}

/// Create a regex filter from the given string.
pub fn parse_regex(s: &str) -> Result<Self> {
Regex::from_str(s).map(Self::Regex)
}
}

impl FilterMatch<String> for Filter {
fn matches(&self, value: String) -> bool {
self.matches(&value)
}
}

/// Convenience impl for `&String`; matching is performed on the underlying string slice.
impl FilterMatch<&String> for Filter {
    fn matches(&self, value: &String) -> bool {
        let text: &str = value;
        self.matches(text)
    }
}

/// Convenience impl for `Cow<str>`, whether borrowed or owned;
/// matching is performed on the string slice it dereferences to.
impl FilterMatch<Cow<'_, str>> for Filter {
    fn matches(&self, value: Cow<'_, str>) -> bool {
        let text: &str = &value;
        self.matches(text)
    }
}

/// Core matching impl: dispatches to the wrapped regex or glob.
/// The other `FilterMatch` impls for [`Filter`] ultimately delegate here.
impl FilterMatch<&str> for Filter {
    fn matches(&self, value: &str) -> bool {
        match *self {
            Self::Glob(ref pattern) => pattern.matches(value),
            Self::Regex(ref expr) => expr.matches(value),
        }
    }
}

/// A regular expression filter.
///
/// Wraps a compiled `regex::Regex`. Values of this type are created by parsing
/// a pattern string through the `FromStr` implementation, which rejects
/// invalid expressions.
#[derive(Debug, Clone)]
pub struct Regex(regex::Regex);

/// Test a string against the compiled expression.
///
/// Uses `regex::Regex::is_match`, so an unanchored expression matches if it is
/// found anywhere in `value`. Every check is reported at debug level.
impl FilterMatch<&str> for Regex {
    fn matches(&self, value: &str) -> bool {
        let matched = self.0.is_match(value);
        debug!(?value, expr = ?self.0, %matched, "regex: check filter");
        matched
    }
}

/// Compile a pattern string into a [`Regex`] filter.
impl FromStr for Regex {
    type Err = eyre::Error;

    /// # Errors
    ///
    /// Returns an `invalid regex: …` error carrying the compiler's message
    /// when `s` is not a valid regular expression.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match regex::Regex::new(s) {
            Ok(compiled) => Ok(Self(compiled)),
            Err(e) => Err(eyre!("invalid regex: {e}")),
        }
    }
}

/// A glob filter.
///
/// Stores the pattern text as given; the pattern is handed to
/// `glob_match::glob_match` each time a value is checked.
#[derive(Debug, Clone)]
pub struct Glob(String);

/// Test a string against the stored glob pattern via `glob_match::glob_match`.
/// Every check is reported at debug level.
impl FilterMatch<&str> for Glob {
    fn matches(&self, value: &str) -> bool {
        let matched = glob_match::glob_match(&self.0, value);
        debug!(?value, glob = ?self.0, %matched, "glob: check filter");
        matched
    }
}

/// Wrap a pattern string as a [`Glob`] filter.
impl FromStr for Glob {
    type Err = eyre::Error;

    /// Copies the pattern verbatim. No validation is performed here,
    /// so this implementation always returns `Ok`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Self(s.to_string()))
    }
}
Loading

0 comments on commit d4bad05

Please sign in to comment.