From e71a5ac16fe865ebc8341018b383a8521230bb73 Mon Sep 17 00:00:00 2001
From: Jessica Black
Date: Fri, 14 Jun 2024 15:41:19 -0700
Subject: [PATCH] Add new fingerprinter

---
Review notes (everything between the `---` line and the first `diff --git`
line is ignored when the patch is applied):

- Adds `fingerprint::jar::maven_central`. It streams the reader into a SHA-1
  hasher with `std::io::copy`, wraps the digest via `Content::from_digest`,
  and returns `Ok(Some(..))` tagged `Kind::JarMavenCentralV1`. The only
  fallible step in the body is the copy, which is propagated with `?`.
- The function never seeks, so hashing starts at whatever position the
  stream is already at. NOTE(review): confirm callers hand in a stream
  positioned at the start of the JAR.
- Registers the new kind under the serialized name `v1.mavencentral.jar`
  and wires it into the `match` dispatch in `impl Fingerprint`.
- Adds `sha1 = "0.10.6"` to `Cargo.toml`.
- NOTE(review): the `From:` header carries no e-mail address; restore the
  author address before feeding this file to `git am`.

 Cargo.toml             |  1 +
 src/fingerprint/jar.rs | 10 ++++++++++
 src/lib.rs             | 10 ++++++++++
 3 files changed, 21 insertions(+)

diff --git a/Cargo.toml b/Cargo.toml
index fb41c92..a60102b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,6 +42,7 @@ strum = { version = "0.26.2", features = ["derive"] }
 alphanumeric-sort = "1.5.3"
 tap = "1.0.1"
 tracing = "0.1.40"
+sha1 = "0.10.6"
 
 [dev-dependencies]
 pretty_assertions = "1.4.0"
diff --git a/src/fingerprint/jar.rs b/src/fingerprint/jar.rs
index 40949a8..7f02390 100644
--- a/src/fingerprint/jar.rs
+++ b/src/fingerprint/jar.rs
@@ -1,5 +1,6 @@
 use std::io::{BufRead, Seek};
 
+use sha1::Sha1;
 use sha2::{Digest, Sha256};
 use tap::Pipe;
 use tracing::warn;
@@ -22,6 +23,15 @@ pub fn raw(stream: impl BufRead + Seek) -> Result<Option<Fingerprint>, Error> {
     }
 }
 
+/// Fingerprint the java archive the same way as Maven Central.
+#[tracing::instrument(level = tracing::Level::DEBUG, skip_all, ret)]
+pub fn maven_central(mut stream: impl BufRead + Seek) -> Result<Option<Fingerprint>, Error> {
+    let mut hasher = Sha1::new();
+    std::io::copy(&mut stream, &mut hasher)?;
+    let content = Content::from_digest(hasher);
+    Ok(Some(Fingerprint::new(Kind::JarMavenCentralV1, content)))
+}
+
 /// Fingerprint class files inside a java archive (a JAR).
 #[tracing::instrument(level = tracing::Level::DEBUG, skip_all, ret)]
 pub fn class(stream: impl BufRead + Seek) -> Result<Option<Fingerprint>, Error> {
diff --git a/src/lib.rs b/src/lib.rs
index 4cd2313..a4270ce 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -123,6 +123,15 @@ pub enum Kind {
     #[strum(serialize = "v1.raw.jar")]
     JarRawV1,
 
+    /// Represents a fingerprint derived by hashing the raw contents of a JAR file in the same manner
+    /// as Maven Central. The idea is that such fingerprints can then be looked up via the
+    /// Maven Central REST API as a fallback to our own indexing.
+    ///
+    /// Specifically:
+    /// - The content of the JAR file is hashed as-is using the sha1 algorithm.
+    #[strum(serialize = "v1.mavencentral.jar")]
+    JarMavenCentralV1,
+
     /// Represents a fingerprint derived by hashing the raw contents of a JAR file with the SHA256 algorithm
     /// in a platform-independent manner.
     ///
@@ -323,6 +332,7 @@ impl Fingerprint {
             Kind::CommentStrippedSha256 => fingerprint::text::comment_stripped(stream),
             Kind::JarRawV1 => fingerprint::jar::raw(stream),
             Kind::JarClassV1 => fingerprint::jar::class(stream),
+            Kind::JarMavenCentralV1 => fingerprint::jar::maven_central(stream),
         }
     }
 