Skip to content

Commit

Permalink
Add new fingerprinter
Browse files Browse the repository at this point in the history
  • Loading branch information
jssblck committed Jun 14, 2024
1 parent 7b34840 commit e71a5ac
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ strum = { version = "0.26.2", features = ["derive"] }
alphanumeric-sort = "1.5.3"
tap = "1.0.1"
tracing = "0.1.40"
sha1 = "0.10.6"

[dev-dependencies]
pretty_assertions = "1.4.0"
Expand Down
10 changes: 10 additions & 0 deletions src/fingerprint/jar.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::io::{BufRead, Seek};

use sha1::Sha1;
use sha2::{Digest, Sha256};
use tap::Pipe;
use tracing::warn;
Expand All @@ -22,6 +23,15 @@ pub fn raw(stream: impl BufRead + Seek) -> Result<Option<Fingerprint>, Error> {
}
}

/// Compute the Maven Central style fingerprint of a java archive (a JAR).
///
/// Every byte readable from `stream`, from its current position through to
/// end-of-stream, is fed unmodified into a SHA-1 hasher. The hasher is then handed
/// to `Content::from_digest`, and the result is tagged as [`Kind::JarMavenCentralV1`].
///
/// NOTE(review): the stream is not rewound here; this presumably relies on the
/// caller handing over a stream positioned at the start of the archive -- confirm.
///
/// # Errors
///
/// Any I/O error raised while reading `stream` is propagated to the caller.
///
/// # Returns
///
/// On success this is always `Ok(Some(_))`; this fingerprinter never yields `None`.
#[tracing::instrument(level = tracing::Level::DEBUG, skip_all, ret)]
pub fn maven_central(mut stream: impl BufRead + Seek) -> Result<Option<Fingerprint>, Error> {
    // Drain the reader into the hasher; `Sha1` is used as the write sink of the copy.
    let digest = {
        let mut sha1 = Sha1::new();
        std::io::copy(&mut stream, &mut sha1)?;
        sha1
    };

    let fingerprint = Fingerprint::new(Kind::JarMavenCentralV1, Content::from_digest(digest));
    Ok(Some(fingerprint))
}

/// Fingerprint class files inside a java archive (a JAR).
#[tracing::instrument(level = tracing::Level::DEBUG, skip_all, ret)]
pub fn class(stream: impl BufRead + Seek) -> Result<Option<Fingerprint>, Error> {
Expand Down
10 changes: 10 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,15 @@ pub enum Kind {
#[strum(serialize = "v1.raw.jar")]
JarRawV1,

/// Represents a fingerprint derived by hashing the raw contents of a JAR file in the same manner
/// as Maven Central. The idea is that such fingerprints can then be looked up via the
/// Maven Central REST API as a fallback to our own indexing.
///
/// Specifically:
/// - The content of the JAR file is hashed as-is using the SHA-1 algorithm.
#[strum(serialize = "v1.mavencentral.jar")]
JarMavenCentralV1,

/// Represents a fingerprint derived by hashing the raw contents of a JAR file with the SHA256 algorithm
/// in a platform-independent manner.
///
Expand Down Expand Up @@ -323,6 +332,7 @@ impl Fingerprint {
Kind::CommentStrippedSha256 => fingerprint::text::comment_stripped(stream),
Kind::JarRawV1 => fingerprint::jar::raw(stream),
Kind::JarClassV1 => fingerprint::jar::class(stream),
Kind::JarMavenCentralV1 => fingerprint::jar::maven_central(stream),
}
}

Expand Down

0 comments on commit e71a5ac

Please sign in to comment.