Skip to content

Commit

Permalink
improve signing process
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Nov 26, 2023
1 parent cf18795 commit 588b6e5
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 137 deletions.
49 changes: 29 additions & 20 deletions lib/src/extract.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::constants::{NP_PREF_NS, NP_TEMP_URI};
use crate::error::{NpError, TermError};
use crate::utils::ns;

Expand All @@ -16,6 +17,7 @@ use std::fmt;
pub struct NpInfo {
pub uri: Iri<String>,
pub ns: Namespace<String>,
pub normalized_ns: String,
pub head: Iri<String>,
pub assertion: Iri<String>,
pub prov: Iri<String>,
Expand Down Expand Up @@ -125,6 +127,9 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
));
}

// Getting potential ns from head graph (removing the last frag from head)
let original_ns = &head_iri[..np_iri.len()];

// Remove last char if it is # or / to get the URI
let np_iri: Iri<String> =
if np_iri.ends_with('#') || np_iri.ends_with('/') || np_iri.ends_with('.') {
Expand All @@ -136,9 +141,6 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
np_iri
};

// Getting potential ns from head graph (removing the last frag from head)
let np_ns_str = &head_iri[..np_iri.len() + 1];

// Extract base URI, separator character (# or / or _), and trusty hash (if present) from the np URL
// Default to empty strings when nothing found
let mut base_uri: String = "".to_string();
Expand All @@ -156,7 +158,7 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
// Get the base URI and separators from the namespace
let re_trusty_ns = Regex::new(r"^(.*?)(/|#|\.)?(RA[a-zA-Z0-9-_]*)?([#/\.])?$")?;
// let re = Regex::new(r"^(.*?)(RA.*)?$")?;
if let Some(caps) = re_trusty_ns.captures(np_ns_str) {
if let Some(caps) = re_trusty_ns.captures(original_ns) {
// The first group captures everything up to a '/' or '#', non-greedy.
base_uri = caps.get(1).map_or("", |m| m.as_str()).to_string();
// The second group captures '/' or '#' if present, defaults to .
Expand All @@ -175,21 +177,27 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
separator_after_trusty = "#".to_string()
};

// TODO: handle diff if trusty or not (if not we use default, if trusty we only extract)
let np_ns =
if !np_ns_str.ends_with('#') && !np_ns_str.ends_with('/') && !np_ns_str.ends_with('.') {
if !trusty_hash.is_empty() {
// TODO: Change the after trusty part?
Namespace::new_unchecked(np_ns_str.to_string())
} else {
Namespace::new_unchecked(format!(
"{}.",
&np_ns_str.strip_suffix('_').unwrap_or(np_ns_str)
))
}
} else {
Namespace::new_unchecked(np_ns_str.to_string())
};
let np_ns = Namespace::new_unchecked(original_ns.to_string());
// println!(
// "DEBUG: Extracted URI and namespace: {} {} {}",
// np_iri,
// np_ns.get("")?,
// trusty_hash
// );

// Generate normalized namespace without trusty
let norm_ns = if !trusty_hash.is_empty() {
format!("{}{}", base_uri, separator_before_trusty)
} else if original_ns.starts_with(NP_TEMP_URI) {
NP_PREF_NS.to_string()
} else if !original_ns.ends_with('#')
&& !original_ns.ends_with('/')
&& !original_ns.ends_with('.')
{
format!("{}.", &original_ns)
} else {
original_ns.to_string()
};

// Extract signature and its subject URI
let pubinfo_iri: Iri<String> = Iri::new_unchecked(pubinfo);
Expand Down Expand Up @@ -230,7 +238,7 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
// Extract ORCID
let mut orcid: Option<String> = None;
for q in dataset.quads_matching(
[&np_iri, &Iri::new_unchecked(np_ns_str.to_string())],
[&np_iri, &Iri::new_unchecked(original_ns.to_string())],
[
ns("dct").get("creator")?,
ns("prov").get("wasAttributedTo")?,
Expand Down Expand Up @@ -314,6 +322,7 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
let np_info = NpInfo {
uri: np_iri,
ns: np_ns,
normalized_ns: norm_ns,
head: head_iri,
assertion: assertion_iri,
prov: prov_iri,
Expand Down
87 changes: 42 additions & 45 deletions lib/src/nanopub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ use crate::utils::{ns, parse_rdf, serialize_rdf};

use base64;
use base64::{engine, Engine as _};
use chrono::Utc;
use rsa::pkcs8::DecodePublicKey;
use rsa::{sha2::Digest, sha2::Sha256, Pkcs1v15Sign, RsaPublicKey};
use serde::Serialize;
use sophia::api::dataset::{Dataset, MutableDataset};
use sophia::api::ns::{rdf, Namespace};
use sophia::api::ns::{rdf, xsd, Namespace};
use sophia::api::term::matcher::Any;
use sophia::api::term::{SimpleTerm, Term};
// use sophia::api::;
use sophia::inmem::dataset::LightDataset;
use sophia::iri::Iri;
use sophia::iri::{AsIriRef, Iri};
use std::{fmt, str};

/// Trait to provide the nanopub RDF as string or sophia dataset
Expand Down Expand Up @@ -124,12 +127,6 @@ impl Nanopub {
let mut dataset = rdf.get_dataset()?;
let np_info = extract_np_info(&dataset, true)?;

let norm_ns = if !np_info.trusty_hash.is_empty() {
format!("{}{}", np_info.base_uri, np_info.separator_before_trusty)
} else {
NP_PREF_NS.to_string()
};

let mut msg: String = "".to_string();
if np_info.trusty_hash.is_empty() {
msg = format!("{}1 valid (not trusty)", msg);
Expand All @@ -138,7 +135,7 @@ impl Nanopub {
let expected_hash = make_trusty(
&dataset,
&np_info.ns,
&norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
if expected_hash != np_info.trusty_hash {
Expand All @@ -160,7 +157,7 @@ impl Nanopub {
let norm_quads = normalize_dataset(
&dataset,
&np_info.ns,
&norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
// println!("NORMED QUADS CHECK\n{}", norm_quads);
Expand Down Expand Up @@ -253,30 +250,33 @@ impl Nanopub {
// TODO: if not already set, automatically add the current date to pubinfo created
// But there is an error when trying to cast the string to xsd::dateTime
// np_uri dct:created "2023-11-17T14:13:52.560Z"^^xsd:dateTime ;
// if dataset
// .quads_matching(
// [
// &np_info.uri,
// &Iri::new_unchecked(np_info.ns.get("")?.to_string()),
// ],
// [get_ns("dct").get("created")?],
// Any,
// [Some(&np_info.pubinfo)],
// )
// .next()
// .is_none()
// {
// let now = Utc::now();
// let datetime_str = now.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
// // TODO: error when trying to convert to datetime
// // let lit_date = "2019" * xsd::dateTime;
// dataset.insert(
// &np_info.uri,
// get_ns("dct").get("created")?,
// &*datetime_str,
// Some(&np_info.pubinfo),
// )?;
// }
if dataset
.quads_matching(
[
&np_info.uri,
&Iri::new_unchecked(np_info.ns.get("")?.to_string()),
],
[ns("dct").get("created")?],
Any,
[Some(&np_info.pubinfo)],
)
.next()
.is_none()
{
let now = Utc::now();
let datetime_str = now.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
// TODO: error when trying to convert to datetime
// let lit_date = "2019" * xsd::dateTime;
// let lit_date = datetime_str.as_str() * xsd::dateTime;
let lit_date = SimpleTerm::LiteralDatatype(datetime_str.into(), xsd::dateTime.iriref());
dataset.insert(
np_info.ns.as_iri_ref(),
ns("dct").get("created")?,
lit_date,
// &*datetime_str * xsd::dateTime.iriref(),
Some(&np_info.pubinfo),
)?;
}

// If ORCID provided and not already provided, add to pubinfo graph
if !profile.orcid_id.is_empty()
Expand All @@ -298,24 +298,18 @@ impl Nanopub {
.is_none()
{
dataset.insert(
&np_info.uri,
np_info.ns.as_iri_ref(),
ns("dct").get("creator")?,
Iri::new_unchecked(profile.orcid_id.clone()),
Some(&np_info.pubinfo),
)?;
}

let norm_ns = if np_info.ns.starts_with(NP_TEMP_URI) {
NP_PREF_NS
} else {
&np_info.ns
};

// Normalize nanopub nquads to a string
let norm_quads = normalize_dataset(
&dataset,
np_info.ns.as_str(),
norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
// println!("NORMED QUADS sign before add signature\n{}", norm_quads);
Expand All @@ -338,10 +332,10 @@ impl Nanopub {
let trusty_hash = make_trusty(
&dataset,
&np_info.ns,
norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
let trusty_uri = format!("{norm_ns}{trusty_hash}");
let trusty_uri = format!("{}{trusty_hash}", np_info.normalized_ns);
let trusty_ns = format!("{trusty_uri}#");
dataset =
replace_ns_in_quads(&dataset, &np_info.ns, &np_info.uri, &trusty_ns, &trusty_uri)?;
Expand Down Expand Up @@ -421,7 +415,10 @@ impl Nanopub {
} else {
println!("\n❌ Issue publishing the Nanopublication \n{}", np);
// TODO: when publish fails, should we return a Nanopub struct with published=false, or throw an error?
// return Err(NpError(format!("Issue publishing the Nanopublication \n{}", np)))
return Err(NpError(format!(
"Issue publishing the Nanopublication \n{}",
np
)));
}
np.set_published(published);
Ok(np)
Expand Down
14 changes: 13 additions & 1 deletion lib/src/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,19 @@ pub async fn publish_np(server: &str, np: &str) -> Result<bool, NpError> {
.header(reqwest::header::CONTENT_TYPE, "application/trig")
.send()
.await?;
Ok(res.status() == 201)
// println!("DEBUG: publish resp: {:#?}", res);
// Ok(res.status() == 201)
match res.status() {
reqwest::StatusCode::CREATED => Ok(true),
_ => {
// Get the error message from the response body
let error_msg = res
.text()
.await
.unwrap_or_else(|_| "Unknown error while publishing the nanopub".to_string());
Err(NpError(error_msg))
}
}
}

/// Fetch nanopub from its URI
Expand Down
53 changes: 31 additions & 22 deletions lib/src/sign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ pub fn replace_ns_in_quads(
new_ns: &str,
new_uri: &str,
) -> Result<LightDataset, NpError> {
let old_ns = old_ns.strip_suffix('.').unwrap_or(old_ns);
// println!(
// "DEBUG: REPLACE_NS_IN_QUADS: Old ns: {} old_uri: {} new ns: {} new_uri: {}",
// old_ns, old_uri, new_ns, new_uri
// );
let mut new = LightDataset::new();
for quad in dataset.quads() {
let quad = quad?;
Expand Down Expand Up @@ -222,18 +225,25 @@ struct NormQuad {

/// Fix normed URIs last fragments. Make sure it starts with #
pub fn fix_normed_uri(uri: &str, separator: &str) -> String {
if let Some(last_slash_index) = uri.rfind(' ') {
let last_frag = &uri[last_slash_index + 1..];
if last_frag.starts_with(separator) || last_frag.is_empty() {
if let Some(space_index) = uri.rfind(' ') {
let last_frag = &uri[space_index + 1..];
// println!(
// "DEBUG: last frag: '{}' URI: '{}' SEP: '{}'",
// last_frag, uri, separator
// );
if uri.ends_with(&format!(" {separator}")) || last_frag.is_empty() {
uri.strip_suffix(separator).unwrap_or(uri).to_string()
} else if last_frag.starts_with(separator) {
uri.to_string()
} else if last_frag.starts_with('/') || last_frag.starts_with('.') {
format!(
"{} {separator}{}",
&uri[..last_slash_index],
&uri[last_slash_index + 2..]
)
// TODO: remove those checks, they are no longer useful?
// } else if last_frag.starts_with('/') || last_frag.starts_with('.') {
// format!(
// "{} {separator}{}",
// &uri[..space_index],
// &uri[space_index + 2..]
// )
} else {
format!("{} {separator}{}", &uri[..last_slash_index], last_frag)
format!("{} {separator}{}", &uri[..space_index], last_frag)
}
} else {
uri.to_string()
Expand All @@ -248,15 +258,14 @@ pub fn normalize_dataset(
separator: &str,
) -> Result<String, NpError> {
let mut quads_vec: Vec<NormQuad> = vec![];
let norm_base = format!("{} ", norm_ns.strip_suffix('#').unwrap_or(norm_ns));
let base_uri = match base_ns.chars().last() {
Some(_) => &base_ns[..base_ns.len() - 1],
None => base_ns,
};
let norm_uri = format!("{} ", norm_ns);
// println!("DEBUG: NORMALIZE {} {} {}", base_ns, norm_ns, separator);
// Example already signed: http://www.nextprot.org/nanopubs#NX_Q9Y6K8_ESTEvidence_TS-2083.RAr9ao0vjXtLf3d9U4glE_uQWSknfYoPlIzKBq6ybOO5k.
// Not signed yet: http://www.proteinatlas.org/about/nanopubs/ENSG00000000003_ih_TS_0030_head
// becomes http://www.proteinatlas.org/about/nanopubs/ENSG00000000003_ih_TS_0030.RAyBeXMqokAQZ5psoETKtkOeYzHnoIoXTgNFKRdLM8yzs#__head
// last char after trusty becomes # and before .
// Default tmp URI: http://purl.org/nanopub/temp/
// becomes: https://w3id.org/np/RAyBeXMqokAQZ5psoETKtkOeYzHnoIoXTgNFKRdLM8yzs#Head

// Convert dataset to a list of NormQuad struct
for quad in dataset.quads() {
Expand All @@ -268,18 +277,18 @@ pub fn normalize_dataset(
.ok_or(TermError())?
.to_string();
// Extract components of the quad and convert them to strings. Replace the base URI if present
let graph = fix_normed_uri(&graph.replace(base_uri, &norm_base), separator);
let graph = fix_normed_uri(&graph.replace(base_ns, &norm_uri), separator);

let subject = if quad.s().iri().ok_or(TermError())?.to_string() == base_ns {
fix_normed_uri(&norm_base, separator)
norm_uri.to_string()
} else {
fix_normed_uri(
&quad
.s()
.iri()
.ok_or(TermError())?
.to_string()
.replace(base_uri, &norm_base),
.replace(base_ns, &norm_uri),
separator,
)
};
Expand All @@ -289,19 +298,19 @@ pub fn normalize_dataset(
.iri()
.ok_or(TermError())?
.to_string()
.replace(base_uri, &norm_base);
.replace(base_ns, &norm_uri);

let object = if quad.o().is_iri() {
if quad.o().iri().ok_or(TermError())?.to_string() == base_ns {
fix_normed_uri(&norm_base, separator)
norm_uri.to_string()
} else {
fix_normed_uri(
&quad
.o()
.iri()
.ok_or(TermError())?
.to_string()
.replace(base_uri, &norm_base),
.replace(base_ns, &norm_uri),
separator,
)
}
Expand Down
Loading

0 comments on commit 588b6e5

Please sign in to comment.