Skip to content

Commit

Permalink
improve signing process
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Nov 26, 2023
1 parent cf18795 commit 588b6e5
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 137 deletions.
49 changes: 29 additions & 20 deletions lib/src/extract.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::constants::{NP_PREF_NS, NP_TEMP_URI};
use crate::error::{NpError, TermError};
use crate::utils::ns;

Expand All @@ -16,6 +17,7 @@ use std::fmt;
pub struct NpInfo {
pub uri: Iri<String>,
pub ns: Namespace<String>,
pub normalized_ns: String,
pub head: Iri<String>,
pub assertion: Iri<String>,
pub prov: Iri<String>,
Expand Down Expand Up @@ -125,6 +127,9 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
));
}

// Getting potential ns from head graph (removing the last frag from head)
let original_ns = &head_iri[..np_iri.len()];

// Remove last char if it is # or / to get the URI
let np_iri: Iri<String> =
if np_iri.ends_with('#') || np_iri.ends_with('/') || np_iri.ends_with('.') {
Expand All @@ -136,9 +141,6 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
np_iri
};

// Getting potential ns from head graph (removing the last frag from head)
let np_ns_str = &head_iri[..np_iri.len() + 1];

// Extract base URI, separator character (# or / or _), and trusty hash (if present) from the np URL
// Default to empty strings when nothing found
let mut base_uri: String = "".to_string();
Expand All @@ -156,7 +158,7 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
// Get the base URI and separators from the namespace
let re_trusty_ns = Regex::new(r"^(.*?)(/|#|\.)?(RA[a-zA-Z0-9-_]*)?([#/\.])?$")?;
// let re = Regex::new(r"^(.*?)(RA.*)?$")?;
if let Some(caps) = re_trusty_ns.captures(np_ns_str) {
if let Some(caps) = re_trusty_ns.captures(original_ns) {
// The first group captures everything up to a '/' or '#', non-greedy.
base_uri = caps.get(1).map_or("", |m| m.as_str()).to_string();
// The second group captures '/' or '#' if present, defaults to .
Expand All @@ -175,21 +177,27 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
separator_after_trusty = "#".to_string()
};

// TODO: handle diff if trusty or not (if not we use default, if trusty we only extract)
let np_ns =
if !np_ns_str.ends_with('#') && !np_ns_str.ends_with('/') && !np_ns_str.ends_with('.') {
if !trusty_hash.is_empty() {
// TODO: Change the after trusty part?
Namespace::new_unchecked(np_ns_str.to_string())
} else {
Namespace::new_unchecked(format!(
"{}.",
&np_ns_str.strip_suffix('_').unwrap_or(np_ns_str)
))
}
} else {
Namespace::new_unchecked(np_ns_str.to_string())
};
let np_ns = Namespace::new_unchecked(original_ns.to_string());
// println!(
// "DEBUG: Extracted URI and namespace: {} {} {}",
// np_iri,
// np_ns.get("")?,
// trusty_hash
// );

// Generate normalized namespace without trusty
let norm_ns = if !trusty_hash.is_empty() {
format!("{}{}", base_uri, separator_before_trusty)
} else if original_ns.starts_with(NP_TEMP_URI) {
NP_PREF_NS.to_string()
} else if !original_ns.ends_with('#')
&& !original_ns.ends_with('/')
&& !original_ns.ends_with('.')
{
format!("{}.", &original_ns)
} else {
original_ns.to_string()
};

// Extract signature and its subject URI
let pubinfo_iri: Iri<String> = Iri::new_unchecked(pubinfo);
Expand Down Expand Up @@ -230,7 +238,7 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
// Extract ORCID
let mut orcid: Option<String> = None;
for q in dataset.quads_matching(
[&np_iri, &Iri::new_unchecked(np_ns_str.to_string())],
[&np_iri, &Iri::new_unchecked(original_ns.to_string())],
[
ns("dct").get("creator")?,
ns("prov").get("wasAttributedTo")?,
Expand Down Expand Up @@ -314,6 +322,7 @@ pub fn extract_np_info(dataset: &LightDataset, check_pubinfo: bool) -> Result<Np
let np_info = NpInfo {
uri: np_iri,
ns: np_ns,
normalized_ns: norm_ns,
head: head_iri,
assertion: assertion_iri,
prov: prov_iri,
Expand Down
87 changes: 42 additions & 45 deletions lib/src/nanopub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ use crate::utils::{ns, parse_rdf, serialize_rdf};

use base64;
use base64::{engine, Engine as _};
use chrono::Utc;
use rsa::pkcs8::DecodePublicKey;
use rsa::{sha2::Digest, sha2::Sha256, Pkcs1v15Sign, RsaPublicKey};
use serde::Serialize;
use sophia::api::dataset::{Dataset, MutableDataset};
use sophia::api::ns::{rdf, Namespace};
use sophia::api::ns::{rdf, xsd, Namespace};
use sophia::api::term::matcher::Any;
use sophia::api::term::{SimpleTerm, Term};
// use sophia::api::;
use sophia::inmem::dataset::LightDataset;
use sophia::iri::Iri;
use sophia::iri::{AsIriRef, Iri};
use std::{fmt, str};

/// Trait to provide the nanopub RDF as string or sophia dataset
Expand Down Expand Up @@ -124,12 +127,6 @@ impl Nanopub {
let mut dataset = rdf.get_dataset()?;
let np_info = extract_np_info(&dataset, true)?;

let norm_ns = if !np_info.trusty_hash.is_empty() {
format!("{}{}", np_info.base_uri, np_info.separator_before_trusty)
} else {
NP_PREF_NS.to_string()
};

let mut msg: String = "".to_string();
if np_info.trusty_hash.is_empty() {
msg = format!("{}1 valid (not trusty)", msg);
Expand All @@ -138,7 +135,7 @@ impl Nanopub {
let expected_hash = make_trusty(
&dataset,
&np_info.ns,
&norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
if expected_hash != np_info.trusty_hash {
Expand All @@ -160,7 +157,7 @@ impl Nanopub {
let norm_quads = normalize_dataset(
&dataset,
&np_info.ns,
&norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
// println!("NORMED QUADS CHECK\n{}", norm_quads);
Expand Down Expand Up @@ -253,30 +250,33 @@ impl Nanopub {
// TODO: if not already set, automatically add the current date to pubinfo created
// But there is an error when trying to cast the string to xsd::dateTime
// np_uri dct:created "2023-11-17T14:13:52.560Z"^^xsd:dateTime ;
// if dataset
// .quads_matching(
// [
// &np_info.uri,
// &Iri::new_unchecked(np_info.ns.get("")?.to_string()),
// ],
// [get_ns("dct").get("created")?],
// Any,
// [Some(&np_info.pubinfo)],
// )
// .next()
// .is_none()
// {
// let now = Utc::now();
// let datetime_str = now.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
// // TODO: error when trying to convert to datetime
// // let lit_date = "2019" * xsd::dateTime;
// dataset.insert(
// &np_info.uri,
// get_ns("dct").get("created")?,
// &*datetime_str,
// Some(&np_info.pubinfo),
// )?;
// }
if dataset
.quads_matching(
[
&np_info.uri,
&Iri::new_unchecked(np_info.ns.get("")?.to_string()),
],
[ns("dct").get("created")?],
Any,
[Some(&np_info.pubinfo)],
)
.next()
.is_none()
{
let now = Utc::now();
let datetime_str = now.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
// TODO: error when trying to convert to datetime
// let lit_date = "2019" * xsd::dateTime;
// let lit_date = datetime_str.as_str() * xsd::dateTime;
let lit_date = SimpleTerm::LiteralDatatype(datetime_str.into(), xsd::dateTime.iriref());
dataset.insert(
np_info.ns.as_iri_ref(),
ns("dct").get("created")?,
lit_date,
// &*datetime_str * xsd::dateTime.iriref(),
Some(&np_info.pubinfo),
)?;
}

// If ORCID provided and not already provided, add to pubinfo graph
if !profile.orcid_id.is_empty()
Expand All @@ -298,24 +298,18 @@ impl Nanopub {
.is_none()
{
dataset.insert(
&np_info.uri,
np_info.ns.as_iri_ref(),
ns("dct").get("creator")?,
Iri::new_unchecked(profile.orcid_id.clone()),
Some(&np_info.pubinfo),
)?;
}

let norm_ns = if np_info.ns.starts_with(NP_TEMP_URI) {
NP_PREF_NS
} else {
&np_info.ns
};

// Normalize nanopub nquads to a string
let norm_quads = normalize_dataset(
&dataset,
np_info.ns.as_str(),
norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
// println!("NORMED QUADS sign before add signature\n{}", norm_quads);
Expand All @@ -338,10 +332,10 @@ impl Nanopub {
let trusty_hash = make_trusty(
&dataset,
&np_info.ns,
norm_ns,
&np_info.normalized_ns,
&np_info.separator_after_trusty,
)?;
let trusty_uri = format!("{norm_ns}{trusty_hash}");
let trusty_uri = format!("{}{trusty_hash}", np_info.normalized_ns);
let trusty_ns = format!("{trusty_uri}#");
dataset =
replace_ns_in_quads(&dataset, &np_info.ns, &np_info.uri, &trusty_ns, &trusty_uri)?;
Expand Down Expand Up @@ -421,7 +415,10 @@ impl Nanopub {
} else {
println!("\n❌ Issue publishing the Nanopublication \n{}", np);
// TODO: when publish fails, should we return a Nanopub struct with published=false, or throw an error?
// return Err(NpError(format!("Issue publishing the Nanopublication \n{}", np)))
return Err(NpError(format!(
"Issue publishing the Nanopublication \n{}",
np
)));
}
np.set_published(published);
Ok(np)
Expand Down
14 changes: 13 additions & 1 deletion lib/src/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,19 @@ pub async fn publish_np(server: &str, np: &str) -> Result<bool, NpError> {
.header(reqwest::header::CONTENT_TYPE, "application/trig")
.send()
.await?;
Ok(res.status() == 201)
// println!("DEBUG: publish resp: {:#?}", res);
// Ok(res.status() == 201)
match res.status() {
reqwest::StatusCode::CREATED => Ok(true),
_ => {
// Get the error message from the response body
let error_msg = res
.text()
.await
.unwrap_or_else(|_| "Unknown error while publishing the nanopub".to_string());
Err(NpError(error_msg))
}
}
}

/// Fetch nanopub from its URI
Expand Down
53 changes: 31 additions & 22 deletions lib/src/sign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ pub fn replace_ns_in_quads(
new_ns: &str,
new_uri: &str,
) -> Result<LightDataset, NpError> {
let old_ns = old_ns.strip_suffix('.').unwrap_or(old_ns);
// println!(
// "DEBUG: REPLACE_NS_IN_QUADS: Old ns: {} old_uri: {} new ns: {} new_uri: {}",
// old_ns, old_uri, new_ns, new_uri
// );
let mut new = LightDataset::new();
for quad in dataset.quads() {
let quad = quad?;
Expand Down Expand Up @@ -222,18 +225,25 @@ struct NormQuad {

/// Fix normed URIs last fragments. Make sure it starts with #
pub fn fix_normed_uri(uri: &str, separator: &str) -> String {
if let Some(last_slash_index) = uri.rfind(' ') {
let last_frag = &uri[last_slash_index + 1..];
if last_frag.starts_with(separator) || last_frag.is_empty() {
if let Some(space_index) = uri.rfind(' ') {
let last_frag = &uri[space_index + 1..];
// println!(
// "DEBUG: last frag: '{}' URI: '{}' SEP: '{}'",
// last_frag, uri, separator
// );
if uri.ends_with(&format!(" {separator}")) || last_frag.is_empty() {
uri.strip_suffix(separator).unwrap_or(uri).to_string()
} else if last_frag.starts_with(separator) {
uri.to_string()
} else if last_frag.starts_with('/') || last_frag.starts_with('.') {
format!(
"{} {separator}{}",
&uri[..last_slash_index],
&uri[last_slash_index + 2..]
)
// TODO: remove those checks, they are no longer useful?
// } else if last_frag.starts_with('/') || last_frag.starts_with('.') {
// format!(
// "{} {separator}{}",
// &uri[..space_index],
// &uri[space_index + 2..]
// )
} else {
format!("{} {separator}{}", &uri[..last_slash_index], last_frag)
format!("{} {separator}{}", &uri[..space_index], last_frag)
}
} else {
uri.to_string()
Expand All @@ -248,15 +258,14 @@ pub fn normalize_dataset(
separator: &str,
) -> Result<String, NpError> {
let mut quads_vec: Vec<NormQuad> = vec![];
let norm_base = format!("{} ", norm_ns.strip_suffix('#').unwrap_or(norm_ns));
let base_uri = match base_ns.chars().last() {
Some(_) => &base_ns[..base_ns.len() - 1],
None => base_ns,
};
let norm_uri = format!("{} ", norm_ns);
// println!("DEBUG: NORMALIZE {} {} {}", base_ns, norm_ns, separator);
// Example already signed: http://www.nextprot.org/nanopubs#NX_Q9Y6K8_ESTEvidence_TS-2083.RAr9ao0vjXtLf3d9U4glE_uQWSknfYoPlIzKBq6ybOO5k.
// Not signed yet: http://www.proteinatlas.org/about/nanopubs/ENSG00000000003_ih_TS_0030_head
// becomes http://www.proteinatlas.org/about/nanopubs/ENSG00000000003_ih_TS_0030.RAyBeXMqokAQZ5psoETKtkOeYzHnoIoXTgNFKRdLM8yzs#__head
// last char after trusty becomes # and before .
// Default tmp URI: http://purl.org/nanopub/temp/
// becomes: https://w3id.org/np/RAyBeXMqokAQZ5psoETKtkOeYzHnoIoXTgNFKRdLM8yzs#Head

// Convert dataset to a list of NormQuad struct
for quad in dataset.quads() {
Expand All @@ -268,18 +277,18 @@ pub fn normalize_dataset(
.ok_or(TermError())?
.to_string();
// Extract components of the quad and convert them to strings. Replace the base URI if present
let graph = fix_normed_uri(&graph.replace(base_uri, &norm_base), separator);
let graph = fix_normed_uri(&graph.replace(base_ns, &norm_uri), separator);

let subject = if quad.s().iri().ok_or(TermError())?.to_string() == base_ns {
fix_normed_uri(&norm_base, separator)
norm_uri.to_string()
} else {
fix_normed_uri(
&quad
.s()
.iri()
.ok_or(TermError())?
.to_string()
.replace(base_uri, &norm_base),
.replace(base_ns, &norm_uri),
separator,
)
};
Expand All @@ -289,19 +298,19 @@ pub fn normalize_dataset(
.iri()
.ok_or(TermError())?
.to_string()
.replace(base_uri, &norm_base);
.replace(base_ns, &norm_uri);

let object = if quad.o().is_iri() {
if quad.o().iri().ok_or(TermError())?.to_string() == base_ns {
fix_normed_uri(&norm_base, separator)
norm_uri.to_string()
} else {
fix_normed_uri(
&quad
.o()
.iri()
.ok_or(TermError())?
.to_string()
.replace(base_uri, &norm_base),
.replace(base_ns, &norm_uri),
separator,
)
}
Expand Down
Loading

0 comments on commit 588b6e5

Please sign in to comment.