From 640b39e970c009285127f78135a056103437c614 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:43:15 +0100 Subject: [PATCH 01/23] Omit chapters with no DOI from Crossref output --- .../src/xml/doideposit_crossref.rs | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/thoth-export-server/src/xml/doideposit_crossref.rs b/thoth-export-server/src/xml/doideposit_crossref.rs index f7b675f3..53c52fce 100644 --- a/thoth-export-server/src/xml/doideposit_crossref.rs +++ b/thoth-export-server/src/xml/doideposit_crossref.rs @@ -132,7 +132,11 @@ impl XmlElementBlock for Work { .iter() .filter(|r| r.relation_type == RelationType::HAS_CHILD) { - XmlElementBlock::::xml_element(chapter, w)?; + // If chapter has no DOI, nothing to output (`content_item` element + // representing chapter must contain `doi_data` element with `doi`) + if chapter.related_work.doi.is_some() { + XmlElementBlock::::xml_element(chapter, w)?; + } } Ok(()) }) @@ -692,12 +696,8 @@ fn write_chapter_doi_collection( )); } } else { - // `doi_data` element is mandatory for `content_item`, and must contain - // both `doi` element and `resource` (landing page) element - return Err(ThothError::IncompleteMetadataRecord( - DEPOSIT_ERROR.to_string(), - "Missing chapter DOI".to_string(), - )); + // Caller should only pass in chapters which have DOIs + unreachable!() } Ok(()) } @@ -1436,17 +1436,6 @@ mod tests { output, "Could not generate doideposit::crossref: Missing chapter Landing Page".to_string() ); - - // Restore landing page but remove DOI. Result: error, as above - test_relations.related_work.edition = None; - test_relations.related_work.landing_page = - Some("https://www.book.com/chapter_one".to_string()); - test_relations.related_work.doi = None; - let output = generate_test_output(false, &test_relations); - assert_eq!( - output, - "Could not generate doideposit::crossref: Missing chapter DOI".to_string() - ); } #[test] @@ -1974,6 +1963,9 @@ mod tests { test_work.publications[0].locations.clear(); // Remove last (hardback) publication test_work.publications.pop(); + // Change sole relation to chapter with no DOI + test_work.relations[0].relation_type = RelationType::HAS_CHILD; + test_work.relations[0].related_work.doi = None; let output = generate_test_output(true, &test_work); // Work type changed @@ -2022,6 +2014,18 @@ mod tests { assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); + // Only chapters with no DOI supplied: no `content_item` elements emitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" Part"#)); + assert!(!output.contains(r#" One"#)); + assert!(!output.contains(r#" 1"#)); + assert!(!output.contains(r#" 02"#)); + assert!(!output.contains(r#" 28"#)); + assert!(!output.contains(r#" 2000"#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 10"#)); + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" https://www.book.com/part_one"#)); // Change work type, remove landing page, remove XML ISBN, // remove all but the omitted contributor From a378dacac3b9b2d41ff341c4dac4698c75fcf05a Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 9 Sep 2024 12:33:41 +0100 Subject: [PATCH 02/23] Raise error if no DOIs are found for either parent work or chapters --- .../src/xml/doideposit_crossref.rs | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/thoth-export-server/src/xml/doideposit_crossref.rs b/thoth-export-server/src/xml/doideposit_crossref.rs index 53c52fce..1d519c43 100644 --- a/thoth-export-server/src/xml/doideposit_crossref.rs +++ b/thoth-export-server/src/xml/doideposit_crossref.rs @@ -79,14 +79,25 @@ impl XmlSpecification for DoiDepositCrossref { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - let work_type = match &self.work_type { + let work = self; + if work.doi.is_none() + && !work + .relations + .iter() + .any(|r| r.relation_type == RelationType::HAS_CHILD && r.related_work.doi.is_some()) + { + return Err(ThothError::IncompleteMetadataRecord( + DEPOSIT_ERROR.to_string(), + "No work or chapter DOIs to deposit".to_string(), + )); + } + let work_type = match &work.work_type { WorkType::MONOGRAPH => "monograph", WorkType::EDITED_BOOK => "edited_book", WorkType::TEXTBOOK => "reference", WorkType::JOURNAL_ISSUE | WorkType::BOOK_SET | WorkType::BOOK_CHAPTER => "other", WorkType::Other(_) => unreachable!(), }; - let work = self; // As an alternative to `book_metadata` and `book_series_metadata` below, // `book_set_metadata` can be used for works which are part of a set. // Omitted at present but could be considered as a future enhancement. @@ -2069,18 +2080,31 @@ mod tests { assert!(!output.contains(r#" 10.00001/BOOK.0001"#)); assert!(!output.contains(r#" https://www.book.com"#)); - // Change work type again, replace landing page but remove DOI + // Remove DOI (so neither work nor chapter DOIs are present). Result: error + test_work.doi = None; + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate doideposit::crossref: No work or chapter DOIs to deposit" + .to_string() + ); + + // Change work type again, replace landing page, replace chapter DOI test_work.work_type = WorkType::JOURNAL_ISSUE; test_work.landing_page = Some("https://www.book.com".to_string()); - test_work.doi = None; + test_work.relations[0].related_work.doi = + Some(Doi::from_str("https://doi.org/10.00001/PART.0001").unwrap()); let output = generate_test_output(true, &test_work); // Work type changed assert!(!output.contains(r#" "#)); assert!(output.contains(r#" "#)); - // No DOI: entire `doi_data` element omitted (even though landing page restored) - assert!(!output.contains(r#" "#)); + // No work DOI: entire work-specific `doi_data` element omitted (even though landing page restored) assert!(!output.contains(r#" 10.00001/BOOK.0001"#)); assert!(!output.contains(r#" https://www.book.com"#)); + // But chapter-specific `doi_data` element will be present (at same nesting level) + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 10.00001/PART.0001"#)); + assert!(output.contains(r#" https://www.book.com/part_one"#)); // Remove publication date. Result: error test_work.publication_date = None; From 57bb64b9f2f8c64210b19017975490fd9414a5c4 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 9 Sep 2024 12:36:50 +0100 Subject: [PATCH 03/23] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ffff930..694cc16d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed + - [551](https://github.com/thoth-pub/thoth/issues/551) - Only include chapters in Crossref metadata output if they have DOIs + +### Fixed + - [565](https://github.com/thoth-pub/thoth/issues/565) - Don't generate Crossref metadata output if no DOIs (work or chapter) are present ## [[0.12.9]](https://github.com/thoth-pub/thoth/releases/tag/v0.12.9) - 2024-09-06 ### Added From 7b2ed5002920a265cdf79235d9b7731705557f2b Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:04:12 +0100 Subject: [PATCH 04/23] Review markups: remove unnecessary variable, streamline unreachable check --- .../src/xml/doideposit_crossref.rs | 183 +++++++++--------- 1 file changed, 89 insertions(+), 94 deletions(-) diff --git a/thoth-export-server/src/xml/doideposit_crossref.rs b/thoth-export-server/src/xml/doideposit_crossref.rs index 1d519c43..d90d5233 100644 --- a/thoth-export-server/src/xml/doideposit_crossref.rs +++ b/thoth-export-server/src/xml/doideposit_crossref.rs @@ -79,9 +79,8 @@ impl XmlSpecification for DoiDepositCrossref { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - let work = self; - if work.doi.is_none() - && !work + if self.doi.is_none() + && !self .relations .iter() .any(|r| r.relation_type == RelationType::HAS_CHILD && r.related_work.doi.is_some()) @@ -91,7 +90,7 @@ impl XmlElementBlock for Work { "No work or chapter DOIs to deposit".to_string(), )); } - let work_type = match &work.work_type { + let work_type = match &self.work_type { WorkType::MONOGRAPH => "monograph", WorkType::EDITED_BOOK => "edited_book", WorkType::TEXTBOOK => "reference", @@ -117,22 +116,22 @@ impl XmlElementBlock for Work { XmlElementBlock::::xml_element(series, w)?; ordinal = Some(ord); } - write_work_contributions(work, w)?; - write_work_title(work, w)?; - write_work_abstract(work, w)?; + write_work_contributions(self, w)?; + write_work_title(self, w)?; + write_work_abstract(self, w)?; if ordinal.is_some() { let ordinal_i64 = ordinal.unwrap_or(0); write_work_volume(ordinal_i64, w)?; } - write_work_edition(work, w)?; - write_work_publication_date(work, w)?; - write_work_publications(work, w)?; - write_publisher(work, w)?; - write_crossmark_funding_access(work, w)?; - write_doi_collection(work, w)?; - write_work_references(work, w)?; + write_work_edition(self, w)?; + write_work_publication_date(self, w)?; + write_work_publications(self, w)?; + write_publisher(self, w)?; + write_crossmark_funding_access(self, w)?; + write_doi_collection(self, w)?; + write_work_references(self, w)?; Ok(()) })?; @@ -627,88 +626,84 @@ fn write_chapter_doi_collection( chapter: &WorkRelations, w: &mut EventWriter, ) -> ThothResult<()> { - if let Some(doi) = &chapter.related_work.doi { - if let Some(landing_page) = &chapter.related_work.landing_page { - write_element_block("doi_data", w, |w| { - write_element_block("doi", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - })?; - write_element_block("resource", w, |w| { - w.write(XmlEvent::Characters(landing_page)) - .map_err(|e| e.into()) - })?; - if let Some(pdf_url) = chapter - .related_work - .publications - .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty() - }) - .and_then(|p| p.locations.iter().find(|l| l.canonical)) - .and_then(|l| l.full_text_url.as_ref()) - { - // Used for CrossRef Similarity Check. URL must point directly to full-text PDF. - // Alternatively, a direct link to full-text HTML can be used (not implemented here). - write_full_element_block( - "collection", - Some(vec![("property", "crawler-based")]), - w, - |w| { - for crawler in ["iParadigms", "google", "msn", "yahoo", "scirus"] { - write_full_element_block( - "item", - Some(vec![("crawler", crawler)]), - w, - |w| { - write_full_element_block( - "resource", - Some(vec![("mime_type", "application/pdf")]), - w, - |w| { - w.write(XmlEvent::Characters(pdf_url)) - .map_err(|e| e.into()) - }, - ) - }, - )?; - } - Ok(()) - }, - )?; - // Used for CrossRef Text and Data Mining. URL must point directly to full-text PDF. - // Alternatively, a direct link to full-text XML can be used (not implemented here). - write_full_element_block( - "collection", - Some(vec![("property", "text-mining")]), - w, - |w| { - write_element_block("item", w, |w| { - write_full_element_block( - "resource", - Some(vec![("mime_type", "application/pdf")]), - w, - |w| { - w.write(XmlEvent::Characters(pdf_url)).map_err(|e| e.into()) - }, - ) - }) - }, - )?; - } - Ok(()) + let doi = &chapter + .related_work + .doi + .as_ref() + .expect("Caller should only pass in chapters which have DOIs"); + if let Some(landing_page) = &chapter.related_work.landing_page { + write_element_block("doi_data", w, |w| { + write_element_block("doi", w, |w| { + w.write(XmlEvent::Characters(&doi.to_string())) + .map_err(|e| e.into()) })?; - } else { - // `doi_data` element is mandatory for `content_item`, and must contain - // both `doi` element and `resource` (landing page) element - return Err(ThothError::IncompleteMetadataRecord( - DEPOSIT_ERROR.to_string(), - "Missing chapter Landing Page".to_string(), - )); - } + write_element_block("resource", w, |w| { + w.write(XmlEvent::Characters(landing_page)) + .map_err(|e| e.into()) + })?; + if let Some(pdf_url) = chapter + .related_work + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()) + .and_then(|p| p.locations.iter().find(|l| l.canonical)) + .and_then(|l| l.full_text_url.as_ref()) + { + // Used for CrossRef Similarity Check. URL must point directly to full-text PDF. + // Alternatively, a direct link to full-text HTML can be used (not implemented here). + write_full_element_block( + "collection", + Some(vec![("property", "crawler-based")]), + w, + |w| { + for crawler in ["iParadigms", "google", "msn", "yahoo", "scirus"] { + write_full_element_block( + "item", + Some(vec![("crawler", crawler)]), + w, + |w| { + write_full_element_block( + "resource", + Some(vec![("mime_type", "application/pdf")]), + w, + |w| { + w.write(XmlEvent::Characters(pdf_url)) + .map_err(|e| e.into()) + }, + ) + }, + )?; + } + Ok(()) + }, + )?; + // Used for CrossRef Text and Data Mining. URL must point directly to full-text PDF. + // Alternatively, a direct link to full-text XML can be used (not implemented here). + write_full_element_block( + "collection", + Some(vec![("property", "text-mining")]), + w, + |w| { + write_element_block("item", w, |w| { + write_full_element_block( + "resource", + Some(vec![("mime_type", "application/pdf")]), + w, + |w| w.write(XmlEvent::Characters(pdf_url)).map_err(|e| e.into()), + ) + }) + }, + )?; + } + Ok(()) + })?; } else { - // Caller should only pass in chapters which have DOIs - unreachable!() + // `doi_data` element is mandatory for `content_item`, and must contain + // both `doi` element and `resource` (landing page) element + return Err(ThothError::IncompleteMetadataRecord( + DEPOSIT_ERROR.to_string(), + "Missing chapter Landing Page".to_string(), + )); } Ok(()) } From 2ffb3382468ecbad9ade343864dc83f8ea8b0d7c Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Wed, 11 Sep 2024 12:26:40 +0100 Subject: [PATCH 05/23] Upgrade paperclip --- Cargo.lock | 31 +++++++++++++++++++++++-------- thoth-export-server/Cargo.toml | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 795a3899..b6bf32a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1697,6 +1697,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -2053,6 +2054,17 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" +[[package]] +name = "openapiv3" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b83630305ecc3355e998ddd2f926f98aae8e105eb42652174a58757851ba47" +dependencies = [ + "indexmap 1.9.3", + "serde", + "serde_json", +] + [[package]] name = "openssl" version = "0.10.66" @@ -2099,13 +2111,14 @@ dependencies = [ [[package]] name = "paperclip" -version = "0.8.2" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2509afd8f138efe07cd367832289f5cc61d1eb1ec7f1eb75172abca6f7b9b66d" +checksum = "ac41f27e83168c22515ef52d62a0357b5f5b8df846aa391f8b903b0ed7719429" dependencies = [ "anyhow", "itertools", "once_cell", + "openapiv3", "paperclip-actix", "paperclip-core", "paperclip-macros", @@ -2120,15 +2133,16 @@ dependencies = [ [[package]] name = "paperclip-actix" -version = "0.6.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4adf797da91baee514bc03b020fdd6673d2f8c1af8a859e50d6d803a4b3dddd2" +checksum = "d5b6f4c003aa6167df82f1574c951638a8b9a43827059da0cda8b5df88d20ecf" dependencies = [ "actix-service", "actix-web", "futures", "mime_guess", "once_cell", + "openapiv3", "paperclip-core", "paperclip-macros", "serde_json", @@ -2136,13 +2150,14 @@ dependencies = [ [[package]] name = "paperclip-core" -version = "0.6.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8db363c823fa71c00da73ff8cee3d6902e1ad66b770cc224a74dc7cf54de3aad" +checksum = "8e2e742f71daf34eb8f62ccc5a1a5e1f029eb84be563523a2a5ee049366329f4" dependencies = [ "actix-web", "mime", "once_cell", + "openapiv3", "paperclip-macros", "pin-project-lite", "regex", @@ -2155,9 +2170,9 @@ dependencies = [ [[package]] name = "paperclip-macros" -version = "0.6.3" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6e25ce2c5362c8d48dc89e0f9ca076d507f7c1eabd04f0d593cdf5addff90c" +checksum = "0385be5ae9c886c46688290534363a229f2531aa2c5c2bfc3b3ddafed5143aaa" dependencies = [ "heck 0.4.1", "http", diff --git a/thoth-export-server/Cargo.toml b/thoth-export-server/Cargo.toml index f3607cd7..83395ec9 100644 --- a/thoth-export-server/Cargo.toml +++ b/thoth-export-server/Cargo.toml @@ -23,7 +23,7 @@ futures = "0.3.30" lazy_static = "1.5.0" log = "0.4.20" marc = { version = "3.1.1", features = ["xml"] } -paperclip = { version = "0.8.2", features = ["actix-base", "actix4", "uuid1", "v2"] } +paperclip = { version = "0.9.1", features = ["actix4", "uuid1", "v3"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" uuid = { version = "1.10.0", features = ["serde"] } From 52111e2597dfebe3c9511b83107d5e87f1284413 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Wed, 11 Sep 2024 12:27:58 +0100 Subject: [PATCH 06/23] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 694cc16d..e38eddc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] + +### Changed + - Upgrade `paperclip` to v0.9.1 + ### Changed - [551](https://github.com/thoth-pub/thoth/issues/551) - Only include chapters in Crossref metadata output if they have DOIs From 4e5b9c76e7872c53fb64d0c1ac3f0bc484a75252 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Wed, 11 Sep 2024 12:28:27 +0100 Subject: [PATCH 07/23] Add terms of service to export API --- thoth-export-server/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/thoth-export-server/src/lib.rs b/thoth-export-server/src/lib.rs index 7b85b470..7ac5ca87 100644 --- a/thoth-export-server/src/lib.rs +++ b/thoth-export-server/src/lib.rs @@ -31,7 +31,7 @@ struct ApiConfig { impl ApiConfig { pub fn new(public_url: String) -> Self { Self { - api_schema: format!("{public_url}/swagger.json"), + api_schema: format!("{public_url}/openapi.json"), } } } @@ -82,6 +82,7 @@ pub async fn start_server( "Obtain Thoth metadata records in various formats and platform specifications" .to_string(), ), + terms_of_service: Some("https://thoth.pub/policies/terms-thoth-free".to_string()), contact: Some(Contact { name: Some("Thoth Support".to_string()), url: Some("https://thoth.pub".to_string()), @@ -107,6 +108,7 @@ pub async fn start_server( .configure(platform::route) .configure(specification::route) .with_json_spec_at("/swagger.json") + .with_json_spec_v3_at("/openapi.json") .build() }) .workers(threads) From 9fccac18b38061db8cb99edf884ae55b1dbab9fc Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Wed, 11 Sep 2024 12:29:57 +0100 Subject: [PATCH 08/23] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e38eddc3..19052082 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Upgrade `paperclip` to v0.9.1 +### Added + - Implement OpenAPI v3 schema in export API, served under `/openapi.json` + - Added terms of service to export API + ### Changed - [551](https://github.com/thoth-pub/thoth/issues/551) - Only include chapters in Crossref metadata output if they have DOIs From 87d49e6a3b1b6a4ef321ba0cb3b0b85594b4cca8 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Wed, 11 Sep 2024 12:32:11 +0100 Subject: [PATCH 09/23] Upgrade rust 1.81.0 --- Dockerfile | 2 +- Dockerfile.dev | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 16bf4592..3d87552c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG MUSL_IMAGE=clux/muslrust:1.80.1-stable +ARG MUSL_IMAGE=clux/muslrust:1.81.0-stable FROM ${MUSL_IMAGE} as build diff --git a/Dockerfile.dev b/Dockerfile.dev index a9035611..e3ff5cf2 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -1,4 +1,4 @@ -ARG RUST_VERSION=1.80.1 +ARG RUST_VERSION=1.81.0 FROM rust:${RUST_VERSION} From d88d1adc9a07438b61c258b609cb554e1e5d9de3 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Wed, 11 Sep 2024 12:33:56 +0100 Subject: [PATCH 10/23] Update changelog --- CHANGELOG.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19052082..42f5924c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,15 +6,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -### Changed - - Upgrade `paperclip` to v0.9.1 - ### Added - - Implement OpenAPI v3 schema in export API, served under `/openapi.json` - - Added terms of service to export API + - [628](https://github.com/thoth-pub/thoth/pull/628) - Implement OpenAPI v3 schema in export API, served under `/openapi.json` + - [628](https://github.com/thoth-pub/thoth/pull/628) - Added terms of service to export API ### Changed - [551](https://github.com/thoth-pub/thoth/issues/551) - Only include chapters in Crossref metadata output if they have DOIs + - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade `paperclip` to v0.9.1 + - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade rust to `1.81.0` in production and development `Dockerfile` ### Fixed - [565](https://github.com/thoth-pub/thoth/issues/565) - Don't generate Crossref metadata output if no DOIs (work or chapter) are present From 39b4edce2c59e7361771ed1b047d43403e942a67 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Wed, 11 Sep 2024 12:34:18 +0100 Subject: [PATCH 11/23] Update changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42f5924c..ee583e9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] - ### Added - [628](https://github.com/thoth-pub/thoth/pull/628) - Implement OpenAPI v3 schema in export API, served under `/openapi.json` - [628](https://github.com/thoth-pub/thoth/pull/628) - Added terms of service to export API From 2bb9ac869735f15ea8268abe40437876bf787fa9 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:53:04 +0100 Subject: [PATCH 12/23] Restrict OA-specific output fields to works with licences: MARC --- .../src/marc21/marc21record_thoth.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/thoth-export-server/src/marc21/marc21record_thoth.rs b/thoth-export-server/src/marc21/marc21record_thoth.rs index 52d61e9d..9765d65c 100644 --- a/thoth-export-server/src/marc21/marc21record_thoth.rs +++ b/thoth-export-server/src/marc21/marc21record_thoth.rs @@ -319,12 +319,15 @@ impl Marc21Entry for Work { .and_then(|f| builder.add_field(f))?; } - // 506 - restrictions on access - FieldRepr::from((b"506", "0\\")) - .add_subfield(b"a", "Open Access") - .and_then(|f| f.add_subfield(b"f", "Unrestricted online access")) - .and_then(|f| f.add_subfield(b"2", "star")) - .and_then(|f| builder.add_field(f))?; + // Assume omission of licence means work is non-OA + if self.license.is_some() { + // 506 - restrictions on access + FieldRepr::from((b"506", "0\\")) + .add_subfield(b"a", "Open Access") + .and_then(|f| f.add_subfield(b"f", "Unrestricted online access")) + .and_then(|f| f.add_subfield(b"2", "star")) + .and_then(|f| builder.add_field(f))?; + } // 520 - abstract if let Some(mut long_abstract) = self.long_abstract.clone() { From 2ea9940f2029996ce3424c5e5a333e8372ef68cd Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:59:52 +0100 Subject: [PATCH 13/23] Restrict OA-specific output fields to works with licences: ONIX 3.0 --- thoth-export-server/src/xml/onix3_jstor.rs | 226 +++++++++++---- .../src/xml/onix3_project_muse.rs | 226 +++++++++++---- thoth-export-server/src/xml/onix3_thoth.rs | 267 ++++++++++-------- 3 files changed, 476 insertions(+), 243 deletions(-) diff --git a/thoth-export-server/src/xml/onix3_jstor.rs b/thoth-export-server/src/xml/onix3_jstor.rs index 67a4b581..6d54f2bb 100644 --- a/thoth-export-server/src/xml/onix3_jstor.rs +++ b/thoth-export-server/src/xml/onix3_jstor.rs @@ -81,6 +81,7 @@ impl XmlElementBlock for Work { .and_then(|l| l.full_text_url.as_ref()) { let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let (main_isbn, print_isbn) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { write_element_block("RecordReference", w, |w| { @@ -215,58 +216,63 @@ impl XmlElementBlock for Work { } Ok(()) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(labstract) = &self.long_abstract { - write_element_block("TextContent", w, |w| { - // 03 Description ("30 Abstract" not implemented in OAPEN) - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + if self.long_abstract.is_some() || self.toc.is_some() || is_open_access { + write_element_block("CollateralDetail", w, |w| { + if let Some(labstract) = &self.long_abstract { + write_element_block("TextContent", w, |w| { + // 03 Description ("30 Abstract" not implemented in OAPEN) + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) + }, + ) })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) })?; - write_full_element_block( - "Text", - Some(vec![("language", "eng")]), - w, - |w| { - w.write(XmlEvent::Characters(labstract)) + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open Access")) .map_err(|e| e.into()) - }, - ) - })?; - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - }) - })?; + } + Ok(()) + })?; + } write_element_block("PublishingDetail", w, |w| { write_element_block("Imprint", w, |w| { write_element_block("ImprintName", w, |w| { @@ -402,10 +408,47 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + let prices = self + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF)) + .map(|p| p.prices.clone()) + .unwrap_or_default(); + if is_open_access || prices.is_empty() { + // 01 Free of charge + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters( + &price.currency_code.to_string(), + )) + .map_err(|e| e.into()) + })?; + write_element_block("Territory", w, |w| { + write_element_block("RegionsIncluded", w, |w| { + w.write(XmlEvent::Characters("WORLD")) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + } })?; } Ok(()) @@ -581,9 +624,10 @@ mod tests { use thoth_api::model::Isbn; use thoth_api::model::Orcid; use thoth_client::{ - ContributionType, LanguageCode, LanguageRelation, LocationPlatform, PublicationType, - WorkContributionsContributor, WorkFundings, WorkImprint, WorkImprintPublisher, WorkIssues, - WorkIssuesSeries, WorkPublicationsLocations, WorkStatus, WorkSubjects, WorkType, + ContributionType, CurrencyCode, LanguageCode, LanguageRelation, LocationPlatform, + PublicationType, WorkContributionsContributor, WorkFundings, WorkImprint, + WorkImprintPublisher, WorkIssues, WorkIssuesSeries, WorkPublicationsLocations, + WorkPublicationsPrices, WorkStatus, WorkSubjects, WorkType, }; use uuid::Uuid; @@ -807,7 +851,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 5.95, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 4.95, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1009,6 +1062,32 @@ mod tests { assert!(!output .contains(r#" 02"#)); assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); + // Absence of licence means we assume non-OA + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" Open Access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1022,10 +1101,10 @@ mod tests { // No TOC supplied assert!(!output.contains(r#" 04"#)); assert!(!output.contains(r#" 1. Chapter 1"#)); - // CollateralDetail block is still present as it always contains Open Access statement - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 00"#)); + // No items left to go in CollateralDetail block so it's omitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 00"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); // No publication date supplied @@ -1045,6 +1124,33 @@ mod tests { assert!(!output.contains(r#" 15"#)); assert!(!output.contains(r#" 9781402894626"#)); + // Remove PDF prices but keep book "non-OA" (no licence) + test_work.publications[1].prices.clear(); + let output = generate_test_output(true, &test_work); + assert!(output.contains(r#" 01"#)); + assert!(!output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(!output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); + // Add withdrawn date test_work.withdrawn_date = chrono::NaiveDate::from_ymd_opt(2020, 12, 31); let output = generate_test_output(true, &test_work); diff --git a/thoth-export-server/src/xml/onix3_project_muse.rs b/thoth-export-server/src/xml/onix3_project_muse.rs index 365a75b3..29aec17f 100644 --- a/thoth-export-server/src/xml/onix3_project_muse.rs +++ b/thoth-export-server/src/xml/onix3_project_muse.rs @@ -80,6 +80,7 @@ impl XmlElementBlock for Work { .and_then(|l| l.full_text_url.as_ref()) { let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { write_element_block("RecordReference", w, |w| { @@ -256,58 +257,63 @@ impl XmlElementBlock for Work { })?; Ok(()) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(labstract) = &self.long_abstract { - write_element_block("TextContent", w, |w| { - // 03 Description ("30 Abstract" not implemented in OAPEN) - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + if self.long_abstract.is_some() || self.toc.is_some() || is_open_access { + write_element_block("CollateralDetail", w, |w| { + if let Some(labstract) = &self.long_abstract { + write_element_block("TextContent", w, |w| { + // 03 Description ("30 Abstract" not implemented in OAPEN) + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) + }, + ) })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) })?; - write_full_element_block( - "Text", - Some(vec![("language", "eng")]), - w, - |w| { - w.write(XmlEvent::Characters(labstract)) + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open Access")) .map_err(|e| e.into()) - }, - ) - })?; - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - }) - })?; + } + Ok(()) + })?; + } write_element_block("PublishingDetail", w, |w| { write_element_block("Imprint", w, |w| { write_element_block("ImprintName", w, |w| { @@ -453,10 +459,47 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + let prices = self + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF)) + .map(|p| p.prices.clone()) + .unwrap_or_default(); + if is_open_access || prices.is_empty() { + // 01 Free of charge + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters( + &price.currency_code.to_string(), + )) + .map_err(|e| e.into()) + })?; + write_element_block("Territory", w, |w| { + write_element_block("RegionsIncluded", w, |w| { + w.write(XmlEvent::Characters("WORLD")) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + } })?; } Ok(()) @@ -675,9 +718,10 @@ mod tests { use thoth_api::model::Isbn; use thoth_api::model::Orcid; use thoth_client::{ - ContributionType, LanguageCode, LanguageRelation, LocationPlatform, PublicationType, - WorkContributionsContributor, WorkFundings, WorkImprint, WorkImprintPublisher, WorkIssues, - WorkIssuesSeries, WorkPublicationsLocations, WorkStatus, WorkSubjects, WorkType, + ContributionType, CurrencyCode, LanguageCode, LanguageRelation, LocationPlatform, + PublicationType, WorkContributionsContributor, WorkFundings, WorkImprint, + WorkImprintPublisher, WorkIssues, WorkIssuesSeries, WorkPublicationsLocations, + WorkPublicationsPrices, WorkStatus, WorkSubjects, WorkType, }; use uuid::Uuid; @@ -897,7 +941,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 5.95, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 4.95, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1115,6 +1168,32 @@ mod tests { assert!(!output .contains(r#" 02"#)); assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); + // Absence of licence means we assume non-OA + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" Open Access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1128,10 +1207,10 @@ mod tests { // No TOC supplied assert!(!output.contains(r#" 04"#)); assert!(!output.contains(r#" 1. Chapter 1"#)); - // CollateralDetail block is still present as it always contains Open Access statement - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 00"#)); + // No items left to go in CollateralDetail block so it's omitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 00"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); // No publication date supplied @@ -1154,6 +1233,33 @@ mod tests { assert!(!output.contains(r#" B2"#)); assert!(!output.contains(r#" custom1"#)); + // Remove PDF prices but keep book "non-OA" (no licence) + test_work.publications[0].prices.clear(); + let output = generate_test_output(true, &test_work); + assert!(output.contains(r#" 01"#)); + assert!(!output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(!output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); + // Remove the only remaining (BIC) subject // Result: error (can't generate Project MUSE ONIX without either a BIC or BISAC subject) test_work.subjects.clear(); diff --git a/thoth-export-server/src/xml/onix3_thoth.rs b/thoth-export-server/src/xml/onix3_thoth.rs index 0cc0b941..10c74de1 100644 --- a/thoth-export-server/src/xml/onix3_thoth.rs +++ b/thoth-export-server/src/xml/onix3_thoth.rs @@ -78,6 +78,7 @@ impl XmlElementBlock for Work { )); } let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let isbns: Vec = self .publications .iter() @@ -420,136 +421,152 @@ impl XmlElementBlock for Work { }) }) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(mut short_abstract) = self.short_abstract.clone() { - // Short description field may not exceed 350 characters. - // Ensure that the string is truncated at a valid UTF-8 boundary - // by finding the byte index of the 350th character and then truncating - // the string at that index, to avoid creating invalid UTF-8 sequences. - if let Some((byte_index, _)) = short_abstract.char_indices().nth(350) { - short_abstract.truncate(byte_index); - } - write_element_block("TextContent", w, |w| { - // 02 Short description - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(&short_abstract)) - .map_err(|e| e.into()) - }) - })?; - } - if let Some(long_abstract) = &self.long_abstract { - // 03 Description, 30 Abstract - for text_type in ["03", "30"] { + if self.short_abstract.is_some() + || self.long_abstract.is_some() + || self.toc.is_some() + || self.general_note.is_some() + || self.cover_url.is_some() + || is_open_access + { + write_element_block("CollateralDetail", w, |w| { + if let Some(mut short_abstract) = self.short_abstract.clone() { + // Short description field may not exceed 350 characters. + // Ensure that the string is truncated at a valid UTF-8 boundary + // by finding the byte index of the 350th character and then truncating + // the string at that index, to avoid creating invalid UTF-8 sequences. + if let Some((byte_index, _)) = short_abstract.char_indices().nth(350) { + short_abstract.truncate(byte_index); + } write_element_block("TextContent", w, |w| { + // 02 Short description write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters(text_type)) - .map_err(|e| e.into()) + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) })?; // 00 Unrestricted write_element_block("ContentAudience", w, |w| { w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(long_abstract)) + w.write(XmlEvent::Characters(&short_abstract)) .map_err(|e| e.into()) }) })?; } - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_full_element_block("Text", Some(vec![("language", "eng")]), w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - })?; - if let Some(general_note) = &self.general_note { - write_element_block("TextContent", w, |w| { - // 13 Publisher's notice - // "A statement included by a publisher in fulfillment of contractual obligations" - // Used in many different ways - closest approximation - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("13")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(general_note)) - .map_err(|e| e.into()) - }) - })?; - } - if let Some(cover_url) = &self.cover_url { - write_element_block("SupportingResource", w, |w| { - // 01 Front cover - write_element_block("ResourceContentType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - // 03 Image - write_element_block("ResourceMode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - if let Some(cover_caption) = &self.cover_caption { - write_element_block("ResourceFeature", w, |w| { - // 02 Caption - write_element_block("ResourceFeatureType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + if let Some(long_abstract) = &self.long_abstract { + // 03 Description, 30 Abstract + for text_type in ["03", "30"] { + write_element_block("TextContent", w, |w| { + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters(text_type)) + .map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - write_element_block("FeatureNote", w, |w| { - w.write(XmlEvent::Characters(cover_caption)) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(long_abstract)) .map_err(|e| e.into()) }) })?; } - write_element_block("ResourceVersion", w, |w| { - // 02 Downloadable file - write_element_block("ResourceForm", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) + })?; + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) })?; - write_element_block("ResourceLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters("Open Access")) + .map_err(|e| e.into()) + }, + ) + })?; + } + if let Some(general_note) = &self.general_note { + write_element_block("TextContent", w, |w| { + // 13 Publisher's notice + // "A statement included by a publisher in fulfillment of contractual obligations" + // Used in many different ways - closest approximation + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("13")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(general_note)) .map_err(|e| e.into()) }) - }) - })?; - } - Ok(()) - })?; + })?; + } + if let Some(cover_url) = &self.cover_url { + write_element_block("SupportingResource", w, |w| { + // 01 Front cover + write_element_block("ResourceContentType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + // 03 Image + write_element_block("ResourceMode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + if let Some(cover_caption) = &self.cover_caption { + write_element_block("ResourceFeature", w, |w| { + // 02 Caption + write_element_block("ResourceFeatureType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("FeatureNote", w, |w| { + w.write(XmlEvent::Characters(cover_caption)) + .map_err(|e| e.into()) + }) + })?; + } + write_element_block("ResourceVersion", w, |w| { + // 02 Downloadable file + write_element_block("ResourceForm", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("ResourceLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + })?; + } let chapter_relations: Vec = self .relations .clone() @@ -3125,6 +3142,15 @@ mod tests { 13 00 This is a general note + "# + )); + // No licence means we assume the title is non-OA + assert!(!output.contains( + r#" + + 20 + 00 + Open Access "# )); // SupportingResource block still present but ResourceFeature absent @@ -3248,6 +3274,8 @@ mod tests { "# )); + // Test truncation of short abstract + test_work.short_abstract = Some("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementum odio feugiat tempus. Etiam eu felis ac metus viverra ornare. In consectetur neque sed feugiat ornare. Mauris at purus fringilla orci tincidunt pulvinar sed a massa. Nullam vestibulum posuere augue, sit amet tincidunt nisl pulvinar ac.".to_string()); // Remove even more values test_work.edition = None; test_work.table_count = None; @@ -3283,14 +3311,14 @@ mod tests { )); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); - // No cover URL means no SupportingResource block - CollateralDetail only contains OA statement + // No cover URL means no SupportingResource block - CollateralDetail only contains short abstract assert!(output.contains( r#" - 20 + 02 00 - Open Access + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementu "# )); @@ -3336,8 +3364,8 @@ mod tests { test_work.relations[0].related_work.doi = None; // Remove remaining related work DOI: can't output RelatedMaterial block test_work.relations[1].related_work.doi = None; - // Test truncation of short abstract - test_work.short_abstract = Some("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementum odio feugiat tempus. Etiam eu felis ac metus viverra ornare. In consectetur neque sed feugiat ornare. Mauris at purus fringilla orci tincidunt pulvinar sed a massa. Nullam vestibulum posuere augue, sit amet tincidunt nisl pulvinar ac.".to_string()); + // Remove short abstract: can't output CollateralDetail block + test_work.short_abstract = None; // Reinstate landing page: supplier block for publisher now contains it test_work.landing_page = Some("https://www.book.com".to_string()); let output = generate_test_output(true, &test_work); @@ -3345,14 +3373,7 @@ mod tests { assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); - assert!(output.contains( - r#" - - 02 - 00 - Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementu - "# - )); + assert!(!output.contains(r#" "#)); assert!(output.contains( r#" From 02112f93cb823ba5fc68e1d6c0df2160ebfd908b Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:02:59 +0100 Subject: [PATCH 14/23] Restrict OA-specific output fields to works with licences: ONIX 2.1 --- .../src/xml/onix21_ebsco_host.rs | 231 ++++++++++++------ .../src/xml/onix21_proquest_ebrary.rs | 216 +++++++++++----- 2 files changed, 315 insertions(+), 132 deletions(-) diff --git a/thoth-export-server/src/xml/onix21_ebsco_host.rs b/thoth-export-server/src/xml/onix21_ebsco_host.rs index e426ebc0..72411587 100644 --- a/thoth-export-server/src/xml/onix21_ebsco_host.rs +++ b/thoth-export-server/src/xml/onix21_ebsco_host.rs @@ -58,31 +58,39 @@ impl XmlSpecification for Onix21EbscoHost { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - // EBSCO Host can only accept PDFs and EPUBs, and can only - // process them as Open Access if they are unpriced - let pdf_url = self + // EBSCO Host can only accept PDFs and EPUBs + let pdf_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()); + let pdf_url = pdf_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - let epub_url = self + let epub_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::EPUB) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::EPUB) && !p.locations.is_empty()); + let epub_url = epub_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - if pdf_url.is_some() || epub_url.is_some() { + if pdf_url.is_none() && epub_url.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No PDF or EPUB URL".to_string(), + )); + } + // EBSCO Host can only process works as Open Access if they are unpriced + let is_open_access = self.license.is_some(); + if is_open_access && + // Thoth database only accepts non-zero prices + !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || + epub_publication.is_some_and(|p| p.prices.is_empty())) + { + Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), + )) + } else { let work_id = format!("urn:uuid:{}", self.work_id); let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { @@ -260,18 +268,20 @@ impl XmlElementBlock for Work { w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) }) })?; - write_element_block("OtherText", w, |w| { - // 47 Open access statement - // "Should always be accompanied by a link to the complete license (see code 46)" - // (not specified as required by EBSCO Host themselves) - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + // "Should always be accompanied by a link to the complete license (see code 46)" + // (not specified as required by EBSCO Host themselves) + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) - })?; + } if let Some(license) = &self.license { write_element_block("OtherText", w, |w| { // 46 License @@ -408,34 +418,58 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) - })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) - })?; - // EBSCO Host require the price point for Open Access titles to be listed as "0.01 USD". - write_element_block("Price", w, |w| { - // 01 RRP excluding tax (price code requested by EBSCO) - write_element_block("PriceTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) })?; - write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) + .map_err(|e| e.into()) })?; - write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + // EBSCO Host require the price point for unpriced/Open Access titles to be listed as "0.01 USD". + write_element_block("Price", w, |w| { + // 01 RRP excluding tax (price code requested by EBSCO) + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + }) }) - }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 01 RRP excluding tax (price code requested by EBSCO) + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) + }) + })?; + } + Ok(()) + } }) }) - } else { - Err(ThothError::IncompleteMetadataRecord( - ONIX_ERROR.to_string(), - "No unpriced PDF or EPUB URL".to_string(), - )) } } } @@ -960,7 +994,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 5.99, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 7.99, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1113,17 +1156,11 @@ mod tests { test_work.long_abstract = None; test_work.place = None; test_work.publication_date = None; - test_work.license = None; test_work.landing_page = None; test_work.cover_url = None; test_work.imprint.publisher.publisher_url = None; // Remove third (paperback) publication test_work.publications.pop(); - // Give PDF publication a positive price point - test_work.publications[1].prices = vec![WorkPublicationsPrices { - currency_code: CurrencyCode::USD, - unit_price: 7.99, - }]; let output = generate_test_output(true, &test_work); // Paperback publication removed, so its ISBN no longer appears // (either as the main ISBN or in RelatedProducts) @@ -1141,12 +1178,6 @@ mod tests { r#" Publisher's website: web shop"# )); assert!(!output.contains(r#" https://www.book.com"#)); - // PDF publication is no longer unpriced, hence no PDF URL, and EpubType changes - assert!( - !output.contains(r#" https://www.book.com/pdf_fulltext"#) - ); - assert!(!output.contains(r#" 002"#)); - assert!(output.contains(r#" 029"#)); // No page count supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 00"#)); @@ -1156,11 +1187,6 @@ mod tests { assert!(!output.contains(r#" 03"#)); assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); - // No licence supplied - assert!(!output.contains(r#" 46"#)); - assert!( - !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) - ); // No cover URL supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 04"#)); @@ -1176,13 +1202,78 @@ mod tests { // No publication date supplied assert!(!output.contains(r#" 19991231"#)); assert!(!output.contains(r#" 1999"#)); + // No licence supplied: assume non-OA, output real PDF price + assert!(!output.contains(r#" 47"#)); + assert!(!output.contains(r#" Open access - no commercial use"#)); + assert!(!output.contains(r#" 46"#)); + assert!( + !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) + ); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" R"#)); + assert!(!output + .contains(r#" Open access"#)); + assert!(!output.contains(r#" 0.01"#)); + assert!(!output.contains(r#" USD"#)); + assert!(output.contains(r#" 5.99"#)); + assert!(output.contains(r#" GBP"#)); + assert!(output.contains(r#" 7.99"#)); + assert!(output.contains(r#" EUR"#)); + + // Remove PDF location + test_work.publications[1].locations.clear(); + let output = generate_test_output(true, &test_work); + // PDF no longer has a URL, so EpubType changes, and EPUB price (unpriced) is output + assert!( + !output.contains(r#" https://www.book.com/pdf_fulltext"#) + ); + assert!(!output.contains(r#" 002"#)); + assert!(output.contains(r#" 029"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(output.contains(r#" 0.01"#)); + assert!(output.contains(r#" USD"#)); + + // Give EPUB a price + test_work.publications[0].prices = vec![WorkPublicationsPrices { + currency_code: CurrencyCode::AUD, + unit_price: 10.00, + }]; + let output = generate_test_output(true, &test_work); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(!output.contains(r#" 0.01"#)); + assert!(!output.contains(r#" USD"#)); + assert!(output.contains(r#" 10.00"#)); + assert!(output.contains(r#" AUD"#)); + + // Replace licence: error + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::ebsco_host: No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string() + ); // Remove the EPUB publication's only location: error test_work.publications[0].locations.clear(); let output = generate_test_output(false, &test_work); assert_eq!( output, - "Could not generate onix_2.1::ebsco_host: No unpriced PDF or EPUB URL".to_string() + "Could not generate onix_2.1::ebsco_host: No PDF or EPUB URL".to_string() + ); + + // This occurs whether or not work is OA/priced + test_work.license = None; + test_work.publications[0].prices.clear(); + test_work.publications[1].prices.clear(); + assert_eq!( + output, + "Could not generate onix_2.1::ebsco_host: No PDF or EPUB URL".to_string() ); } } diff --git a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs index 3c23d298..3f7824a5 100644 --- a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs +++ b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs @@ -60,31 +60,39 @@ impl XmlSpecification for Onix21ProquestEbrary { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - // ProQuest Ebrary can only accept PDFs and EPUBs, and can only - // process them as Open Access if they are unpriced - let pdf_url = self + // ProQuest Ebrary can only accept PDFs and EPUBs + let pdf_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()); + let pdf_url = pdf_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - let epub_url = self + let epub_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::EPUB) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::EPUB) && !p.locations.is_empty()); + let epub_url = epub_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - if pdf_url.is_some() || epub_url.is_some() { + if pdf_url.is_none() && epub_url.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No PDF or EPUB URL".to_string(), + )); + } + // ProQuest Ebrary can only process works as Open Access if they are unpriced + let is_open_access = self.license.is_some(); + if is_open_access && + // Thoth database only accepts non-zero prices + !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || + epub_publication.is_some_and(|p| p.prices.is_empty())) + { + Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), + )) + } else { let work_id = format!("urn:uuid:{}", self.work_id); let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { @@ -261,16 +269,18 @@ impl XmlElementBlock for Work { w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) }) })?; - write_element_block("OtherText", w, |w| { - // 47 Open access statement - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) - })?; + } if let Some(license) = &self.license { write_element_block("OtherText", w, |w| { // 46 License @@ -402,25 +412,48 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) - })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) - })?; - // ProQuest Ebrary require Open Access titles to be listed as 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) + })?; + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) + .map_err(|e| e.into()) + })?; + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) + }) + })?; + } + Ok(()) + } }) }) - } else { - Err(ThothError::IncompleteMetadataRecord( - ONIX_ERROR.to_string(), - "No unpriced PDF or EPUB URL".to_string(), - )) } } } @@ -939,7 +972,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 5.99, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 7.99, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1088,17 +1130,11 @@ mod tests { test_work.long_abstract = None; test_work.place = None; test_work.publication_date = None; - test_work.license = None; test_work.landing_page = None; test_work.cover_url = None; test_work.imprint.publisher.publisher_url = None; // Remove third (paperback) publication test_work.publications.pop(); - // Give PDF publication a positive price point - test_work.publications[1].prices = vec![WorkPublicationsPrices { - currency_code: CurrencyCode::USD, - unit_price: 7.99, - }]; let output = generate_test_output(true, &test_work); // Paperback publication removed, so its ISBN no longer appears // (either as the main ISBN or in RelatedProducts) @@ -1116,12 +1152,6 @@ mod tests { r#" Publisher's website: web shop"# )); assert!(!output.contains(r#" https://www.book.com"#)); - // PDF publication is no longer unpriced, hence no PDF URL, and EpubType changes - assert!( - !output.contains(r#" https://www.book.com/pdf_fulltext"#) - ); - assert!(!output.contains(r#" 002"#)); - assert!(output.contains(r#" 029"#)); // No page count supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 00"#)); @@ -1131,11 +1161,6 @@ mod tests { assert!(!output.contains(r#" 03"#)); assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); - // No licence supplied - assert!(!output.contains(r#" 46"#)); - assert!( - !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) - ); // No cover URL supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 04"#)); @@ -1151,13 +1176,80 @@ mod tests { // No publication date supplied assert!(!output.contains(r#" 19991231"#)); assert!(!output.contains(r#" 1999"#)); + // No licence supplied: assume non-OA, output real PDF prices + assert!(!output.contains(r#" 47"#)); + assert!(!output.contains(r#" Open access - no commercial use"#)); + assert!(!output.contains(r#" 46"#)); + assert!( + !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) + ); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" R"#)); + assert!(!output + .contains(r#" Open access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 02"#)); + assert!(output.contains(r#" 5.99"#)); + assert!(output.contains(r#" GBP"#)); + assert!(output.contains(r#" 7.99"#)); + assert!(output.contains(r#" EUR"#)); + + // Remove PDF location + test_work.publications[1].locations.clear(); + let output = generate_test_output(true, &test_work); + // PDF no longer has a URL, so EpubType changes, and EPUB price (unpriced) is output + assert!( + !output.contains(r#" https://www.book.com/pdf_fulltext"#) + ); + assert!(!output.contains(r#" 002"#)); + assert!(output.contains(r#" 029"#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 02"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(output.contains(r#" 01"#)); + + // Give EPUB a price + test_work.publications[0].prices = vec![WorkPublicationsPrices { + currency_code: CurrencyCode::AUD, + unit_price: 10.00, + }]; + let output = generate_test_output(true, &test_work); + assert!(!output.contains(r#" 01"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 02"#)); + assert!(output.contains(r#" 10.00"#)); + assert!(output.contains(r#" AUD"#)); + + // Replace licence: error + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::proquest_ebrary: No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string() + ); // Remove the EPUB publication's only location: error test_work.publications[0].locations.clear(); let output = generate_test_output(false, &test_work); assert_eq!( output, - "Could not generate onix_2.1::proquest_ebrary: No unpriced PDF or EPUB URL".to_string() + "Could not generate onix_2.1::proquest_ebrary: No PDF or EPUB URL".to_string() + ); + + // This occurs whether or not work is OA/priced + test_work.license = None; + test_work.publications[0].prices.clear(); + test_work.publications[1].prices.clear(); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::proquest_ebrary: No PDF or EPUB URL".to_string() ); } } From 54dafac8e90ca57ba1b936e5295dcd4283e74368 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:20:17 +0100 Subject: [PATCH 15/23] Don't output OAPEN ONIX for works without licences (platform is OA-only) --- thoth-export-server/src/xml/onix3_oapen.rs | 54 ++++++++++++---------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/thoth-export-server/src/xml/onix3_oapen.rs b/thoth-export-server/src/xml/onix3_oapen.rs index 16174d22..f484868f 100644 --- a/thoth-export-server/src/xml/onix3_oapen.rs +++ b/thoth-export-server/src/xml/onix3_oapen.rs @@ -60,6 +60,13 @@ impl XmlSpecification for Onix3Oapen { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { + // Don't output works with no licence, as we assume these are non-OA + if self.license.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "Missing License".to_string(), + )); + } // We can only generate the document if there's a PDF if let Some(pdf_url) = self .publications @@ -131,22 +138,21 @@ impl XmlElementBlock for Work { write_element_block("PrimaryContentType", w, |w| { w.write(XmlEvent::Characters("10")).map_err(|e| e.into()) })?; - if let Some(license) = &self.license { - write_element_block("EpubLicense", w, |w| { - write_element_block("EpubLicenseName", w, |w| { - w.write(XmlEvent::Characters("Creative Commons License")) - .map_err(|e| e.into()) + write_element_block("EpubLicense", w, |w| { + write_element_block("EpubLicenseName", w, |w| { + w.write(XmlEvent::Characters("Creative Commons License")) + .map_err(|e| e.into()) + })?; + write_element_block("EpubLicenseExpression", w, |w| { + write_element_block("EpubLicenseExpressionType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) })?; - write_element_block("EpubLicenseExpression", w, |w| { - write_element_block("EpubLicenseExpressionType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("EpubLicenseExpressionLink", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + write_element_block("EpubLicenseExpressionLink", w, |w| { + w.write(XmlEvent::Characters(self.license.as_ref().unwrap())) + .map_err(|e| e.into()) }) - })?; - } + }) + })?; for issue in &self.issues { XmlElementBlock::::xml_element(issue, w).ok(); } @@ -1141,7 +1147,6 @@ mod tests { // Remove some values to test non-output of optional blocks test_work.doi = None; - test_work.license = None; test_work.subtitle = None; test_work.page_count = None; test_work.long_abstract = None; @@ -1152,14 +1157,6 @@ mod tests { // No DOI supplied assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" 10.00001/BOOK.0001"#)); - // No licence supplied - assert!(!output.contains(r#" "#)); - assert!(!output - .contains(r#" Creative Commons License"#)); - assert!(!output.contains(r#" "#)); - assert!(!output - .contains(r#" 02"#)); - assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1241,8 +1238,17 @@ mod tests { assert!(!output .contains(r#" "https://www.book.com/cover""#)); - // Remove the only publication, which is the PDF + // Remove licence. Result: error + test_work.license = None; + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_3.0::oapen: Missing License".to_string() + ); + + // Replace licence, but remove the only publication, which is the PDF // Result: error (can't generate OAPEN ONIX without PDF URL) + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); test_work.publications.clear(); let output = generate_test_output(false, &test_work); assert_eq!( From fde92de20acaa56b793c5d342414cebe97007dc4 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:26:33 +0100 Subject: [PATCH 16/23] Explicitly return from fast-fail checks to reduce branching --- .../src/xml/onix21_ebsco_host.rs | 643 +++++++++--------- .../src/xml/onix21_proquest_ebrary.rs | 621 +++++++++-------- .../src/xml/onix3_google_books.rs | 1 - 3 files changed, 631 insertions(+), 634 deletions(-) diff --git a/thoth-export-server/src/xml/onix21_ebsco_host.rs b/thoth-export-server/src/xml/onix21_ebsco_host.rs index 72411587..9751ffea 100644 --- a/thoth-export-server/src/xml/onix21_ebsco_host.rs +++ b/thoth-export-server/src/xml/onix21_ebsco_host.rs @@ -86,391 +86,390 @@ impl XmlElementBlock for Work { !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || epub_publication.is_some_and(|p| p.prices.is_empty())) { - Err(ThothError::IncompleteMetadataRecord( + return Err(ThothError::IncompleteMetadataRecord( ONIX_ERROR.to_string(), "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), - )) - } else { - let work_id = format!("urn:uuid:{}", self.work_id); - let (main_isbn, isbns) = get_publications_data(&self.publications); - write_element_block("Product", w, |w| { - write_element_block("RecordReference", w, |w| { + )); + } + let work_id = format!("urn:uuid:{}", self.work_id); + let (main_isbn, isbns) = get_publications_data(&self.publications); + write_element_block("Product", w, |w| { + write_element_block("RecordReference", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + })?; + // 03 Notification confirmed on publication + write_element_block("NotificationType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 01 Publisher + write_element_block("RecordSourceType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 01 Proprietary + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { w.write(XmlEvent::Characters(&work_id)) .map_err(|e| e.into()) + }) + })?; + if let Some(isbn) = &main_isbn { + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) + }) })?; - // 03 Notification confirmed on publication - write_element_block("NotificationType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 01 Publisher - write_element_block("RecordSourceType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; + } + if let Some(doi) = &self.doi { write_element_block("ProductIdentifier", w, |w| { - // 01 Proprietary write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + w.write(XmlEvent::Characters(&doi.to_string())) .map_err(|e| e.into()) }) })?; - if let Some(isbn) = &main_isbn { - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) - })?; - } - if let Some(doi) = &self.doi { - write_element_block("ProductIdentifier", w, |w| { - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - }) - })?; + } + // DG Electronic book text in proprietary or open standard format + write_element_block("ProductForm", w, |w| { + w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + })?; + write_element_block("EpubType", w, |w| { + // 002 PDF + let mut epub_type = "002"; + // We definitely have either a PDF URL or an EPUB URL (or both) + if pdf_url.is_none() { + // 029 EPUB + epub_type = "029"; } - // DG Electronic book text in proprietary or open standard format - write_element_block("ProductForm", w, |w| { - w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(epub_type)) + .map_err(|e| e.into()) + })?; + for issue in &self.issues { + XmlElementBlock::::xml_element(issue, w).ok(); + } + write_element_block("Title", w, |w| { + // 01 Distinctive title (book) + write_element_block("TitleType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("EpubType", w, |w| { - // 002 PDF - let mut epub_type = "002"; - // We definitely have either a PDF URL or an EPUB URL (or both) - if pdf_url.is_none() { - // 029 EPUB - epub_type = "029"; - } - w.write(XmlEvent::Characters(epub_type)) + write_element_block("TitleText", w, |w| { + w.write(XmlEvent::Characters(&self.title)) .map_err(|e| e.into()) })?; - for issue in &self.issues { - XmlElementBlock::::xml_element(issue, w).ok(); - } - write_element_block("Title", w, |w| { - // 01 Distinctive title (book) - write_element_block("TitleType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("TitleText", w, |w| { - w.write(XmlEvent::Characters(&self.title)) + if let Some(subtitle) = &self.subtitle { + write_element_block("Subtitle", w, |w| { + w.write(XmlEvent::Characters(subtitle)) .map_err(|e| e.into()) })?; - if let Some(subtitle) = &self.subtitle { - write_element_block("Subtitle", w, |w| { - w.write(XmlEvent::Characters(subtitle)) - .map_err(|e| e.into()) - })?; - } - Ok(()) + } + Ok(()) + })?; + write_element_block("WorkIdentifier", w, |w| { + // 01 Proprietary + write_element_block("WorkIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("WorkIdentifier", w, |w| { - // 01 Proprietary - write_element_block("WorkIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("IDTypeName", w, |w| { - w.write(XmlEvent::Characters("Thoth WorkID")) + write_element_block("IDTypeName", w, |w| { + w.write(XmlEvent::Characters("Thoth WorkID")) + .map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + }) + })?; + let mut websites: HashMap = HashMap::new(); + if let Some(pdf) = pdf_url { + websites.insert( + pdf.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(epub) = epub_url { + websites.insert( + epub.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(landing_page) = &self.landing_page { + websites.insert( + landing_page.to_string(), + ( + "02".to_string(), + "Publisher's website: web shop".to_string(), + ), + ); + } + for (url, description) in websites.iter() { + write_element_block("Website", w, |w| { + write_element_block("WebsiteRole", w, |w| { + w.write(XmlEvent::Characters(&description.0)) .map_err(|e| e.into()) })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + write_element_block("WebsiteDescription", w, |w| { + w.write(XmlEvent::Characters(&description.1)) .map_err(|e| e.into()) + })?; + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) }) })?; - let mut websites: HashMap = HashMap::new(); - if let Some(pdf) = pdf_url { - websites.insert( - pdf.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(epub) = epub_url { - websites.insert( - epub.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(landing_page) = &self.landing_page { - websites.insert( - landing_page.to_string(), - ( - "02".to_string(), - "Publisher's website: web shop".to_string(), - ), - ); - } - for (url, description) in websites.iter() { - write_element_block("Website", w, |w| { - write_element_block("WebsiteRole", w, |w| { - w.write(XmlEvent::Characters(&description.0)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteDescription", w, |w| { - w.write(XmlEvent::Characters(&description.1)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) - }) - })?; - } - for contribution in &self.contributions { - // A51 Research by is not supported in ONIX 2 - if contribution.contribution_type != ContributionType::RESEARCH_BY { - XmlElementBlock::::xml_element(contribution, w).ok(); - } - } - for language in &self.languages { - XmlElementBlock::::xml_element(language, w).ok(); + } + for contribution in &self.contributions { + // A51 Research by is not supported in ONIX 2 + if contribution.contribution_type != ContributionType::RESEARCH_BY { + XmlElementBlock::::xml_element(contribution, w).ok(); } - if let Some(page_count) = self.page_count { - write_element_block("Extent", w, |w| { - // 00 Main content - write_element_block("ExtentType", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("ExtentValue", w, |w| { - w.write(XmlEvent::Characters(&page_count.to_string())) - .map_err(|e| e.into()) - })?; - // 03 Pages - write_element_block("ExtentUnit", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - }) + } + for language in &self.languages { + XmlElementBlock::::xml_element(language, w).ok(); + } + if let Some(page_count) = self.page_count { + write_element_block("Extent", w, |w| { + // 00 Main content + write_element_block("ExtentType", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - } - for subject in &self.subjects { - XmlElementBlock::::xml_element(subject, w).ok(); - } - write_element_block("Audience", w, |w| { - // 01 ONIX audience codes - write_element_block("AudienceCodeType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("ExtentValue", w, |w| { + w.write(XmlEvent::Characters(&page_count.to_string())) + .map_err(|e| e.into()) })?; - // 06 Professional and scholarly - write_element_block("AudienceCodeValue", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + // 03 Pages + write_element_block("ExtentUnit", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) }) })?; - if is_open_access { - write_element_block("OtherText", w, |w| { - // 47 Open access statement - // "Should always be accompanied by a link to the complete license (see code 46)" - // (not specified as required by EBSCO Host themselves) - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) + } + for subject in &self.subjects { + XmlElementBlock::::xml_element(subject, w).ok(); + } + write_element_block("Audience", w, |w| { + // 01 ONIX audience codes + write_element_block("AudienceCodeType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 06 Professional and scholarly + write_element_block("AudienceCodeValue", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + }) + })?; + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + // "Should always be accompanied by a link to the complete license (see code 46)" + // (not specified as required by EBSCO Host themselves) + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) })?; - } - if let Some(license) = &self.license { - write_element_block("OtherText", w, |w| { - // 46 License - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) + })?; + } + if let Some(license) = &self.license { + write_element_block("OtherText", w, |w| { + // 46 License + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) })?; - } - if let Some(labstract) = &self.long_abstract { - write_element_block("OtherText", w, |w| { - // 03 Long description - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 06 Default text format - write_element_block("TextFormat", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(labstract)) - .map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) + }) + })?; + } + if let Some(labstract) = &self.long_abstract { + write_element_block("OtherText", w, |w| { + // 03 Long description + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) })?; - } - if let Some(cover_url) = &self.cover_url { - write_element_block("MediaFile", w, |w| { - // 04 Image: front cover - write_element_block("MediaFileTypeCode", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 01 URL - write_element_block("MediaFileLinkTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("MediaFileLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) - .map_err(|e| e.into()) - }) + // 06 Default text format + write_element_block("TextFormat", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; - } - write_element_block("Imprint", w, |w| { - write_element_block("ImprintName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(labstract)) .map_err(|e| e.into()) }) })?; - write_element_block("Publisher", w, |w| { - // 01 Publisher - write_element_block("PublishingRole", w, |w| { + } + if let Some(cover_url) = &self.cover_url { + write_element_block("MediaFile", w, |w| { + // 04 Image: front cover + write_element_block("MediaFileTypeCode", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 01 URL + write_element_block("MediaFileLinkTypeCode", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("PublisherName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + write_element_block("MediaFileLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) .map_err(|e| e.into()) - })?; - if let Some(publisher_url) = &self.imprint.publisher.publisher_url { - write_element_block("Website", w, |w| { - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(publisher_url)) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + }) })?; - if let Some(place) = &self.place { - write_element_block("CityOfPublication", w, |w| { - w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) - })?; - } - XmlElement::::xml_element(&self.work_status, w)?; - if let Some(date) = self.publication_date { - write_element_block("PublicationDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) - })?; - write_element_block("CopyrightYear", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y").to_string())) - .map_err(|e| e.into()) + } + write_element_block("Imprint", w, |w| { + write_element_block("ImprintName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + .map_err(|e| e.into()) + }) + })?; + write_element_block("Publisher", w, |w| { + // 01 Publisher + write_element_block("PublishingRole", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PublisherName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + if let Some(publisher_url) = &self.imprint.publisher.publisher_url { + write_element_block("Website", w, |w| { + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(publisher_url)) + .map_err(|e| e.into()) + }) })?; } - write_element_block("SalesRights", w, |w| { - // 02 For sale with non-exclusive rights in the specified countries or territories - write_element_block("SalesRightsType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("RightsTerritory", w, |w| { - w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) - }) + Ok(()) + })?; + if let Some(place) = &self.place { + write_element_block("CityOfPublication", w, |w| { + w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) + })?; + } + XmlElement::::xml_element(&self.work_status, w)?; + if let Some(date) = self.publication_date { + write_element_block("PublicationDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + write_element_block("CopyrightYear", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + .map_err(|e| e.into()) })?; - if !isbns.is_empty() { - for (publication_type, isbn) in &isbns { - let relation_code = match publication_type { - PublicationType::PAPERBACK | PublicationType::HARDBACK => "13", // Epublication based on (print product) - _ => "06", // Alternative format - }; + } + write_element_block("SalesRights", w, |w| { + // 02 For sale with non-exclusive rights in the specified countries or territories + write_element_block("SalesRightsType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("RightsTerritory", w, |w| { + w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + }) + })?; + if !isbns.is_empty() { + for (publication_type, isbn) in &isbns { + let relation_code = match publication_type { + PublicationType::PAPERBACK | PublicationType::HARDBACK => "13", // Epublication based on (print product) + _ => "06", // Alternative format + }; - write_element_block("RelatedProduct", w, |w| { - write_element_block("RelationCode", w, |w| { - w.write(XmlEvent::Characters(relation_code)) - .map_err(|e| e.into()) + write_element_block("RelatedProduct", w, |w| { + write_element_block("RelationCode", w, |w| { + w.write(XmlEvent::Characters(relation_code)) + .map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) }) - })?; - } - } - if let Some(date) = self.withdrawn_date { - write_element_block("OutofPrintDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) + }) })?; } - write_element_block("SupplyDetail", w, |w| { - write_element_block("SupplierName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - // 09 Publisher to end-customers - write_element_block("SupplierRole", w, |w| { - w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + } + if let Some(date) = self.withdrawn_date { + write_element_block("OutofPrintDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SupplyDetail", w, |w| { + write_element_block("SupplierName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + // 09 Publisher to end-customers + write_element_block("SupplierRole", w, |w| { + w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + })?; + // 99 Contact supplier + write_element_block("ProductAvailability", w, |w| { + w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + })?; + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) })?; - // 99 Contact supplier - write_element_block("ProductAvailability", w, |w| { - w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) + .map_err(|e| e.into()) })?; - if is_open_access { - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + // EBSCO Host require the price point for unpriced/Open Access titles to be listed as "0.01 USD". + write_element_block("Price", w, |w| { + // 01 RRP excluding tax (price code requested by EBSCO) + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) })?; - } - let publication = match pdf_url.is_some() { - true => pdf_publication, - false => epub_publication, - }; - let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); - if is_open_access || prices.is_empty() { - // EBSCO Host require the price point for unpriced/Open Access titles to be listed as "0.01 USD". + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + }) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); write_element_block("Price", w, |w| { // 01 RRP excluding tax (price code requested by EBSCO) write_element_block("PriceTypeCode", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) })?; write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) }) - }) - } else { - for price in prices { - let unit_price = price.unit_price; - let formatted_price = format!("{unit_price:.2}"); - write_element_block("Price", w, |w| { - // 01 RRP excluding tax (price code requested by EBSCO) - write_element_block("PriceTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters(&formatted_price)) - .map_err(|e| e.into()) - })?; - write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters(&price.currency_code.to_string())) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + })?; } - }) + Ok(()) + } }) - } + }) } } diff --git a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs index 3f7824a5..cb24b2e0 100644 --- a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs +++ b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs @@ -88,373 +88,372 @@ impl XmlElementBlock for Work { !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || epub_publication.is_some_and(|p| p.prices.is_empty())) { - Err(ThothError::IncompleteMetadataRecord( + return Err(ThothError::IncompleteMetadataRecord( ONIX_ERROR.to_string(), "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), - )) - } else { - let work_id = format!("urn:uuid:{}", self.work_id); - let (main_isbn, isbns) = get_publications_data(&self.publications); - write_element_block("Product", w, |w| { - write_element_block("RecordReference", w, |w| { + )); + } + let work_id = format!("urn:uuid:{}", self.work_id); + let (main_isbn, isbns) = get_publications_data(&self.publications); + write_element_block("Product", w, |w| { + write_element_block("RecordReference", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + })?; + // 03 Notification confirmed on publication + write_element_block("NotificationType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 01 Publisher + write_element_block("RecordSourceType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 01 Proprietary + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { w.write(XmlEvent::Characters(&work_id)) .map_err(|e| e.into()) + }) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) })?; - // 03 Notification confirmed on publication - write_element_block("NotificationType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 01 Publisher - write_element_block("RecordSourceType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&main_isbn)) + .map_err(|e| e.into()) + }) + })?; + if let Some(doi) = &self.doi { write_element_block("ProductIdentifier", w, |w| { - // 01 Proprietary write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + w.write(XmlEvent::Characters(&doi.to_string())) .map_err(|e| e.into()) }) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&main_isbn)) - .map_err(|e| e.into()) - }) + } + // DG Electronic book text in proprietary or open standard format + write_element_block("ProductForm", w, |w| { + w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + })?; + write_element_block("EpubType", w, |w| { + // 002 PDF + let mut epub_type = "002"; + // We definitely have either a PDF URL or an EPUB URL (or both) + if pdf_url.is_none() { + // 029 EPUB + epub_type = "029"; + } + w.write(XmlEvent::Characters(epub_type)) + .map_err(|e| e.into()) + })?; + for issue in &self.issues { + XmlElementBlock::::xml_element(issue, w).ok(); + } + write_element_block("Title", w, |w| { + // 01 Distinctive title (book) + write_element_block("TitleType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - if let Some(doi) = &self.doi { - write_element_block("ProductIdentifier", w, |w| { - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - }) + write_element_block("TitleText", w, |w| { + w.write(XmlEvent::Characters(&self.title)) + .map_err(|e| e.into()) + })?; + if let Some(subtitle) = &self.subtitle { + write_element_block("Subtitle", w, |w| { + w.write(XmlEvent::Characters(subtitle)) + .map_err(|e| e.into()) })?; } - // DG Electronic book text in proprietary or open standard format - write_element_block("ProductForm", w, |w| { - w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + Ok(()) + })?; + write_element_block("WorkIdentifier", w, |w| { + // 01 Proprietary + write_element_block("WorkIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("EpubType", w, |w| { - // 002 PDF - let mut epub_type = "002"; - // We definitely have either a PDF URL or an EPUB URL (or both) - if pdf_url.is_none() { - // 029 EPUB - epub_type = "029"; - } - w.write(XmlEvent::Characters(epub_type)) + write_element_block("IDTypeName", w, |w| { + w.write(XmlEvent::Characters("Thoth WorkID")) .map_err(|e| e.into()) })?; - for issue in &self.issues { - XmlElementBlock::::xml_element(issue, w).ok(); - } - write_element_block("Title", w, |w| { - // 01 Distinctive title (book) - write_element_block("TitleType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + }) + })?; + let mut websites: HashMap = HashMap::new(); + if let Some(pdf) = pdf_url { + websites.insert( + pdf.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(epub) = epub_url { + websites.insert( + epub.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(landing_page) = &self.landing_page { + websites.insert( + landing_page.to_string(), + ( + "02".to_string(), + "Publisher's website: web shop".to_string(), + ), + ); + } + for (url, description) in websites.iter() { + write_element_block("Website", w, |w| { + write_element_block("WebsiteRole", w, |w| { + w.write(XmlEvent::Characters(&description.0)) + .map_err(|e| e.into()) })?; - write_element_block("TitleText", w, |w| { - w.write(XmlEvent::Characters(&self.title)) + write_element_block("WebsiteDescription", w, |w| { + w.write(XmlEvent::Characters(&description.1)) .map_err(|e| e.into()) })?; - if let Some(subtitle) = &self.subtitle { - write_element_block("Subtitle", w, |w| { - w.write(XmlEvent::Characters(subtitle)) - .map_err(|e| e.into()) - })?; - } - Ok(()) + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) + }) })?; - write_element_block("WorkIdentifier", w, |w| { - // 01 Proprietary - write_element_block("WorkIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + } + for contribution in &self.contributions { + // A51 Research by is not supported in ONIX 2 + if contribution.contribution_type != ContributionType::RESEARCH_BY { + XmlElementBlock::::xml_element(contribution, w).ok(); + } + } + for language in &self.languages { + XmlElementBlock::::xml_element(language, w).ok(); + } + if let Some(page_count) = self.page_count { + write_element_block("Extent", w, |w| { + // 00 Main content + write_element_block("ExtentType", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - write_element_block("IDTypeName", w, |w| { - w.write(XmlEvent::Characters("Thoth WorkID")) + write_element_block("ExtentValue", w, |w| { + w.write(XmlEvent::Characters(&page_count.to_string())) .map_err(|e| e.into()) })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) - .map_err(|e| e.into()) + // 03 Pages + write_element_block("ExtentUnit", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) }) })?; - let mut websites: HashMap = HashMap::new(); - if let Some(pdf) = pdf_url { - websites.insert( - pdf.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(epub) = epub_url { - websites.insert( - epub.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(landing_page) = &self.landing_page { - websites.insert( - landing_page.to_string(), - ( - "02".to_string(), - "Publisher's website: web shop".to_string(), - ), - ); - } - for (url, description) in websites.iter() { - write_element_block("Website", w, |w| { - write_element_block("WebsiteRole", w, |w| { - w.write(XmlEvent::Characters(&description.0)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteDescription", w, |w| { - w.write(XmlEvent::Characters(&description.1)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) - }) + } + for subject in &self.subjects { + XmlElementBlock::::xml_element(subject, w).ok(); + } + write_element_block("Audience", w, |w| { + // 01 ONIX audience codes + write_element_block("AudienceCodeType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 06 Professional and scholarly + write_element_block("AudienceCodeValue", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + }) + })?; + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) })?; - } - for contribution in &self.contributions { - // A51 Research by is not supported in ONIX 2 - if contribution.contribution_type != ContributionType::RESEARCH_BY { - XmlElementBlock::::xml_element(contribution, w).ok(); - } - } - for language in &self.languages { - XmlElementBlock::::xml_element(language, w).ok(); - } - if let Some(page_count) = self.page_count { - write_element_block("Extent", w, |w| { - // 00 Main content - write_element_block("ExtentType", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("ExtentValue", w, |w| { - w.write(XmlEvent::Characters(&page_count.to_string())) - .map_err(|e| e.into()) - })?; - // 03 Pages - write_element_block("ExtentUnit", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) + })?; + } + if let Some(license) = &self.license { + write_element_block("OtherText", w, |w| { + // 46 License + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) })?; - } - for subject in &self.subjects { - XmlElementBlock::::xml_element(subject, w).ok(); - } - write_element_block("Audience", w, |w| { - // 01 ONIX audience codes - write_element_block("AudienceCodeType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) + }) + })?; + } + if let Some(labstract) = &self.long_abstract { + write_element_block("OtherText", w, |w| { + // 03 Long description + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) })?; - // 06 Professional and scholarly - write_element_block("AudienceCodeValue", w, |w| { + // 06 Default text format + write_element_block("TextFormat", w, |w| { w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) }) })?; - if is_open_access { - write_element_block("OtherText", w, |w| { - // 47 Open access statement - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) + } + if let Some(cover_url) = &self.cover_url { + write_element_block("MediaFile", w, |w| { + // 04 Image: front cover + write_element_block("MediaFileTypeCode", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) })?; - } - if let Some(license) = &self.license { - write_element_block("OtherText", w, |w| { - // 46 License - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + // 01 URL + write_element_block("MediaFileLinkTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - } - if let Some(labstract) = &self.long_abstract { - write_element_block("OtherText", w, |w| { - // 03 Long description - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 06 Default text format - write_element_block("TextFormat", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(labstract)) + write_element_block("MediaFileLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) + .map_err(|e| e.into()) + }) + })?; + } + write_element_block("Imprint", w, |w| { + write_element_block("ImprintName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + .map_err(|e| e.into()) + }) + })?; + write_element_block("Publisher", w, |w| { + // 01 Publisher + write_element_block("PublishingRole", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PublisherName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + if let Some(publisher_url) = &self.imprint.publisher.publisher_url { + write_element_block("Website", w, |w| { + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(publisher_url)) .map_err(|e| e.into()) }) })?; } - if let Some(cover_url) = &self.cover_url { - write_element_block("MediaFile", w, |w| { - // 04 Image: front cover - write_element_block("MediaFileTypeCode", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 01 URL - write_element_block("MediaFileLinkTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + Ok(()) + })?; + if let Some(place) = &self.place { + write_element_block("CityOfPublication", w, |w| { + w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) + })?; + } + XmlElement::::xml_element(&self.work_status, w)?; + if let Some(date) = self.publication_date { + write_element_block("PublicationDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + write_element_block("CopyrightYear", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SalesRights", w, |w| { + // 02 For sale with non-exclusive rights in the specified countries or territories + write_element_block("SalesRightsType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("RightsTerritory", w, |w| { + w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + }) + })?; + if !isbns.is_empty() { + for isbn in &isbns { + write_element_block("RelatedProduct", w, |w| { + // 06 Alternative format + write_element_block("RelationCode", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; - write_element_block("MediaFileLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) - .map_err(|e| e.into()) + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) + }) }) })?; } - write_element_block("Imprint", w, |w| { - write_element_block("ImprintName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.imprint_name)) - .map_err(|e| e.into()) - }) + } + if let Some(date) = self.withdrawn_date { + write_element_block("OutofPrintDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) })?; - write_element_block("Publisher", w, |w| { - // 01 Publisher - write_element_block("PublishingRole", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("PublisherName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - if let Some(publisher_url) = &self.imprint.publisher.publisher_url { - write_element_block("Website", w, |w| { - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(publisher_url)) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + } + write_element_block("SupplyDetail", w, |w| { + write_element_block("SupplierName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) })?; - if let Some(place) = &self.place { - write_element_block("CityOfPublication", w, |w| { - w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) - })?; - } - XmlElement::::xml_element(&self.work_status, w)?; - if let Some(date) = self.publication_date { - write_element_block("PublicationDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) + // 09 Publisher to end-customers + write_element_block("SupplierRole", w, |w| { + w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + })?; + // 99 Contact supplier + write_element_block("ProductAvailability", w, |w| { + w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + })?; + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) })?; - write_element_block("CopyrightYear", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) .map_err(|e| e.into()) })?; } - write_element_block("SalesRights", w, |w| { - // 02 For sale with non-exclusive rights in the specified countries or territories - write_element_block("SalesRightsType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("RightsTerritory", w, |w| { - w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) }) - })?; - if !isbns.is_empty() { - for isbn in &isbns { - write_element_block("RelatedProduct", w, |w| { - // 06 Alternative format - write_element_block("RelationCode", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) }) })?; } + Ok(()) } - if let Some(date) = self.withdrawn_date { - write_element_block("OutofPrintDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) - })?; - } - write_element_block("SupplyDetail", w, |w| { - write_element_block("SupplierName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - // 09 Publisher to end-customers - write_element_block("SupplierRole", w, |w| { - w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) - })?; - // 99 Contact supplier - write_element_block("ProductAvailability", w, |w| { - w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) - })?; - if is_open_access { - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) - })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) - })?; - } - let publication = match pdf_url.is_some() { - true => pdf_publication, - false => epub_publication, - }; - let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); - if is_open_access || prices.is_empty() { - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) - } else { - for price in prices { - let unit_price = price.unit_price; - let formatted_price = format!("{unit_price:.2}"); - write_element_block("Price", w, |w| { - // 02 RRP including tax - write_element_block("PriceTypeCode", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters(&formatted_price)) - .map_err(|e| e.into()) - })?; - write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters(&price.currency_code.to_string())) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) - } - }) }) - } + }) } } diff --git a/thoth-export-server/src/xml/onix3_google_books.rs b/thoth-export-server/src/xml/onix3_google_books.rs index a02e1fb4..f62af98b 100644 --- a/thoth-export-server/src/xml/onix3_google_books.rs +++ b/thoth-export-server/src/xml/onix3_google_books.rs @@ -68,7 +68,6 @@ impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { // Don't output works with no BIC, BISAC or LCC subject code // Google Books can only ingest works which have at least one - if !self.subjects.iter().any(|s| { matches!( s.subject_type, From 499d724c49a4314f8523b953d3d675119fa35c90 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:43:33 +0100 Subject: [PATCH 17/23] Don't output Crossref free_to_read tag if no licence supplied --- .../src/xml/doideposit_crossref.rs | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/thoth-export-server/src/xml/doideposit_crossref.rs b/thoth-export-server/src/xml/doideposit_crossref.rs index d90d5233..c98b89b2 100644 --- a/thoth-export-server/src/xml/doideposit_crossref.rs +++ b/thoth-export-server/src/xml/doideposit_crossref.rs @@ -529,20 +529,20 @@ fn write_access_content( license: &Option, w: &mut EventWriter, ) -> ThothResult<()> { - write_full_element_block( - "ai:program", - Some(vec![("name", "AccessIndicators")]), - w, - |w| { - write_element_block("ai:free_to_read", w, |_w| Ok(()))?; - if let Some(license) = license { + // Assume works without licences are non-OA + if let Some(license) = license { + write_full_element_block( + "ai:program", + Some(vec![("name", "AccessIndicators")]), + w, + |w| { + write_element_block("ai:free_to_read", w, |_w| Ok(()))?; write_element_block("ai:license_ref", w, |w| { w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - })?; - } - Ok(()) - }, - )?; + }) + }, + )?; + } Ok(()) } @@ -1400,9 +1400,9 @@ mod tests { assert!(!output.contains(r#" 02"#)); assert!(!output.contains(r#" 28"#)); assert!(!output.contains(r#" 2000"#)); - // No licence supplied - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); + // No licence supplied: assume non-OA + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); assert!(!output.contains( r#" https://creativecommons.org/licenses/by/4.0/"# )); @@ -2001,9 +2001,9 @@ mod tests { assert!(!output.contains(r#" 978-1-4028-9462-6"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); - // No licence supplied - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); + // No licence supplied: assume non-OA + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); assert!(!output.contains( r#" https://creativecommons.org/licenses/by/4.0/"# )); From 376b2ef70e293ed5c40009e7f6271c3863177c9e Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:47:46 +0100 Subject: [PATCH 18/23] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee583e9a..6522243d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [551](https://github.com/thoth-pub/thoth/issues/551) - Only include chapters in Crossref metadata output if they have DOIs - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade `paperclip` to v0.9.1 - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade rust to `1.81.0` in production and development `Dockerfile` + - [544](https://github.com/thoth-pub/thoth/issues/544) - Implement non-OA metadata in export outputs ### Fixed - [565](https://github.com/thoth-pub/thoth/issues/565) - Don't generate Crossref metadata output if no DOIs (work or chapter) are present From ee11d3512d7c1167d80cdb267961f2fcdddbcbb9 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Tue, 1 Oct 2024 13:03:38 +0100 Subject: [PATCH 19/23] Add second order clause to work --- thoth-api/src/model/work/crud.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/thoth-api/src/model/work/crud.rs b/thoth-api/src/model/work/crud.rs index f2c14496..be44ea49 100644 --- a/thoth-api/src/model/work/crud.rs +++ b/thoth-api/src/model/work/crud.rs @@ -315,6 +315,7 @@ impl Crud for Work { ); } match query + .then_order_by(dsl::work_id) .limit(limit.into()) .offset(offset.into()) .load::(&mut connection) From d976f542529cfcff05fcd5ede5d2b0a9dab4d956 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Tue, 1 Oct 2024 13:07:29 +0100 Subject: [PATCH 20/23] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6522243d..1bf4737d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - [565](https://github.com/thoth-pub/thoth/issues/565) - Don't generate Crossref metadata output if no DOIs (work or chapter) are present + - Add second order by clause (work\_id) to work queries for consistent ordering when multiple works share the same user-ordered field, such as publication date ## [[0.12.9]](https://github.com/thoth-pub/thoth/releases/tag/v0.12.9) - 2024-09-06 ### Added From daad3443265797e05bab9d90e10f3c39ed548267 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Tue, 1 Oct 2024 13:08:33 +0100 Subject: [PATCH 21/23] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bf4737d..25ba6a65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - [565](https://github.com/thoth-pub/thoth/issues/565) - Don't generate Crossref metadata output if no DOIs (work or chapter) are present - - Add second order by clause (work\_id) to work queries for consistent ordering when multiple works share the same user-ordered field, such as publication date + - [632](https://github.com/thoth-pub/thoth/pull/632) - Add second order by clause (work\_id) to work queries for consistent ordering when multiple works share the same user-ordered field, such as publication date ## [[0.12.9]](https://github.com/thoth-pub/thoth/releases/tag/v0.12.9) - 2024-09-06 ### Added From c55e52f3c78ca07c7bfce707ab0815680613d6ea Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Tue, 1 Oct 2024 13:43:13 +0100 Subject: [PATCH 22/23] Bump v0.12.10 --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 12 ++++++------ thoth-api-server/Cargo.toml | 6 +++--- thoth-api/Cargo.toml | 4 ++-- thoth-app-server/Cargo.toml | 2 +- thoth-app/Cargo.toml | 6 +++--- thoth-client/Cargo.toml | 8 ++++---- thoth-errors/Cargo.toml | 2 +- thoth-export-server/Cargo.toml | 8 ++++---- 9 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b6bf32a6..c7303322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3055,7 +3055,7 @@ dependencies = [ [[package]] name = "thoth" -version = "0.12.9" +version = "0.12.10" dependencies = [ "cargo-husky", "clap", @@ -3070,7 +3070,7 @@ dependencies = [ [[package]] name = "thoth-api" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-web", "argon2rs", @@ -3099,7 +3099,7 @@ dependencies = [ [[package]] name = "thoth-api-server" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-cors", "actix-identity", @@ -3115,7 +3115,7 @@ dependencies = [ [[package]] name = "thoth-app" -version = "0.12.9" +version = "0.12.10" dependencies = [ "anyhow", "chrono", @@ -3144,7 +3144,7 @@ dependencies = [ [[package]] name = "thoth-app-server" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-cors", "actix-web", @@ -3154,7 +3154,7 @@ dependencies = [ [[package]] name = "thoth-client" -version = "0.12.9" +version = "0.12.10" dependencies = [ "chrono", "graphql_client", @@ -3170,7 +3170,7 @@ dependencies = [ [[package]] name = "thoth-errors" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-web", "csv", @@ -3191,7 +3191,7 @@ dependencies = [ [[package]] name = "thoth-export-server" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-cors", "actix-web", diff --git a/Cargo.toml b/Cargo.toml index 432b0123..e657279b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -16,11 +16,11 @@ maintenance = { status = "actively-developed" } members = ["thoth-api", "thoth-api-server", "thoth-app", "thoth-app-server", "thoth-client", "thoth-errors", "thoth-export-server"] [dependencies] -thoth-api = { version = "=0.12.9", path = "thoth-api", features = ["backend"] } -thoth-api-server = { version = "=0.12.9", path = "thoth-api-server" } -thoth-app-server = { version = "=0.12.9", path = "thoth-app-server" } -thoth-errors = { version = "=0.12.9", path = "thoth-errors" } -thoth-export-server = { version = "=0.12.9", path = "thoth-export-server" } +thoth-api = { version = "=0.12.10", path = "thoth-api", features = ["backend"] } +thoth-api-server = { version = "=0.12.10", path = "thoth-api-server" } +thoth-app-server = { version = "=0.12.10", path = "thoth-app-server" } +thoth-errors = { version = "=0.12.10", path = "thoth-errors" } +thoth-export-server = { version = "=0.12.10", path = "thoth-export-server" } clap = { version = "4.5.16", features = ["cargo", "env"] } dialoguer = { version = "0.11.0", features = ["password"] } dotenv = "0.15.0" diff --git a/thoth-api-server/Cargo.toml b/thoth-api-server/Cargo.toml index 68954b50..5ea31522 100644 --- a/thoth-api-server/Cargo.toml +++ b/thoth-api-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-api-server" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -9,8 +9,8 @@ repository = "https://github.com/thoth-pub/thoth" readme = "README.md" [dependencies] -thoth-api = { version = "=0.12.9", path = "../thoth-api", features = ["backend"] } -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } +thoth-api = { version = "=0.12.10", path = "../thoth-api", features = ["backend"] } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } actix-web = "4.9" actix-cors = "0.7.0" actix-identity = "0.7.1" diff --git a/thoth-api/Cargo.toml b/thoth-api/Cargo.toml index 46e0809c..409e2f7e 100644 --- a/thoth-api/Cargo.toml +++ b/thoth-api/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-api" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -16,7 +16,7 @@ maintenance = { status = "actively-developed" } backend = ["diesel", "diesel-derive-enum", "diesel_migrations", "futures", "actix-web", "jsonwebtoken"] [dependencies] -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } actix-web = { version = "4.8", optional = true } argon2rs = "0.2.5" isbn2 = "0.4.0" diff --git a/thoth-app-server/Cargo.toml b/thoth-app-server/Cargo.toml index e36cb48a..338fdbfd 100644 --- a/thoth-app-server/Cargo.toml +++ b/thoth-app-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-app-server" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" diff --git a/thoth-app/Cargo.toml b/thoth-app/Cargo.toml index 9ca51acf..f2fb79a3 100644 --- a/thoth-app/Cargo.toml +++ b/thoth-app/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-app" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -36,8 +36,8 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] } # `getrandom` is a dependency of `uuid`, we need to explicitly import and include the `js` feature to enable wasm # https://docs.rs/getrandom/latest/getrandom/#webassembly-support getrandom = { version = "0.2", features = ["js"] } -thoth-api = { version = "=0.12.9", path = "../thoth-api" } -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } +thoth-api = { version = "=0.12.10", path = "../thoth-api" } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } [build-dependencies] dotenv = "0.15.0" diff --git a/thoth-client/Cargo.toml b/thoth-client/Cargo.toml index d5ab33da..4cc16650 100644 --- a/thoth-client/Cargo.toml +++ b/thoth-client/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-client" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -10,8 +10,8 @@ readme = "README.md" build = "build.rs" [dependencies] -thoth-api = {version = "=0.12.9", path = "../thoth-api" } -thoth-errors = {version = "=0.12.9", path = "../thoth-errors" } +thoth-api = {version = "=0.12.10", path = "../thoth-api" } +thoth-errors = {version = "=0.12.10", path = "../thoth-errors" } graphql_client = "0.14.0" chrono = { version = "0.4.38", features = ["serde"] } reqwest = { version = "0.11", features = ["json"] } @@ -22,4 +22,4 @@ serde_json = "1.0" uuid = { version = "1.10.0", features = ["serde"] } [build-dependencies] -thoth-api = { version = "=0.12.9", path = "../thoth-api", features = ["backend"] } +thoth-api = { version = "=0.12.10", path = "../thoth-api", features = ["backend"] } diff --git a/thoth-errors/Cargo.toml b/thoth-errors/Cargo.toml index 6acd4f91..ea1fc104 100644 --- a/thoth-errors/Cargo.toml +++ b/thoth-errors/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-errors" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" diff --git a/thoth-export-server/Cargo.toml b/thoth-export-server/Cargo.toml index 83395ec9..4366ae8a 100644 --- a/thoth-export-server/Cargo.toml +++ b/thoth-export-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-export-server" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -10,9 +10,9 @@ readme = "README.md" build = "build.rs" [dependencies] -thoth-api = { version = "=0.12.9", path = "../thoth-api" } -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } -thoth-client = { version = "=0.12.9", path = "../thoth-client" } +thoth-api = { version = "=0.12.10", path = "../thoth-api" } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } +thoth-client = { version = "=0.12.10", path = "../thoth-client" } actix-web = "4.9" actix-cors = "0.7.0" cc_license = "0.1.0" From 0a725df046653c5efe9b97b23956b38cc8ab8294 Mon Sep 17 00:00:00 2001 From: Javier Arias Date: Tue, 1 Oct 2024 13:44:04 +0100 Subject: [PATCH 23/23] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25ba6a65..1bb8f269 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] + +## [[0.12.10]](https://github.com/thoth-pub/thoth/releases/tag/v0.12.10) - 2024-10-01 ### Added - [628](https://github.com/thoth-pub/thoth/pull/628) - Implement OpenAPI v3 schema in export API, served under `/openapi.json` - [628](https://github.com/thoth-pub/thoth/pull/628) - Added terms of service to export API