diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ffff930..1bb8f269 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [[0.12.10]](https://github.com/thoth-pub/thoth/releases/tag/v0.12.10) - 2024-10-01 +### Added + - [628](https://github.com/thoth-pub/thoth/pull/628) - Implement OpenAPI v3 schema in export API, served under `/openapi.json` + - [628](https://github.com/thoth-pub/thoth/pull/628) - Added terms of service to export API + +### Changed + - [551](https://github.com/thoth-pub/thoth/issues/551) - Only include chapters in Crossref metadata output if they have DOIs + - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade `paperclip` to v0.9.1 + - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade rust to `1.81.0` in production and development `Dockerfile` + - [544](https://github.com/thoth-pub/thoth/issues/544) - Implement non-OA metadata in export outputs + +### Fixed + - [565](https://github.com/thoth-pub/thoth/issues/565) - Don't generate Crossref metadata output if no DOIs (work or chapter) are present + - [632](https://github.com/thoth-pub/thoth/pull/632) - Add second order by clause (work\_id) to work queries for consistent ordering when multiple works share the same user-ordered field, such as publication date + ## [[0.12.9]](https://github.com/thoth-pub/thoth/releases/tag/v0.12.9) - 2024-09-06 ### Added - [595](https://github.com/thoth-pub/thoth/issues/595), [626](https://github.com/thoth-pub/thoth/pull/626) - Remove infrequently used and unused work statuses (unspecified, no longer our product, out of stock indefinitely, out of print, inactive, unknown, remaindered, recalled). Require a publication date for active, withdrawn, and superseded works in Thoth. Add a new `Superseded` work status to replace Out of Print for older editions of Works. Require a withdrawn date for Superseded works. diff --git a/Cargo.lock b/Cargo.lock index 795a3899..c7303322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1697,6 +1697,7 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", + "serde", ] [[package]] @@ -2053,6 +2054,17 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" +[[package]] +name = "openapiv3" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b83630305ecc3355e998ddd2f926f98aae8e105eb42652174a58757851ba47" +dependencies = [ + "indexmap 1.9.3", + "serde", + "serde_json", +] + [[package]] name = "openssl" version = "0.10.66" @@ -2099,13 +2111,14 @@ dependencies = [ [[package]] name = "paperclip" -version = "0.8.2" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2509afd8f138efe07cd367832289f5cc61d1eb1ec7f1eb75172abca6f7b9b66d" +checksum = "ac41f27e83168c22515ef52d62a0357b5f5b8df846aa391f8b903b0ed7719429" dependencies = [ "anyhow", "itertools", "once_cell", + "openapiv3", "paperclip-actix", "paperclip-core", "paperclip-macros", @@ -2120,15 +2133,16 @@ dependencies = [ [[package]] name = "paperclip-actix" -version = "0.6.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4adf797da91baee514bc03b020fdd6673d2f8c1af8a859e50d6d803a4b3dddd2" +checksum = "d5b6f4c003aa6167df82f1574c951638a8b9a43827059da0cda8b5df88d20ecf" dependencies = [ "actix-service", "actix-web", "futures", "mime_guess", "once_cell", + "openapiv3", "paperclip-core", "paperclip-macros", "serde_json", @@ -2136,13 +2150,14 @@ dependencies = [ [[package]] name = "paperclip-core" -version = "0.6.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8db363c823fa71c00da73ff8cee3d6902e1ad66b770cc224a74dc7cf54de3aad" +checksum = "8e2e742f71daf34eb8f62ccc5a1a5e1f029eb84be563523a2a5ee049366329f4" dependencies = [ "actix-web", "mime", "once_cell", + "openapiv3", "paperclip-macros", "pin-project-lite", "regex", @@ -2155,9 +2170,9 @@ dependencies = [ [[package]] name = "paperclip-macros" -version = "0.6.3" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6e25ce2c5362c8d48dc89e0f9ca076d507f7c1eabd04f0d593cdf5addff90c" +checksum = "0385be5ae9c886c46688290534363a229f2531aa2c5c2bfc3b3ddafed5143aaa" dependencies = [ "heck 0.4.1", "http", @@ -3040,7 +3055,7 @@ dependencies = [ [[package]] name = "thoth" -version = "0.12.9" +version = "0.12.10" dependencies = [ "cargo-husky", "clap", @@ -3055,7 +3070,7 @@ dependencies = [ [[package]] name = "thoth-api" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-web", "argon2rs", @@ -3084,7 +3099,7 @@ dependencies = [ [[package]] name = "thoth-api-server" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-cors", "actix-identity", @@ -3100,7 +3115,7 @@ dependencies = [ [[package]] name = "thoth-app" -version = "0.12.9" +version = "0.12.10" dependencies = [ "anyhow", "chrono", @@ -3129,7 +3144,7 @@ dependencies = [ [[package]] name = "thoth-app-server" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-cors", "actix-web", @@ -3139,7 +3154,7 @@ dependencies = [ [[package]] name = "thoth-client" -version = "0.12.9" +version = "0.12.10" dependencies = [ "chrono", "graphql_client", @@ -3155,7 +3170,7 @@ dependencies = [ [[package]] name = "thoth-errors" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-web", "csv", @@ -3176,7 +3191,7 @@ dependencies = [ [[package]] name = "thoth-export-server" -version = "0.12.9" +version = "0.12.10" dependencies = [ "actix-cors", "actix-web", diff --git a/Cargo.toml b/Cargo.toml index 432b0123..e657279b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -16,11 +16,11 @@ maintenance = { status = "actively-developed" } members = ["thoth-api", "thoth-api-server", "thoth-app", "thoth-app-server", "thoth-client", "thoth-errors", "thoth-export-server"] [dependencies] -thoth-api = { version = "=0.12.9", path = "thoth-api", features = ["backend"] } -thoth-api-server = { version = "=0.12.9", path = "thoth-api-server" } -thoth-app-server = { version = "=0.12.9", path = "thoth-app-server" } -thoth-errors = { version = "=0.12.9", path = "thoth-errors" } -thoth-export-server = { version = "=0.12.9", path = "thoth-export-server" } +thoth-api = { version = "=0.12.10", path = "thoth-api", features = ["backend"] } +thoth-api-server = { version = "=0.12.10", path = "thoth-api-server" } +thoth-app-server = { version = "=0.12.10", path = "thoth-app-server" } +thoth-errors = { version = "=0.12.10", path = "thoth-errors" } +thoth-export-server = { version = "=0.12.10", path = "thoth-export-server" } clap = { version = "4.5.16", features = ["cargo", "env"] } dialoguer = { version = "0.11.0", features = ["password"] } dotenv = "0.15.0" diff --git a/Dockerfile b/Dockerfile index 16bf4592..3d87552c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG MUSL_IMAGE=clux/muslrust:1.80.1-stable +ARG MUSL_IMAGE=clux/muslrust:1.81.0-stable FROM ${MUSL_IMAGE} as build diff --git a/Dockerfile.dev b/Dockerfile.dev index a9035611..e3ff5cf2 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -1,4 +1,4 @@ -ARG RUST_VERSION=1.80.1 +ARG RUST_VERSION=1.81.0 FROM rust:${RUST_VERSION} diff --git a/thoth-api-server/Cargo.toml b/thoth-api-server/Cargo.toml index 68954b50..5ea31522 100644 --- a/thoth-api-server/Cargo.toml +++ b/thoth-api-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-api-server" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -9,8 +9,8 @@ repository = "https://github.com/thoth-pub/thoth" readme = "README.md" [dependencies] -thoth-api = { version = "=0.12.9", path = "../thoth-api", features = ["backend"] } -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } +thoth-api = { version = "=0.12.10", path = "../thoth-api", features = ["backend"] } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } actix-web = "4.9" actix-cors = "0.7.0" actix-identity = "0.7.1" diff --git a/thoth-api/Cargo.toml b/thoth-api/Cargo.toml index 46e0809c..409e2f7e 100644 --- a/thoth-api/Cargo.toml +++ b/thoth-api/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-api" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -16,7 +16,7 @@ maintenance = { status = "actively-developed" } backend = ["diesel", "diesel-derive-enum", "diesel_migrations", "futures", "actix-web", "jsonwebtoken"] [dependencies] -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } actix-web = { version = "4.8", optional = true } argon2rs = "0.2.5" isbn2 = "0.4.0" diff --git a/thoth-api/src/model/work/crud.rs b/thoth-api/src/model/work/crud.rs index f2c14496..be44ea49 100644 --- a/thoth-api/src/model/work/crud.rs +++ b/thoth-api/src/model/work/crud.rs @@ -315,6 +315,7 @@ impl Crud for Work { ); } match query + .then_order_by(dsl::work_id) .limit(limit.into()) .offset(offset.into()) .load::(&mut connection) diff --git a/thoth-app-server/Cargo.toml b/thoth-app-server/Cargo.toml index e36cb48a..338fdbfd 100644 --- a/thoth-app-server/Cargo.toml +++ b/thoth-app-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-app-server" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" diff --git a/thoth-app/Cargo.toml b/thoth-app/Cargo.toml index 9ca51acf..f2fb79a3 100644 --- a/thoth-app/Cargo.toml +++ b/thoth-app/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-app" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -36,8 +36,8 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] } # `getrandom` is a dependency of `uuid`, we need to explicitly import and include the `js` feature to enable wasm # https://docs.rs/getrandom/latest/getrandom/#webassembly-support getrandom = { version = "0.2", features = ["js"] } -thoth-api = { version = "=0.12.9", path = "../thoth-api" } -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } +thoth-api = { version = "=0.12.10", path = "../thoth-api" } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } [build-dependencies] dotenv = "0.15.0" diff --git a/thoth-client/Cargo.toml b/thoth-client/Cargo.toml index d5ab33da..4cc16650 100644 --- a/thoth-client/Cargo.toml +++ b/thoth-client/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-client" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -10,8 +10,8 @@ readme = "README.md" build = "build.rs" [dependencies] -thoth-api = {version = "=0.12.9", path = "../thoth-api" } -thoth-errors = {version = "=0.12.9", path = "../thoth-errors" } +thoth-api = {version = "=0.12.10", path = "../thoth-api" } +thoth-errors = {version = "=0.12.10", path = "../thoth-errors" } graphql_client = "0.14.0" chrono = { version = "0.4.38", features = ["serde"] } reqwest = { version = "0.11", features = ["json"] } @@ -22,4 +22,4 @@ serde_json = "1.0" uuid = { version = "1.10.0", features = ["serde"] } [build-dependencies] -thoth-api = { version = "=0.12.9", path = "../thoth-api", features = ["backend"] } +thoth-api = { version = "=0.12.10", path = "../thoth-api", features = ["backend"] } diff --git a/thoth-errors/Cargo.toml b/thoth-errors/Cargo.toml index 6acd4f91..ea1fc104 100644 --- a/thoth-errors/Cargo.toml +++ b/thoth-errors/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-errors" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" diff --git a/thoth-export-server/Cargo.toml b/thoth-export-server/Cargo.toml index f3607cd7..4366ae8a 100644 --- a/thoth-export-server/Cargo.toml +++ b/thoth-export-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "thoth-export-server" -version = "0.12.9" +version = "0.12.10" authors = ["Javier Arias ", "Ross Higman "] edition = "2021" license = "Apache-2.0" @@ -10,9 +10,9 @@ readme = "README.md" build = "build.rs" [dependencies] -thoth-api = { version = "=0.12.9", path = "../thoth-api" } -thoth-errors = { version = "=0.12.9", path = "../thoth-errors" } -thoth-client = { version = "=0.12.9", path = "../thoth-client" } +thoth-api = { version = "=0.12.10", path = "../thoth-api" } +thoth-errors = { version = "=0.12.10", path = "../thoth-errors" } +thoth-client = { version = "=0.12.10", path = "../thoth-client" } actix-web = "4.9" actix-cors = "0.7.0" cc_license = "0.1.0" @@ -23,7 +23,7 @@ futures = "0.3.30" lazy_static = "1.5.0" log = "0.4.20" marc = { version = "3.1.1", features = ["xml"] } -paperclip = { version = "0.8.2", features = ["actix-base", "actix4", "uuid1", "v2"] } +paperclip = { version = "0.9.1", features = ["actix4", "uuid1", "v3"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" uuid = { version = "1.10.0", features = ["serde"] } diff --git a/thoth-export-server/src/lib.rs b/thoth-export-server/src/lib.rs index 7b85b470..7ac5ca87 100644 --- a/thoth-export-server/src/lib.rs +++ b/thoth-export-server/src/lib.rs @@ -31,7 +31,7 @@ struct ApiConfig { impl ApiConfig { pub fn new(public_url: String) -> Self { Self { - api_schema: format!("{public_url}/swagger.json"), + api_schema: format!("{public_url}/openapi.json"), } } } @@ -82,6 +82,7 @@ pub async fn start_server( "Obtain Thoth metadata records in various formats and platform specifications" .to_string(), ), + terms_of_service: Some("https://thoth.pub/policies/terms-thoth-free".to_string()), contact: Some(Contact { name: Some("Thoth Support".to_string()), url: Some("https://thoth.pub".to_string()), @@ -107,6 +108,7 @@ pub async fn start_server( .configure(platform::route) .configure(specification::route) .with_json_spec_at("/swagger.json") + .with_json_spec_v3_at("/openapi.json") .build() }) .workers(threads) diff --git a/thoth-export-server/src/marc21/marc21record_thoth.rs b/thoth-export-server/src/marc21/marc21record_thoth.rs index 52d61e9d..9765d65c 100644 --- a/thoth-export-server/src/marc21/marc21record_thoth.rs +++ b/thoth-export-server/src/marc21/marc21record_thoth.rs @@ -319,12 +319,15 @@ impl Marc21Entry for Work { .and_then(|f| builder.add_field(f))?; } - // 506 - restrictions on access - FieldRepr::from((b"506", "0\\")) - .add_subfield(b"a", "Open Access") - .and_then(|f| f.add_subfield(b"f", "Unrestricted online access")) - .and_then(|f| f.add_subfield(b"2", "star")) - .and_then(|f| builder.add_field(f))?; + // Assume omission of licence means work is non-OA + if self.license.is_some() { + // 506 - restrictions on access + FieldRepr::from((b"506", "0\\")) + .add_subfield(b"a", "Open Access") + .and_then(|f| f.add_subfield(b"f", "Unrestricted online access")) + .and_then(|f| f.add_subfield(b"2", "star")) + .and_then(|f| builder.add_field(f))?; + } // 520 - abstract if let Some(mut long_abstract) = self.long_abstract.clone() { diff --git a/thoth-export-server/src/xml/doideposit_crossref.rs b/thoth-export-server/src/xml/doideposit_crossref.rs index f7b675f3..c98b89b2 100644 --- a/thoth-export-server/src/xml/doideposit_crossref.rs +++ b/thoth-export-server/src/xml/doideposit_crossref.rs @@ -79,6 +79,17 @@ impl XmlSpecification for DoiDepositCrossref { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { + if self.doi.is_none() + && !self + .relations + .iter() + .any(|r| r.relation_type == RelationType::HAS_CHILD && r.related_work.doi.is_some()) + { + return Err(ThothError::IncompleteMetadataRecord( + DEPOSIT_ERROR.to_string(), + "No work or chapter DOIs to deposit".to_string(), + )); + } let work_type = match &self.work_type { WorkType::MONOGRAPH => "monograph", WorkType::EDITED_BOOK => "edited_book", @@ -86,7 +97,6 @@ impl XmlElementBlock for Work { WorkType::JOURNAL_ISSUE | WorkType::BOOK_SET | WorkType::BOOK_CHAPTER => "other", WorkType::Other(_) => unreachable!(), }; - let work = self; // As an alternative to `book_metadata` and `book_series_metadata` below, // `book_set_metadata` can be used for works which are part of a set. // Omitted at present but could be considered as a future enhancement. @@ -106,22 +116,22 @@ impl XmlElementBlock for Work { XmlElementBlock::::xml_element(series, w)?; ordinal = Some(ord); } - write_work_contributions(work, w)?; - write_work_title(work, w)?; - write_work_abstract(work, w)?; + write_work_contributions(self, w)?; + write_work_title(self, w)?; + write_work_abstract(self, w)?; if ordinal.is_some() { let ordinal_i64 = ordinal.unwrap_or(0); write_work_volume(ordinal_i64, w)?; } - write_work_edition(work, w)?; - write_work_publication_date(work, w)?; - write_work_publications(work, w)?; - write_publisher(work, w)?; - write_crossmark_funding_access(work, w)?; - write_doi_collection(work, w)?; - write_work_references(work, w)?; + write_work_edition(self, w)?; + write_work_publication_date(self, w)?; + write_work_publications(self, w)?; + write_publisher(self, w)?; + write_crossmark_funding_access(self, w)?; + write_doi_collection(self, w)?; + write_work_references(self, w)?; Ok(()) })?; @@ -132,7 +142,11 @@ impl XmlElementBlock for Work { .iter() .filter(|r| r.relation_type == RelationType::HAS_CHILD) { - XmlElementBlock::::xml_element(chapter, w)?; + // If chapter has no DOI, nothing to output (`content_item` element + // representing chapter must contain `doi_data` element with `doi`) + if chapter.related_work.doi.is_some() { + XmlElementBlock::::xml_element(chapter, w)?; + } } Ok(()) }) @@ -515,20 +529,20 @@ fn write_access_content( license: &Option, w: &mut EventWriter, ) -> ThothResult<()> { - write_full_element_block( - "ai:program", - Some(vec![("name", "AccessIndicators")]), - w, - |w| { - write_element_block("ai:free_to_read", w, |_w| Ok(()))?; - if let Some(license) = license { + // Assume works without licences are non-OA + if let Some(license) = license { + write_full_element_block( + "ai:program", + Some(vec![("name", "AccessIndicators")]), + w, + |w| { + write_element_block("ai:free_to_read", w, |_w| Ok(()))?; write_element_block("ai:license_ref", w, |w| { w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - })?; - } - Ok(()) - }, - )?; + }) + }, + )?; + } Ok(()) } @@ -612,91 +626,83 @@ fn write_chapter_doi_collection( chapter: &WorkRelations, w: &mut EventWriter, ) -> ThothResult<()> { - if let Some(doi) = &chapter.related_work.doi { - if let Some(landing_page) = &chapter.related_work.landing_page { - write_element_block("doi_data", w, |w| { - write_element_block("doi", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - })?; - write_element_block("resource", w, |w| { - w.write(XmlEvent::Characters(landing_page)) - .map_err(|e| e.into()) - })?; - if let Some(pdf_url) = chapter - .related_work - .publications - .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty() - }) - .and_then(|p| p.locations.iter().find(|l| l.canonical)) - .and_then(|l| l.full_text_url.as_ref()) - { - // Used for CrossRef Similarity Check. URL must point directly to full-text PDF. - // Alternatively, a direct link to full-text HTML can be used (not implemented here). - write_full_element_block( - "collection", - Some(vec![("property", "crawler-based")]), - w, - |w| { - for crawler in ["iParadigms", "google", "msn", "yahoo", "scirus"] { - write_full_element_block( - "item", - Some(vec![("crawler", crawler)]), - w, - |w| { - write_full_element_block( - "resource", - Some(vec![("mime_type", "application/pdf")]), - w, - |w| { - w.write(XmlEvent::Characters(pdf_url)) - .map_err(|e| e.into()) - }, - ) - }, - )?; - } - Ok(()) - }, - )?; - // Used for CrossRef Text and Data Mining. URL must point directly to full-text PDF. - // Alternatively, a direct link to full-text XML can be used (not implemented here). - write_full_element_block( - "collection", - Some(vec![("property", "text-mining")]), - w, - |w| { - write_element_block("item", w, |w| { - write_full_element_block( - "resource", - Some(vec![("mime_type", "application/pdf")]), - w, - |w| { - w.write(XmlEvent::Characters(pdf_url)).map_err(|e| e.into()) - }, - ) - }) - }, - )?; - } - Ok(()) + let doi = &chapter + .related_work + .doi + .as_ref() + .expect("Caller should only pass in chapters which have DOIs"); + if let Some(landing_page) = &chapter.related_work.landing_page { + write_element_block("doi_data", w, |w| { + write_element_block("doi", w, |w| { + w.write(XmlEvent::Characters(&doi.to_string())) + .map_err(|e| e.into()) })?; - } else { - // `doi_data` element is mandatory for `content_item`, and must contain - // both `doi` element and `resource` (landing page) element - return Err(ThothError::IncompleteMetadataRecord( - DEPOSIT_ERROR.to_string(), - "Missing chapter Landing Page".to_string(), - )); - } + write_element_block("resource", w, |w| { + w.write(XmlEvent::Characters(landing_page)) + .map_err(|e| e.into()) + })?; + if let Some(pdf_url) = chapter + .related_work + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()) + .and_then(|p| p.locations.iter().find(|l| l.canonical)) + .and_then(|l| l.full_text_url.as_ref()) + { + // Used for CrossRef Similarity Check. URL must point directly to full-text PDF. + // Alternatively, a direct link to full-text HTML can be used (not implemented here). + write_full_element_block( + "collection", + Some(vec![("property", "crawler-based")]), + w, + |w| { + for crawler in ["iParadigms", "google", "msn", "yahoo", "scirus"] { + write_full_element_block( + "item", + Some(vec![("crawler", crawler)]), + w, + |w| { + write_full_element_block( + "resource", + Some(vec![("mime_type", "application/pdf")]), + w, + |w| { + w.write(XmlEvent::Characters(pdf_url)) + .map_err(|e| e.into()) + }, + ) + }, + )?; + } + Ok(()) + }, + )?; + // Used for CrossRef Text and Data Mining. URL must point directly to full-text PDF. + // Alternatively, a direct link to full-text XML can be used (not implemented here). + write_full_element_block( + "collection", + Some(vec![("property", "text-mining")]), + w, + |w| { + write_element_block("item", w, |w| { + write_full_element_block( + "resource", + Some(vec![("mime_type", "application/pdf")]), + w, + |w| w.write(XmlEvent::Characters(pdf_url)).map_err(|e| e.into()), + ) + }) + }, + )?; + } + Ok(()) + })?; } else { // `doi_data` element is mandatory for `content_item`, and must contain // both `doi` element and `resource` (landing page) element return Err(ThothError::IncompleteMetadataRecord( DEPOSIT_ERROR.to_string(), - "Missing chapter DOI".to_string(), + "Missing chapter Landing Page".to_string(), )); } Ok(()) @@ -1394,9 +1400,9 @@ mod tests { assert!(!output.contains(r#" 02"#)); assert!(!output.contains(r#" 28"#)); assert!(!output.contains(r#" 2000"#)); - // No licence supplied - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); + // No licence supplied: assume non-OA + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); assert!(!output.contains( r#" https://creativecommons.org/licenses/by/4.0/"# )); @@ -1436,17 +1442,6 @@ mod tests { output, "Could not generate doideposit::crossref: Missing chapter Landing Page".to_string() ); - - // Restore landing page but remove DOI. Result: error, as above - test_relations.related_work.edition = None; - test_relations.related_work.landing_page = - Some("https://www.book.com/chapter_one".to_string()); - test_relations.related_work.doi = None; - let output = generate_test_output(false, &test_relations); - assert_eq!( - output, - "Could not generate doideposit::crossref: Missing chapter DOI".to_string() - ); } #[test] @@ -1974,6 +1969,9 @@ mod tests { test_work.publications[0].locations.clear(); // Remove last (hardback) publication test_work.publications.pop(); + // Change sole relation to chapter with no DOI + test_work.relations[0].relation_type = RelationType::HAS_CHILD; + test_work.relations[0].related_work.doi = None; let output = generate_test_output(true, &test_work); // Work type changed @@ -2003,9 +2001,9 @@ mod tests { assert!(!output.contains(r#" 978-1-4028-9462-6"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); - // No licence supplied - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); + // No licence supplied: assume non-OA + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); assert!(!output.contains( r#" https://creativecommons.org/licenses/by/4.0/"# )); @@ -2022,6 +2020,18 @@ mod tests { assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); + // Only chapters with no DOI supplied: no `content_item` elements emitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" Part"#)); + assert!(!output.contains(r#" One"#)); + assert!(!output.contains(r#" 1"#)); + assert!(!output.contains(r#" 02"#)); + assert!(!output.contains(r#" 28"#)); + assert!(!output.contains(r#" 2000"#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 10"#)); + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" https://www.book.com/part_one"#)); // Change work type, remove landing page, remove XML ISBN, // remove all but the omitted contributor @@ -2065,18 +2075,31 @@ mod tests { assert!(!output.contains(r#" 10.00001/BOOK.0001"#)); assert!(!output.contains(r#" https://www.book.com"#)); - // Change work type again, replace landing page but remove DOI + // Remove DOI (so neither work nor chapter DOIs are present). Result: error + test_work.doi = None; + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate doideposit::crossref: No work or chapter DOIs to deposit" + .to_string() + ); + + // Change work type again, replace landing page, replace chapter DOI test_work.work_type = WorkType::JOURNAL_ISSUE; test_work.landing_page = Some("https://www.book.com".to_string()); - test_work.doi = None; + test_work.relations[0].related_work.doi = + Some(Doi::from_str("https://doi.org/10.00001/PART.0001").unwrap()); let output = generate_test_output(true, &test_work); // Work type changed assert!(!output.contains(r#" "#)); assert!(output.contains(r#" "#)); - // No DOI: entire `doi_data` element omitted (even though landing page restored) - assert!(!output.contains(r#" "#)); + // No work DOI: entire work-specific `doi_data` element omitted (even though landing page restored) assert!(!output.contains(r#" 10.00001/BOOK.0001"#)); assert!(!output.contains(r#" https://www.book.com"#)); + // But chapter-specific `doi_data` element will be present (at same nesting level) + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 10.00001/PART.0001"#)); + assert!(output.contains(r#" https://www.book.com/part_one"#)); // Remove publication date. Result: error test_work.publication_date = None; diff --git a/thoth-export-server/src/xml/onix21_ebsco_host.rs b/thoth-export-server/src/xml/onix21_ebsco_host.rs index e426ebc0..9751ffea 100644 --- a/thoth-export-server/src/xml/onix21_ebsco_host.rs +++ b/thoth-export-server/src/xml/onix21_ebsco_host.rs @@ -58,208 +58,217 @@ impl XmlSpecification for Onix21EbscoHost { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - // EBSCO Host can only accept PDFs and EPUBs, and can only - // process them as Open Access if they are unpriced - let pdf_url = self + // EBSCO Host can only accept PDFs and EPUBs + let pdf_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()); + let pdf_url = pdf_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - let epub_url = self + let epub_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::EPUB) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::EPUB) && !p.locations.is_empty()); + let epub_url = epub_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - if pdf_url.is_some() || epub_url.is_some() { - let work_id = format!("urn:uuid:{}", self.work_id); - let (main_isbn, isbns) = get_publications_data(&self.publications); - write_element_block("Product", w, |w| { - write_element_block("RecordReference", w, |w| { + if pdf_url.is_none() && epub_url.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No PDF or EPUB URL".to_string(), + )); + } + // EBSCO Host can only process works as Open Access if they are unpriced + let is_open_access = self.license.is_some(); + if is_open_access && + // Thoth database only accepts non-zero prices + !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || + epub_publication.is_some_and(|p| p.prices.is_empty())) + { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), + )); + } + let work_id = format!("urn:uuid:{}", self.work_id); + let (main_isbn, isbns) = get_publications_data(&self.publications); + write_element_block("Product", w, |w| { + write_element_block("RecordReference", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + })?; + // 03 Notification confirmed on publication + write_element_block("NotificationType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 01 Publisher + write_element_block("RecordSourceType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 01 Proprietary + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { w.write(XmlEvent::Characters(&work_id)) .map_err(|e| e.into()) + }) + })?; + if let Some(isbn) = &main_isbn { + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) + }) })?; - // 03 Notification confirmed on publication - write_element_block("NotificationType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 01 Publisher - write_element_block("RecordSourceType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; + } + if let Some(doi) = &self.doi { write_element_block("ProductIdentifier", w, |w| { - // 01 Proprietary write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + w.write(XmlEvent::Characters(&doi.to_string())) .map_err(|e| e.into()) }) })?; - if let Some(isbn) = &main_isbn { - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) - })?; - } - if let Some(doi) = &self.doi { - write_element_block("ProductIdentifier", w, |w| { - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - }) - })?; + } + // DG Electronic book text in proprietary or open standard format + write_element_block("ProductForm", w, |w| { + w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + })?; + write_element_block("EpubType", w, |w| { + // 002 PDF + let mut epub_type = "002"; + // We definitely have either a PDF URL or an EPUB URL (or both) + if pdf_url.is_none() { + // 029 EPUB + epub_type = "029"; } - // DG Electronic book text in proprietary or open standard format - write_element_block("ProductForm", w, |w| { - w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(epub_type)) + .map_err(|e| e.into()) + })?; + for issue in &self.issues { + XmlElementBlock::::xml_element(issue, w).ok(); + } + write_element_block("Title", w, |w| { + // 01 Distinctive title (book) + write_element_block("TitleType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("EpubType", w, |w| { - // 002 PDF - let mut epub_type = "002"; - // We definitely have either a PDF URL or an EPUB URL (or both) - if pdf_url.is_none() { - // 029 EPUB - epub_type = "029"; - } - w.write(XmlEvent::Characters(epub_type)) + write_element_block("TitleText", w, |w| { + w.write(XmlEvent::Characters(&self.title)) .map_err(|e| e.into()) })?; - for issue in &self.issues { - XmlElementBlock::::xml_element(issue, w).ok(); - } - write_element_block("Title", w, |w| { - // 01 Distinctive title (book) - write_element_block("TitleType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("TitleText", w, |w| { - w.write(XmlEvent::Characters(&self.title)) + if let Some(subtitle) = &self.subtitle { + write_element_block("Subtitle", w, |w| { + w.write(XmlEvent::Characters(subtitle)) .map_err(|e| e.into()) })?; - if let Some(subtitle) = &self.subtitle { - write_element_block("Subtitle", w, |w| { - w.write(XmlEvent::Characters(subtitle)) - .map_err(|e| e.into()) - })?; - } - Ok(()) + } + Ok(()) + })?; + write_element_block("WorkIdentifier", w, |w| { + // 01 Proprietary + write_element_block("WorkIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("WorkIdentifier", w, |w| { - // 01 Proprietary - write_element_block("WorkIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("IDTypeName", w, |w| { - w.write(XmlEvent::Characters("Thoth WorkID")) + write_element_block("IDTypeName", w, |w| { + w.write(XmlEvent::Characters("Thoth WorkID")) + .map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + }) + })?; + let mut websites: HashMap = HashMap::new(); + if let Some(pdf) = pdf_url { + websites.insert( + pdf.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(epub) = epub_url { + websites.insert( + epub.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(landing_page) = &self.landing_page { + websites.insert( + landing_page.to_string(), + ( + "02".to_string(), + "Publisher's website: web shop".to_string(), + ), + ); + } + for (url, description) in websites.iter() { + write_element_block("Website", w, |w| { + write_element_block("WebsiteRole", w, |w| { + w.write(XmlEvent::Characters(&description.0)) .map_err(|e| e.into()) })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + write_element_block("WebsiteDescription", w, |w| { + w.write(XmlEvent::Characters(&description.1)) .map_err(|e| e.into()) + })?; + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) }) })?; - let mut websites: HashMap = HashMap::new(); - if let Some(pdf) = pdf_url { - websites.insert( - pdf.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(epub) = epub_url { - websites.insert( - epub.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(landing_page) = &self.landing_page { - websites.insert( - landing_page.to_string(), - ( - "02".to_string(), - "Publisher's website: web shop".to_string(), - ), - ); - } - for (url, description) in websites.iter() { - write_element_block("Website", w, |w| { - write_element_block("WebsiteRole", w, |w| { - w.write(XmlEvent::Characters(&description.0)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteDescription", w, |w| { - w.write(XmlEvent::Characters(&description.1)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) - }) - })?; - } - for contribution in &self.contributions { - // A51 Research by is not supported in ONIX 2 - if contribution.contribution_type != ContributionType::RESEARCH_BY { - XmlElementBlock::::xml_element(contribution, w).ok(); - } - } - for language in &self.languages { - XmlElementBlock::::xml_element(language, w).ok(); + } + for contribution in &self.contributions { + // A51 Research by is not supported in ONIX 2 + if contribution.contribution_type != ContributionType::RESEARCH_BY { + XmlElementBlock::::xml_element(contribution, w).ok(); } - if let Some(page_count) = self.page_count { - write_element_block("Extent", w, |w| { - // 00 Main content - write_element_block("ExtentType", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("ExtentValue", w, |w| { - w.write(XmlEvent::Characters(&page_count.to_string())) - .map_err(|e| e.into()) - })?; - // 03 Pages - write_element_block("ExtentUnit", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - }) + } + for language in &self.languages { + XmlElementBlock::::xml_element(language, w).ok(); + } + if let Some(page_count) = self.page_count { + write_element_block("Extent", w, |w| { + // 00 Main content + write_element_block("ExtentType", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - } - for subject in &self.subjects { - XmlElementBlock::::xml_element(subject, w).ok(); - } - write_element_block("Audience", w, |w| { - // 01 ONIX audience codes - write_element_block("AudienceCodeType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("ExtentValue", w, |w| { + w.write(XmlEvent::Characters(&page_count.to_string())) + .map_err(|e| e.into()) })?; - // 06 Professional and scholarly - write_element_block("AudienceCodeValue", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + // 03 Pages + write_element_block("ExtentUnit", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) }) })?; + } + for subject in &self.subjects { + XmlElementBlock::::xml_element(subject, w).ok(); + } + write_element_block("Audience", w, |w| { + // 01 ONIX audience codes + write_element_block("AudienceCodeType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 06 Professional and scholarly + write_element_block("AudienceCodeValue", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + }) + })?; + if is_open_access { write_element_block("OtherText", w, |w| { // 47 Open access statement // "Should always be accompanied by a link to the complete license (see code 46)" @@ -272,142 +281,144 @@ impl XmlElementBlock for Work { .map_err(|e| e.into()) }) })?; - if let Some(license) = &self.license { - write_element_block("OtherText", w, |w| { - // 46 License - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + } + if let Some(license) = &self.license { + write_element_block("OtherText", w, |w| { + // 46 License + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) })?; - } - if let Some(labstract) = &self.long_abstract { - write_element_block("OtherText", w, |w| { - // 03 Long description - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 06 Default text format - write_element_block("TextFormat", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(labstract)) - .map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) + }) + })?; + } + if let Some(labstract) = &self.long_abstract { + write_element_block("OtherText", w, |w| { + // 03 Long description + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) })?; - } - if let Some(cover_url) = &self.cover_url { - write_element_block("MediaFile", w, |w| { - // 04 Image: front cover - write_element_block("MediaFileTypeCode", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 01 URL - write_element_block("MediaFileLinkTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("MediaFileLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) - .map_err(|e| e.into()) - }) + // 06 Default text format + write_element_block("TextFormat", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; - } - write_element_block("Imprint", w, |w| { - write_element_block("ImprintName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(labstract)) .map_err(|e| e.into()) }) })?; - write_element_block("Publisher", w, |w| { - // 01 Publisher - write_element_block("PublishingRole", w, |w| { + } + if let Some(cover_url) = &self.cover_url { + write_element_block("MediaFile", w, |w| { + // 04 Image: front cover + write_element_block("MediaFileTypeCode", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 01 URL + write_element_block("MediaFileLinkTypeCode", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("PublisherName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + write_element_block("MediaFileLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) .map_err(|e| e.into()) - })?; - if let Some(publisher_url) = &self.imprint.publisher.publisher_url { - write_element_block("Website", w, |w| { - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(publisher_url)) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + }) })?; - if let Some(place) = &self.place { - write_element_block("CityOfPublication", w, |w| { - w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) - })?; - } - XmlElement::::xml_element(&self.work_status, w)?; - if let Some(date) = self.publication_date { - write_element_block("PublicationDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) - })?; - write_element_block("CopyrightYear", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y").to_string())) - .map_err(|e| e.into()) + } + write_element_block("Imprint", w, |w| { + write_element_block("ImprintName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + .map_err(|e| e.into()) + }) + })?; + write_element_block("Publisher", w, |w| { + // 01 Publisher + write_element_block("PublishingRole", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PublisherName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + if let Some(publisher_url) = &self.imprint.publisher.publisher_url { + write_element_block("Website", w, |w| { + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(publisher_url)) + .map_err(|e| e.into()) + }) })?; } - write_element_block("SalesRights", w, |w| { - // 02 For sale with non-exclusive rights in the specified countries or territories - write_element_block("SalesRightsType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("RightsTerritory", w, |w| { - w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) - }) + Ok(()) + })?; + if let Some(place) = &self.place { + write_element_block("CityOfPublication", w, |w| { + w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) })?; - if !isbns.is_empty() { - for (publication_type, isbn) in &isbns { - let relation_code = match publication_type { - PublicationType::PAPERBACK | PublicationType::HARDBACK => "13", // Epublication based on (print product) - _ => "06", // Alternative format - }; + } + XmlElement::::xml_element(&self.work_status, w)?; + if let Some(date) = self.publication_date { + write_element_block("PublicationDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + write_element_block("CopyrightYear", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SalesRights", w, |w| { + // 02 For sale with non-exclusive rights in the specified countries or territories + write_element_block("SalesRightsType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("RightsTerritory", w, |w| { + w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + }) + })?; + if !isbns.is_empty() { + for (publication_type, isbn) in &isbns { + let relation_code = match publication_type { + PublicationType::PAPERBACK | PublicationType::HARDBACK => "13", // Epublication based on (print product) + _ => "06", // Alternative format + }; - write_element_block("RelatedProduct", w, |w| { - write_element_block("RelationCode", w, |w| { - w.write(XmlEvent::Characters(relation_code)) - .map_err(|e| e.into()) + write_element_block("RelatedProduct", w, |w| { + write_element_block("RelationCode", w, |w| { + w.write(XmlEvent::Characters(relation_code)) + .map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) }) - })?; - } - } - if let Some(date) = self.withdrawn_date { - write_element_block("OutofPrintDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) + }) })?; } - write_element_block("SupplyDetail", w, |w| { - write_element_block("SupplierName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - // 09 Publisher to end-customers - write_element_block("SupplierRole", w, |w| { - w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) - })?; - // 99 Contact supplier - write_element_block("ProductAvailability", w, |w| { - w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) - })?; + } + if let Some(date) = self.withdrawn_date { + write_element_block("OutofPrintDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SupplyDetail", w, |w| { + write_element_block("SupplierName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + // 09 Publisher to end-customers + write_element_block("SupplierRole", w, |w| { + w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + })?; + // 99 Contact supplier + write_element_block("ProductAvailability", w, |w| { + w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + })?; + if is_open_access { // R Restrictions apply, see note write_element_block("AudienceRestrictionFlag", w, |w| { w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) @@ -416,7 +427,14 @@ impl XmlElementBlock for Work { w.write(XmlEvent::Characters("Open access")) .map_err(|e| e.into()) })?; - // EBSCO Host require the price point for Open Access titles to be listed as "0.01 USD". + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + // EBSCO Host require the price point for unpriced/Open Access titles to be listed as "0.01 USD". write_element_block("Price", w, |w| { // 01 RRP excluding tax (price code requested by EBSCO) write_element_block("PriceTypeCode", w, |w| { @@ -429,14 +447,29 @@ impl XmlElementBlock for Work { w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) }) }) - }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 01 RRP excluding tax (price code requested by EBSCO) + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) + }) + })?; + } + Ok(()) + } }) - } else { - Err(ThothError::IncompleteMetadataRecord( - ONIX_ERROR.to_string(), - "No unpriced PDF or EPUB URL".to_string(), - )) - } + }) } } @@ -960,7 +993,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 5.99, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 7.99, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1113,17 +1155,11 @@ mod tests { test_work.long_abstract = None; test_work.place = None; test_work.publication_date = None; - test_work.license = None; test_work.landing_page = None; test_work.cover_url = None; test_work.imprint.publisher.publisher_url = None; // Remove third (paperback) publication test_work.publications.pop(); - // Give PDF publication a positive price point - test_work.publications[1].prices = vec![WorkPublicationsPrices { - currency_code: CurrencyCode::USD, - unit_price: 7.99, - }]; let output = generate_test_output(true, &test_work); // Paperback publication removed, so its ISBN no longer appears // (either as the main ISBN or in RelatedProducts) @@ -1141,12 +1177,6 @@ mod tests { r#" Publisher's website: web shop"# )); assert!(!output.contains(r#" https://www.book.com"#)); - // PDF publication is no longer unpriced, hence no PDF URL, and EpubType changes - assert!( - !output.contains(r#" https://www.book.com/pdf_fulltext"#) - ); - assert!(!output.contains(r#" 002"#)); - assert!(output.contains(r#" 029"#)); // No page count supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 00"#)); @@ -1156,11 +1186,6 @@ mod tests { assert!(!output.contains(r#" 03"#)); assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); - // No licence supplied - assert!(!output.contains(r#" 46"#)); - assert!( - !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) - ); // No cover URL supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 04"#)); @@ -1176,13 +1201,78 @@ mod tests { // No publication date supplied assert!(!output.contains(r#" 19991231"#)); assert!(!output.contains(r#" 1999"#)); + // No licence supplied: assume non-OA, output real PDF price + assert!(!output.contains(r#" 47"#)); + assert!(!output.contains(r#" Open access - no commercial use"#)); + assert!(!output.contains(r#" 46"#)); + assert!( + !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) + ); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" R"#)); + assert!(!output + .contains(r#" Open access"#)); + assert!(!output.contains(r#" 0.01"#)); + assert!(!output.contains(r#" USD"#)); + assert!(output.contains(r#" 5.99"#)); + assert!(output.contains(r#" GBP"#)); + assert!(output.contains(r#" 7.99"#)); + assert!(output.contains(r#" EUR"#)); + + // Remove PDF location + test_work.publications[1].locations.clear(); + let output = generate_test_output(true, &test_work); + // PDF no longer has a URL, so EpubType changes, and EPUB price (unpriced) is output + assert!( + !output.contains(r#" https://www.book.com/pdf_fulltext"#) + ); + assert!(!output.contains(r#" 002"#)); + assert!(output.contains(r#" 029"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(output.contains(r#" 0.01"#)); + assert!(output.contains(r#" USD"#)); + + // Give EPUB a price + test_work.publications[0].prices = vec![WorkPublicationsPrices { + currency_code: CurrencyCode::AUD, + unit_price: 10.00, + }]; + let output = generate_test_output(true, &test_work); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(!output.contains(r#" 0.01"#)); + assert!(!output.contains(r#" USD"#)); + assert!(output.contains(r#" 10.00"#)); + assert!(output.contains(r#" AUD"#)); + + // Replace licence: error + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::ebsco_host: No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string() + ); // Remove the EPUB publication's only location: error test_work.publications[0].locations.clear(); let output = generate_test_output(false, &test_work); assert_eq!( output, - "Could not generate onix_2.1::ebsco_host: No unpriced PDF or EPUB URL".to_string() + "Could not generate onix_2.1::ebsco_host: No PDF or EPUB URL".to_string() + ); + + // This occurs whether or not work is OA/priced + test_work.license = None; + test_work.publications[0].prices.clear(); + test_work.publications[1].prices.clear(); + assert_eq!( + output, + "Could not generate onix_2.1::ebsco_host: No PDF or EPUB URL".to_string() ); } } diff --git a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs index 3c23d298..cb24b2e0 100644 --- a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs +++ b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs @@ -60,207 +60,216 @@ impl XmlSpecification for Onix21ProquestEbrary { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - // ProQuest Ebrary can only accept PDFs and EPUBs, and can only - // process them as Open Access if they are unpriced - let pdf_url = self + // ProQuest Ebrary can only accept PDFs and EPUBs + let pdf_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()); + let pdf_url = pdf_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - let epub_url = self + let epub_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::EPUB) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::EPUB) && !p.locations.is_empty()); + let epub_url = epub_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - if pdf_url.is_some() || epub_url.is_some() { - let work_id = format!("urn:uuid:{}", self.work_id); - let (main_isbn, isbns) = get_publications_data(&self.publications); - write_element_block("Product", w, |w| { - write_element_block("RecordReference", w, |w| { - w.write(XmlEvent::Characters(&work_id)) - .map_err(|e| e.into()) - })?; - // 03 Notification confirmed on publication - write_element_block("NotificationType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 01 Publisher - write_element_block("RecordSourceType", w, |w| { + if pdf_url.is_none() && epub_url.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No PDF or EPUB URL".to_string(), + )); + } + // ProQuest Ebrary can only process works as Open Access if they are unpriced + let is_open_access = self.license.is_some(); + if is_open_access && + // Thoth database only accepts non-zero prices + !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || + epub_publication.is_some_and(|p| p.prices.is_empty())) + { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), + )); + } + let work_id = format!("urn:uuid:{}", self.work_id); + let (main_isbn, isbns) = get_publications_data(&self.publications); + write_element_block("Product", w, |w| { + write_element_block("RecordReference", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + })?; + // 03 Notification confirmed on publication + write_element_block("NotificationType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 01 Publisher + write_element_block("RecordSourceType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 01 Proprietary + write_element_block("ProductIDType", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("ProductIdentifier", w, |w| { - // 01 Proprietary - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) - .map_err(|e| e.into()) - }) + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + }) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&main_isbn)) + .map_err(|e| e.into()) + }) + })?; + if let Some(doi) = &self.doi { write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&main_isbn)) + w.write(XmlEvent::Characters(&doi.to_string())) .map_err(|e| e.into()) }) })?; - if let Some(doi) = &self.doi { - write_element_block("ProductIdentifier", w, |w| { - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - }) - })?; + } + // DG Electronic book text in proprietary or open standard format + write_element_block("ProductForm", w, |w| { + w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + })?; + write_element_block("EpubType", w, |w| { + // 002 PDF + let mut epub_type = "002"; + // We definitely have either a PDF URL or an EPUB URL (or both) + if pdf_url.is_none() { + // 029 EPUB + epub_type = "029"; } - // DG Electronic book text in proprietary or open standard format - write_element_block("ProductForm", w, |w| { - w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(epub_type)) + .map_err(|e| e.into()) + })?; + for issue in &self.issues { + XmlElementBlock::::xml_element(issue, w).ok(); + } + write_element_block("Title", w, |w| { + // 01 Distinctive title (book) + write_element_block("TitleType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("EpubType", w, |w| { - // 002 PDF - let mut epub_type = "002"; - // We definitely have either a PDF URL or an EPUB URL (or both) - if pdf_url.is_none() { - // 029 EPUB - epub_type = "029"; - } - w.write(XmlEvent::Characters(epub_type)) + write_element_block("TitleText", w, |w| { + w.write(XmlEvent::Characters(&self.title)) .map_err(|e| e.into()) })?; - for issue in &self.issues { - XmlElementBlock::::xml_element(issue, w).ok(); - } - write_element_block("Title", w, |w| { - // 01 Distinctive title (book) - write_element_block("TitleType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("TitleText", w, |w| { - w.write(XmlEvent::Characters(&self.title)) + if let Some(subtitle) = &self.subtitle { + write_element_block("Subtitle", w, |w| { + w.write(XmlEvent::Characters(subtitle)) .map_err(|e| e.into()) })?; - if let Some(subtitle) = &self.subtitle { - write_element_block("Subtitle", w, |w| { - w.write(XmlEvent::Characters(subtitle)) - .map_err(|e| e.into()) - })?; - } - Ok(()) + } + Ok(()) + })?; + write_element_block("WorkIdentifier", w, |w| { + // 01 Proprietary + write_element_block("WorkIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("WorkIdentifier", w, |w| { - // 01 Proprietary - write_element_block("WorkIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("IDTypeName", w, |w| { - w.write(XmlEvent::Characters("Thoth WorkID")) + write_element_block("IDTypeName", w, |w| { + w.write(XmlEvent::Characters("Thoth WorkID")) + .map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + }) + })?; + let mut websites: HashMap = HashMap::new(); + if let Some(pdf) = pdf_url { + websites.insert( + pdf.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(epub) = epub_url { + websites.insert( + epub.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(landing_page) = &self.landing_page { + websites.insert( + landing_page.to_string(), + ( + "02".to_string(), + "Publisher's website: web shop".to_string(), + ), + ); + } + for (url, description) in websites.iter() { + write_element_block("Website", w, |w| { + write_element_block("WebsiteRole", w, |w| { + w.write(XmlEvent::Characters(&description.0)) .map_err(|e| e.into()) })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + write_element_block("WebsiteDescription", w, |w| { + w.write(XmlEvent::Characters(&description.1)) .map_err(|e| e.into()) + })?; + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) }) })?; - let mut websites: HashMap = HashMap::new(); - if let Some(pdf) = pdf_url { - websites.insert( - pdf.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(epub) = epub_url { - websites.insert( - epub.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(landing_page) = &self.landing_page { - websites.insert( - landing_page.to_string(), - ( - "02".to_string(), - "Publisher's website: web shop".to_string(), - ), - ); - } - for (url, description) in websites.iter() { - write_element_block("Website", w, |w| { - write_element_block("WebsiteRole", w, |w| { - w.write(XmlEvent::Characters(&description.0)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteDescription", w, |w| { - w.write(XmlEvent::Characters(&description.1)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) - }) - })?; - } - for contribution in &self.contributions { - // A51 Research by is not supported in ONIX 2 - if contribution.contribution_type != ContributionType::RESEARCH_BY { - XmlElementBlock::::xml_element(contribution, w).ok(); - } - } - for language in &self.languages { - XmlElementBlock::::xml_element(language, w).ok(); + } + for contribution in &self.contributions { + // A51 Research by is not supported in ONIX 2 + if contribution.contribution_type != ContributionType::RESEARCH_BY { + XmlElementBlock::::xml_element(contribution, w).ok(); } - if let Some(page_count) = self.page_count { - write_element_block("Extent", w, |w| { - // 00 Main content - write_element_block("ExtentType", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("ExtentValue", w, |w| { - w.write(XmlEvent::Characters(&page_count.to_string())) - .map_err(|e| e.into()) - })?; - // 03 Pages - write_element_block("ExtentUnit", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - }) + } + for language in &self.languages { + XmlElementBlock::::xml_element(language, w).ok(); + } + if let Some(page_count) = self.page_count { + write_element_block("Extent", w, |w| { + // 00 Main content + write_element_block("ExtentType", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - } - for subject in &self.subjects { - XmlElementBlock::::xml_element(subject, w).ok(); - } - write_element_block("Audience", w, |w| { - // 01 ONIX audience codes - write_element_block("AudienceCodeType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("ExtentValue", w, |w| { + w.write(XmlEvent::Characters(&page_count.to_string())) + .map_err(|e| e.into()) })?; - // 06 Professional and scholarly - write_element_block("AudienceCodeValue", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + // 03 Pages + write_element_block("ExtentUnit", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) }) })?; + } + for subject in &self.subjects { + XmlElementBlock::::xml_element(subject, w).ok(); + } + write_element_block("Audience", w, |w| { + // 01 ONIX audience codes + write_element_block("AudienceCodeType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 06 Professional and scholarly + write_element_block("AudienceCodeValue", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + }) + })?; + if is_open_access { write_element_block("OtherText", w, |w| { // 47 Open access statement write_element_block("TextTypeCode", w, |w| { @@ -271,137 +280,139 @@ impl XmlElementBlock for Work { .map_err(|e| e.into()) }) })?; - if let Some(license) = &self.license { - write_element_block("OtherText", w, |w| { - // 46 License - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + } + if let Some(license) = &self.license { + write_element_block("OtherText", w, |w| { + // 46 License + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) })?; - } - if let Some(labstract) = &self.long_abstract { - write_element_block("OtherText", w, |w| { - // 03 Long description - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 06 Default text format - write_element_block("TextFormat", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(labstract)) - .map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) + }) + })?; + } + if let Some(labstract) = &self.long_abstract { + write_element_block("OtherText", w, |w| { + // 03 Long description + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) })?; - } - if let Some(cover_url) = &self.cover_url { - write_element_block("MediaFile", w, |w| { - // 04 Image: front cover - write_element_block("MediaFileTypeCode", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 01 URL - write_element_block("MediaFileLinkTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("MediaFileLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) - .map_err(|e| e.into()) - }) + // 06 Default text format + write_element_block("TextFormat", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; - } - write_element_block("Imprint", w, |w| { - write_element_block("ImprintName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(labstract)) .map_err(|e| e.into()) }) })?; - write_element_block("Publisher", w, |w| { - // 01 Publisher - write_element_block("PublishingRole", w, |w| { + } + if let Some(cover_url) = &self.cover_url { + write_element_block("MediaFile", w, |w| { + // 04 Image: front cover + write_element_block("MediaFileTypeCode", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 01 URL + write_element_block("MediaFileLinkTypeCode", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("PublisherName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + write_element_block("MediaFileLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) .map_err(|e| e.into()) - })?; - if let Some(publisher_url) = &self.imprint.publisher.publisher_url { - write_element_block("Website", w, |w| { - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(publisher_url)) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + }) })?; - if let Some(place) = &self.place { - write_element_block("CityOfPublication", w, |w| { - w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) - })?; - } - XmlElement::::xml_element(&self.work_status, w)?; - if let Some(date) = self.publication_date { - write_element_block("PublicationDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) - })?; - write_element_block("CopyrightYear", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y").to_string())) - .map_err(|e| e.into()) + } + write_element_block("Imprint", w, |w| { + write_element_block("ImprintName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + .map_err(|e| e.into()) + }) + })?; + write_element_block("Publisher", w, |w| { + // 01 Publisher + write_element_block("PublishingRole", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PublisherName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + if let Some(publisher_url) = &self.imprint.publisher.publisher_url { + write_element_block("Website", w, |w| { + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(publisher_url)) + .map_err(|e| e.into()) + }) })?; } - write_element_block("SalesRights", w, |w| { - // 02 For sale with non-exclusive rights in the specified countries or territories - write_element_block("SalesRightsType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("RightsTerritory", w, |w| { - w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) - }) + Ok(()) + })?; + if let Some(place) = &self.place { + write_element_block("CityOfPublication", w, |w| { + w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) + })?; + } + XmlElement::::xml_element(&self.work_status, w)?; + if let Some(date) = self.publication_date { + write_element_block("PublicationDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + write_element_block("CopyrightYear", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SalesRights", w, |w| { + // 02 For sale with non-exclusive rights in the specified countries or territories + write_element_block("SalesRightsType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) })?; - if !isbns.is_empty() { - for isbn in &isbns { - write_element_block("RelatedProduct", w, |w| { - // 06 Alternative format - write_element_block("RelationCode", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + write_element_block("RightsTerritory", w, |w| { + w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + }) + })?; + if !isbns.is_empty() { + for isbn in &isbns { + write_element_block("RelatedProduct", w, |w| { + // 06 Alternative format + write_element_block("RelationCode", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) }) - })?; - } - } - if let Some(date) = self.withdrawn_date { - write_element_block("OutofPrintDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) + }) })?; } - write_element_block("SupplyDetail", w, |w| { - write_element_block("SupplierName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - // 09 Publisher to end-customers - write_element_block("SupplierRole", w, |w| { - w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) - })?; - // 99 Contact supplier - write_element_block("ProductAvailability", w, |w| { - w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) - })?; + } + if let Some(date) = self.withdrawn_date { + write_element_block("OutofPrintDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SupplyDetail", w, |w| { + write_element_block("SupplierName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + // 09 Publisher to end-customers + write_element_block("SupplierRole", w, |w| { + w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + })?; + // 99 Contact supplier + write_element_block("ProductAvailability", w, |w| { + w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + })?; + if is_open_access { // R Restrictions apply, see note write_element_block("AudienceRestrictionFlag", w, |w| { w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) @@ -410,18 +421,39 @@ impl XmlElementBlock for Work { w.write(XmlEvent::Characters("Open access")) .map_err(|e| e.into()) })?; - // ProQuest Ebrary require Open Access titles to be listed as 01 Free of charge + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { write_element_block("UnpricedItemType", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) }) - }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) + }) + })?; + } + Ok(()) + } }) - } else { - Err(ThothError::IncompleteMetadataRecord( - ONIX_ERROR.to_string(), - "No unpriced PDF or EPUB URL".to_string(), - )) - } + }) } } @@ -939,7 +971,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 5.99, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 7.99, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1088,17 +1129,11 @@ mod tests { test_work.long_abstract = None; test_work.place = None; test_work.publication_date = None; - test_work.license = None; test_work.landing_page = None; test_work.cover_url = None; test_work.imprint.publisher.publisher_url = None; // Remove third (paperback) publication test_work.publications.pop(); - // Give PDF publication a positive price point - test_work.publications[1].prices = vec![WorkPublicationsPrices { - currency_code: CurrencyCode::USD, - unit_price: 7.99, - }]; let output = generate_test_output(true, &test_work); // Paperback publication removed, so its ISBN no longer appears // (either as the main ISBN or in RelatedProducts) @@ -1116,12 +1151,6 @@ mod tests { r#" Publisher's website: web shop"# )); assert!(!output.contains(r#" https://www.book.com"#)); - // PDF publication is no longer unpriced, hence no PDF URL, and EpubType changes - assert!( - !output.contains(r#" https://www.book.com/pdf_fulltext"#) - ); - assert!(!output.contains(r#" 002"#)); - assert!(output.contains(r#" 029"#)); // No page count supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 00"#)); @@ -1131,11 +1160,6 @@ mod tests { assert!(!output.contains(r#" 03"#)); assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); - // No licence supplied - assert!(!output.contains(r#" 46"#)); - assert!( - !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) - ); // No cover URL supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 04"#)); @@ -1151,13 +1175,80 @@ mod tests { // No publication date supplied assert!(!output.contains(r#" 19991231"#)); assert!(!output.contains(r#" 1999"#)); + // No licence supplied: assume non-OA, output real PDF prices + assert!(!output.contains(r#" 47"#)); + assert!(!output.contains(r#" Open access - no commercial use"#)); + assert!(!output.contains(r#" 46"#)); + assert!( + !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) + ); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" R"#)); + assert!(!output + .contains(r#" Open access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 02"#)); + assert!(output.contains(r#" 5.99"#)); + assert!(output.contains(r#" GBP"#)); + assert!(output.contains(r#" 7.99"#)); + assert!(output.contains(r#" EUR"#)); + + // Remove PDF location + test_work.publications[1].locations.clear(); + let output = generate_test_output(true, &test_work); + // PDF no longer has a URL, so EpubType changes, and EPUB price (unpriced) is output + assert!( + !output.contains(r#" https://www.book.com/pdf_fulltext"#) + ); + assert!(!output.contains(r#" 002"#)); + assert!(output.contains(r#" 029"#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 02"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(output.contains(r#" 01"#)); + + // Give EPUB a price + test_work.publications[0].prices = vec![WorkPublicationsPrices { + currency_code: CurrencyCode::AUD, + unit_price: 10.00, + }]; + let output = generate_test_output(true, &test_work); + assert!(!output.contains(r#" 01"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 02"#)); + assert!(output.contains(r#" 10.00"#)); + assert!(output.contains(r#" AUD"#)); + + // Replace licence: error + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::proquest_ebrary: No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string() + ); // Remove the EPUB publication's only location: error test_work.publications[0].locations.clear(); let output = generate_test_output(false, &test_work); assert_eq!( output, - "Could not generate onix_2.1::proquest_ebrary: No unpriced PDF or EPUB URL".to_string() + "Could not generate onix_2.1::proquest_ebrary: No PDF or EPUB URL".to_string() + ); + + // This occurs whether or not work is OA/priced + test_work.license = None; + test_work.publications[0].prices.clear(); + test_work.publications[1].prices.clear(); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::proquest_ebrary: No PDF or EPUB URL".to_string() ); } } diff --git a/thoth-export-server/src/xml/onix3_google_books.rs b/thoth-export-server/src/xml/onix3_google_books.rs index a02e1fb4..f62af98b 100644 --- a/thoth-export-server/src/xml/onix3_google_books.rs +++ b/thoth-export-server/src/xml/onix3_google_books.rs @@ -68,7 +68,6 @@ impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { // Don't output works with no BIC, BISAC or LCC subject code // Google Books can only ingest works which have at least one - if !self.subjects.iter().any(|s| { matches!( s.subject_type, diff --git a/thoth-export-server/src/xml/onix3_jstor.rs b/thoth-export-server/src/xml/onix3_jstor.rs index 67a4b581..6d54f2bb 100644 --- a/thoth-export-server/src/xml/onix3_jstor.rs +++ b/thoth-export-server/src/xml/onix3_jstor.rs @@ -81,6 +81,7 @@ impl XmlElementBlock for Work { .and_then(|l| l.full_text_url.as_ref()) { let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let (main_isbn, print_isbn) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { write_element_block("RecordReference", w, |w| { @@ -215,58 +216,63 @@ impl XmlElementBlock for Work { } Ok(()) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(labstract) = &self.long_abstract { - write_element_block("TextContent", w, |w| { - // 03 Description ("30 Abstract" not implemented in OAPEN) - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + if self.long_abstract.is_some() || self.toc.is_some() || is_open_access { + write_element_block("CollateralDetail", w, |w| { + if let Some(labstract) = &self.long_abstract { + write_element_block("TextContent", w, |w| { + // 03 Description ("30 Abstract" not implemented in OAPEN) + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) + }, + ) })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) })?; - write_full_element_block( - "Text", - Some(vec![("language", "eng")]), - w, - |w| { - w.write(XmlEvent::Characters(labstract)) + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open Access")) .map_err(|e| e.into()) - }, - ) - })?; - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - }) - })?; + } + Ok(()) + })?; + } write_element_block("PublishingDetail", w, |w| { write_element_block("Imprint", w, |w| { write_element_block("ImprintName", w, |w| { @@ -402,10 +408,47 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + let prices = self + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF)) + .map(|p| p.prices.clone()) + .unwrap_or_default(); + if is_open_access || prices.is_empty() { + // 01 Free of charge + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters( + &price.currency_code.to_string(), + )) + .map_err(|e| e.into()) + })?; + write_element_block("Territory", w, |w| { + write_element_block("RegionsIncluded", w, |w| { + w.write(XmlEvent::Characters("WORLD")) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + } })?; } Ok(()) @@ -581,9 +624,10 @@ mod tests { use thoth_api::model::Isbn; use thoth_api::model::Orcid; use thoth_client::{ - ContributionType, LanguageCode, LanguageRelation, LocationPlatform, PublicationType, - WorkContributionsContributor, WorkFundings, WorkImprint, WorkImprintPublisher, WorkIssues, - WorkIssuesSeries, WorkPublicationsLocations, WorkStatus, WorkSubjects, WorkType, + ContributionType, CurrencyCode, LanguageCode, LanguageRelation, LocationPlatform, + PublicationType, WorkContributionsContributor, WorkFundings, WorkImprint, + WorkImprintPublisher, WorkIssues, WorkIssuesSeries, WorkPublicationsLocations, + WorkPublicationsPrices, WorkStatus, WorkSubjects, WorkType, }; use uuid::Uuid; @@ -807,7 +851,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 5.95, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 4.95, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1009,6 +1062,32 @@ mod tests { assert!(!output .contains(r#" 02"#)); assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); + // Absence of licence means we assume non-OA + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" Open Access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1022,10 +1101,10 @@ mod tests { // No TOC supplied assert!(!output.contains(r#" 04"#)); assert!(!output.contains(r#" 1. Chapter 1"#)); - // CollateralDetail block is still present as it always contains Open Access statement - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 00"#)); + // No items left to go in CollateralDetail block so it's omitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 00"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); // No publication date supplied @@ -1045,6 +1124,33 @@ mod tests { assert!(!output.contains(r#" 15"#)); assert!(!output.contains(r#" 9781402894626"#)); + // Remove PDF prices but keep book "non-OA" (no licence) + test_work.publications[1].prices.clear(); + let output = generate_test_output(true, &test_work); + assert!(output.contains(r#" 01"#)); + assert!(!output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(!output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); + // Add withdrawn date test_work.withdrawn_date = chrono::NaiveDate::from_ymd_opt(2020, 12, 31); let output = generate_test_output(true, &test_work); diff --git a/thoth-export-server/src/xml/onix3_oapen.rs b/thoth-export-server/src/xml/onix3_oapen.rs index 16174d22..f484868f 100644 --- a/thoth-export-server/src/xml/onix3_oapen.rs +++ b/thoth-export-server/src/xml/onix3_oapen.rs @@ -60,6 +60,13 @@ impl XmlSpecification for Onix3Oapen { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { + // Don't output works with no licence, as we assume these are non-OA + if self.license.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "Missing License".to_string(), + )); + } // We can only generate the document if there's a PDF if let Some(pdf_url) = self .publications @@ -131,22 +138,21 @@ impl XmlElementBlock for Work { write_element_block("PrimaryContentType", w, |w| { w.write(XmlEvent::Characters("10")).map_err(|e| e.into()) })?; - if let Some(license) = &self.license { - write_element_block("EpubLicense", w, |w| { - write_element_block("EpubLicenseName", w, |w| { - w.write(XmlEvent::Characters("Creative Commons License")) - .map_err(|e| e.into()) + write_element_block("EpubLicense", w, |w| { + write_element_block("EpubLicenseName", w, |w| { + w.write(XmlEvent::Characters("Creative Commons License")) + .map_err(|e| e.into()) + })?; + write_element_block("EpubLicenseExpression", w, |w| { + write_element_block("EpubLicenseExpressionType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) })?; - write_element_block("EpubLicenseExpression", w, |w| { - write_element_block("EpubLicenseExpressionType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("EpubLicenseExpressionLink", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + write_element_block("EpubLicenseExpressionLink", w, |w| { + w.write(XmlEvent::Characters(self.license.as_ref().unwrap())) + .map_err(|e| e.into()) }) - })?; - } + }) + })?; for issue in &self.issues { XmlElementBlock::::xml_element(issue, w).ok(); } @@ -1141,7 +1147,6 @@ mod tests { // Remove some values to test non-output of optional blocks test_work.doi = None; - test_work.license = None; test_work.subtitle = None; test_work.page_count = None; test_work.long_abstract = None; @@ -1152,14 +1157,6 @@ mod tests { // No DOI supplied assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" 10.00001/BOOK.0001"#)); - // No licence supplied - assert!(!output.contains(r#" "#)); - assert!(!output - .contains(r#" Creative Commons License"#)); - assert!(!output.contains(r#" "#)); - assert!(!output - .contains(r#" 02"#)); - assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1241,8 +1238,17 @@ mod tests { assert!(!output .contains(r#" "https://www.book.com/cover""#)); - // Remove the only publication, which is the PDF + // Remove licence. Result: error + test_work.license = None; + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_3.0::oapen: Missing License".to_string() + ); + + // Replace licence, but remove the only publication, which is the PDF // Result: error (can't generate OAPEN ONIX without PDF URL) + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); test_work.publications.clear(); let output = generate_test_output(false, &test_work); assert_eq!( diff --git a/thoth-export-server/src/xml/onix3_project_muse.rs b/thoth-export-server/src/xml/onix3_project_muse.rs index 365a75b3..29aec17f 100644 --- a/thoth-export-server/src/xml/onix3_project_muse.rs +++ b/thoth-export-server/src/xml/onix3_project_muse.rs @@ -80,6 +80,7 @@ impl XmlElementBlock for Work { .and_then(|l| l.full_text_url.as_ref()) { let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { write_element_block("RecordReference", w, |w| { @@ -256,58 +257,63 @@ impl XmlElementBlock for Work { })?; Ok(()) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(labstract) = &self.long_abstract { - write_element_block("TextContent", w, |w| { - // 03 Description ("30 Abstract" not implemented in OAPEN) - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + if self.long_abstract.is_some() || self.toc.is_some() || is_open_access { + write_element_block("CollateralDetail", w, |w| { + if let Some(labstract) = &self.long_abstract { + write_element_block("TextContent", w, |w| { + // 03 Description ("30 Abstract" not implemented in OAPEN) + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) + }, + ) })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) })?; - write_full_element_block( - "Text", - Some(vec![("language", "eng")]), - w, - |w| { - w.write(XmlEvent::Characters(labstract)) + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open Access")) .map_err(|e| e.into()) - }, - ) - })?; - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - }) - })?; + } + Ok(()) + })?; + } write_element_block("PublishingDetail", w, |w| { write_element_block("Imprint", w, |w| { write_element_block("ImprintName", w, |w| { @@ -453,10 +459,47 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + let prices = self + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF)) + .map(|p| p.prices.clone()) + .unwrap_or_default(); + if is_open_access || prices.is_empty() { + // 01 Free of charge + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters( + &price.currency_code.to_string(), + )) + .map_err(|e| e.into()) + })?; + write_element_block("Territory", w, |w| { + write_element_block("RegionsIncluded", w, |w| { + w.write(XmlEvent::Characters("WORLD")) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + } })?; } Ok(()) @@ -675,9 +718,10 @@ mod tests { use thoth_api::model::Isbn; use thoth_api::model::Orcid; use thoth_client::{ - ContributionType, LanguageCode, LanguageRelation, LocationPlatform, PublicationType, - WorkContributionsContributor, WorkFundings, WorkImprint, WorkImprintPublisher, WorkIssues, - WorkIssuesSeries, WorkPublicationsLocations, WorkStatus, WorkSubjects, WorkType, + ContributionType, CurrencyCode, LanguageCode, LanguageRelation, LocationPlatform, + PublicationType, WorkContributionsContributor, WorkFundings, WorkImprint, + WorkImprintPublisher, WorkIssues, WorkIssuesSeries, WorkPublicationsLocations, + WorkPublicationsPrices, WorkStatus, WorkSubjects, WorkType, }; use uuid::Uuid; @@ -897,7 +941,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 5.95, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 4.95, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1115,6 +1168,32 @@ mod tests { assert!(!output .contains(r#" 02"#)); assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); + // Absence of licence means we assume non-OA + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" Open Access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1128,10 +1207,10 @@ mod tests { // No TOC supplied assert!(!output.contains(r#" 04"#)); assert!(!output.contains(r#" 1. Chapter 1"#)); - // CollateralDetail block is still present as it always contains Open Access statement - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 00"#)); + // No items left to go in CollateralDetail block so it's omitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 00"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); // No publication date supplied @@ -1154,6 +1233,33 @@ mod tests { assert!(!output.contains(r#" B2"#)); assert!(!output.contains(r#" custom1"#)); + // Remove PDF prices but keep book "non-OA" (no licence) + test_work.publications[0].prices.clear(); + let output = generate_test_output(true, &test_work); + assert!(output.contains(r#" 01"#)); + assert!(!output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(!output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); + // Remove the only remaining (BIC) subject // Result: error (can't generate Project MUSE ONIX without either a BIC or BISAC subject) test_work.subjects.clear(); diff --git a/thoth-export-server/src/xml/onix3_thoth.rs b/thoth-export-server/src/xml/onix3_thoth.rs index 0cc0b941..10c74de1 100644 --- a/thoth-export-server/src/xml/onix3_thoth.rs +++ b/thoth-export-server/src/xml/onix3_thoth.rs @@ -78,6 +78,7 @@ impl XmlElementBlock for Work { )); } let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let isbns: Vec = self .publications .iter() @@ -420,136 +421,152 @@ impl XmlElementBlock for Work { }) }) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(mut short_abstract) = self.short_abstract.clone() { - // Short description field may not exceed 350 characters. - // Ensure that the string is truncated at a valid UTF-8 boundary - // by finding the byte index of the 350th character and then truncating - // the string at that index, to avoid creating invalid UTF-8 sequences. - if let Some((byte_index, _)) = short_abstract.char_indices().nth(350) { - short_abstract.truncate(byte_index); - } - write_element_block("TextContent", w, |w| { - // 02 Short description - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(&short_abstract)) - .map_err(|e| e.into()) - }) - })?; - } - if let Some(long_abstract) = &self.long_abstract { - // 03 Description, 30 Abstract - for text_type in ["03", "30"] { + if self.short_abstract.is_some() + || self.long_abstract.is_some() + || self.toc.is_some() + || self.general_note.is_some() + || self.cover_url.is_some() + || is_open_access + { + write_element_block("CollateralDetail", w, |w| { + if let Some(mut short_abstract) = self.short_abstract.clone() { + // Short description field may not exceed 350 characters. + // Ensure that the string is truncated at a valid UTF-8 boundary + // by finding the byte index of the 350th character and then truncating + // the string at that index, to avoid creating invalid UTF-8 sequences. + if let Some((byte_index, _)) = short_abstract.char_indices().nth(350) { + short_abstract.truncate(byte_index); + } write_element_block("TextContent", w, |w| { + // 02 Short description write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters(text_type)) - .map_err(|e| e.into()) + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) })?; // 00 Unrestricted write_element_block("ContentAudience", w, |w| { w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(long_abstract)) + w.write(XmlEvent::Characters(&short_abstract)) .map_err(|e| e.into()) }) })?; } - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_full_element_block("Text", Some(vec![("language", "eng")]), w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - })?; - if let Some(general_note) = &self.general_note { - write_element_block("TextContent", w, |w| { - // 13 Publisher's notice - // "A statement included by a publisher in fulfillment of contractual obligations" - // Used in many different ways - closest approximation - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("13")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(general_note)) - .map_err(|e| e.into()) - }) - })?; - } - if let Some(cover_url) = &self.cover_url { - write_element_block("SupportingResource", w, |w| { - // 01 Front cover - write_element_block("ResourceContentType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - // 03 Image - write_element_block("ResourceMode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - if let Some(cover_caption) = &self.cover_caption { - write_element_block("ResourceFeature", w, |w| { - // 02 Caption - write_element_block("ResourceFeatureType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + if let Some(long_abstract) = &self.long_abstract { + // 03 Description, 30 Abstract + for text_type in ["03", "30"] { + write_element_block("TextContent", w, |w| { + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters(text_type)) + .map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - write_element_block("FeatureNote", w, |w| { - w.write(XmlEvent::Characters(cover_caption)) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(long_abstract)) .map_err(|e| e.into()) }) })?; } - write_element_block("ResourceVersion", w, |w| { - // 02 Downloadable file - write_element_block("ResourceForm", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) + })?; + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) })?; - write_element_block("ResourceLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters("Open Access")) + .map_err(|e| e.into()) + }, + ) + })?; + } + if let Some(general_note) = &self.general_note { + write_element_block("TextContent", w, |w| { + // 13 Publisher's notice + // "A statement included by a publisher in fulfillment of contractual obligations" + // Used in many different ways - closest approximation + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("13")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(general_note)) .map_err(|e| e.into()) }) - }) - })?; - } - Ok(()) - })?; + })?; + } + if let Some(cover_url) = &self.cover_url { + write_element_block("SupportingResource", w, |w| { + // 01 Front cover + write_element_block("ResourceContentType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + // 03 Image + write_element_block("ResourceMode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + if let Some(cover_caption) = &self.cover_caption { + write_element_block("ResourceFeature", w, |w| { + // 02 Caption + write_element_block("ResourceFeatureType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("FeatureNote", w, |w| { + w.write(XmlEvent::Characters(cover_caption)) + .map_err(|e| e.into()) + }) + })?; + } + write_element_block("ResourceVersion", w, |w| { + // 02 Downloadable file + write_element_block("ResourceForm", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("ResourceLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + })?; + } let chapter_relations: Vec = self .relations .clone() @@ -3125,6 +3142,15 @@ mod tests { 13 00 This is a general note + "# + )); + // No licence means we assume the title is non-OA + assert!(!output.contains( + r#" + + 20 + 00 + Open Access "# )); // SupportingResource block still present but ResourceFeature absent @@ -3248,6 +3274,8 @@ mod tests { "# )); + // Test truncation of short abstract + test_work.short_abstract = Some("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementum odio feugiat tempus. Etiam eu felis ac metus viverra ornare. In consectetur neque sed feugiat ornare. Mauris at purus fringilla orci tincidunt pulvinar sed a massa. Nullam vestibulum posuere augue, sit amet tincidunt nisl pulvinar ac.".to_string()); // Remove even more values test_work.edition = None; test_work.table_count = None; @@ -3283,14 +3311,14 @@ mod tests { )); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); - // No cover URL means no SupportingResource block - CollateralDetail only contains OA statement + // No cover URL means no SupportingResource block - CollateralDetail only contains short abstract assert!(output.contains( r#" - 20 + 02 00 - Open Access + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementu "# )); @@ -3336,8 +3364,8 @@ mod tests { test_work.relations[0].related_work.doi = None; // Remove remaining related work DOI: can't output RelatedMaterial block test_work.relations[1].related_work.doi = None; - // Test truncation of short abstract - test_work.short_abstract = Some("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementum odio feugiat tempus. Etiam eu felis ac metus viverra ornare. In consectetur neque sed feugiat ornare. Mauris at purus fringilla orci tincidunt pulvinar sed a massa. Nullam vestibulum posuere augue, sit amet tincidunt nisl pulvinar ac.".to_string()); + // Remove short abstract: can't output CollateralDetail block + test_work.short_abstract = None; // Reinstate landing page: supplier block for publisher now contains it test_work.landing_page = Some("https://www.book.com".to_string()); let output = generate_test_output(true, &test_work); @@ -3345,14 +3373,7 @@ mod tests { assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); - assert!(output.contains( - r#" - - 02 - 00 - Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementu - "# - )); + assert!(!output.contains(r#" "#)); assert!(output.contains( r#"