From 2bb9ac869735f15ea8268abe40437876bf787fa9 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:53:04 +0100 Subject: [PATCH 1/7] Restrict OA-specific output fields to works with licences: MARC --- .../src/marc21/marc21record_thoth.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/thoth-export-server/src/marc21/marc21record_thoth.rs b/thoth-export-server/src/marc21/marc21record_thoth.rs index 52d61e9dc..9765d65cd 100644 --- a/thoth-export-server/src/marc21/marc21record_thoth.rs +++ b/thoth-export-server/src/marc21/marc21record_thoth.rs @@ -319,12 +319,15 @@ impl Marc21Entry for Work { .and_then(|f| builder.add_field(f))?; } - // 506 - restrictions on access - FieldRepr::from((b"506", "0\\")) - .add_subfield(b"a", "Open Access") - .and_then(|f| f.add_subfield(b"f", "Unrestricted online access")) - .and_then(|f| f.add_subfield(b"2", "star")) - .and_then(|f| builder.add_field(f))?; + // Assume omission of licence means work is non-OA + if self.license.is_some() { + // 506 - restrictions on access + FieldRepr::from((b"506", "0\\")) + .add_subfield(b"a", "Open Access") + .and_then(|f| f.add_subfield(b"f", "Unrestricted online access")) + .and_then(|f| f.add_subfield(b"2", "star")) + .and_then(|f| builder.add_field(f))?; + } // 520 - abstract if let Some(mut long_abstract) = self.long_abstract.clone() { From 2ea9940f2029996ce3424c5e5a333e8372ef68cd Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:59:52 +0100 Subject: [PATCH 2/7] Restrict OA-specific output fields to works with licences: ONIX 3.0 --- thoth-export-server/src/xml/onix3_jstor.rs | 226 +++++++++++---- .../src/xml/onix3_project_muse.rs | 226 +++++++++++---- thoth-export-server/src/xml/onix3_thoth.rs | 267 ++++++++++-------- 3 files changed, 476 insertions(+), 243 deletions(-) diff --git a/thoth-export-server/src/xml/onix3_jstor.rs b/thoth-export-server/src/xml/onix3_jstor.rs index 67a4b5819..6d54f2bb1 100644 --- a/thoth-export-server/src/xml/onix3_jstor.rs +++ b/thoth-export-server/src/xml/onix3_jstor.rs @@ -81,6 +81,7 @@ impl XmlElementBlock for Work { .and_then(|l| l.full_text_url.as_ref()) { let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let (main_isbn, print_isbn) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { write_element_block("RecordReference", w, |w| { @@ -215,58 +216,63 @@ impl XmlElementBlock for Work { } Ok(()) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(labstract) = &self.long_abstract { - write_element_block("TextContent", w, |w| { - // 03 Description ("30 Abstract" not implemented in OAPEN) - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + if self.long_abstract.is_some() || self.toc.is_some() || is_open_access { + write_element_block("CollateralDetail", w, |w| { + if let Some(labstract) = &self.long_abstract { + write_element_block("TextContent", w, |w| { + // 03 Description ("30 Abstract" not implemented in OAPEN) + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) + }, + ) })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) })?; - write_full_element_block( - "Text", - Some(vec![("language", "eng")]), - w, - |w| { - w.write(XmlEvent::Characters(labstract)) + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open Access")) .map_err(|e| e.into()) - }, - ) - })?; - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - }) - })?; + } + Ok(()) + })?; + } write_element_block("PublishingDetail", w, |w| { write_element_block("Imprint", w, |w| { write_element_block("ImprintName", w, |w| { @@ -402,10 +408,47 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + let prices = self + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF)) + .map(|p| p.prices.clone()) + .unwrap_or_default(); + if is_open_access || prices.is_empty() { + // 01 Free of charge + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters( + &price.currency_code.to_string(), + )) + .map_err(|e| e.into()) + })?; + write_element_block("Territory", w, |w| { + write_element_block("RegionsIncluded", w, |w| { + w.write(XmlEvent::Characters("WORLD")) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + } })?; } Ok(()) @@ -581,9 +624,10 @@ mod tests { use thoth_api::model::Isbn; use thoth_api::model::Orcid; use thoth_client::{ - ContributionType, LanguageCode, LanguageRelation, LocationPlatform, PublicationType, - WorkContributionsContributor, WorkFundings, WorkImprint, WorkImprintPublisher, WorkIssues, - WorkIssuesSeries, WorkPublicationsLocations, WorkStatus, WorkSubjects, WorkType, + ContributionType, CurrencyCode, LanguageCode, LanguageRelation, LocationPlatform, + PublicationType, WorkContributionsContributor, WorkFundings, WorkImprint, + WorkImprintPublisher, WorkIssues, WorkIssuesSeries, WorkPublicationsLocations, + WorkPublicationsPrices, WorkStatus, WorkSubjects, WorkType, }; use uuid::Uuid; @@ -807,7 +851,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 5.95, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 4.95, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1009,6 +1062,32 @@ mod tests { assert!(!output .contains(r#" 02"#)); assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); + // Absence of licence means we assume non-OA + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" Open Access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1022,10 +1101,10 @@ mod tests { // No TOC supplied assert!(!output.contains(r#" 04"#)); assert!(!output.contains(r#" 1. Chapter 1"#)); - // CollateralDetail block is still present as it always contains Open Access statement - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 00"#)); + // No items left to go in CollateralDetail block so it's omitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 00"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); // No publication date supplied @@ -1045,6 +1124,33 @@ mod tests { assert!(!output.contains(r#" 15"#)); assert!(!output.contains(r#" 9781402894626"#)); + // Remove PDF prices but keep book "non-OA" (no licence) + test_work.publications[1].prices.clear(); + let output = generate_test_output(true, &test_work); + assert!(output.contains(r#" 01"#)); + assert!(!output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(!output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); + // Add withdrawn date test_work.withdrawn_date = chrono::NaiveDate::from_ymd_opt(2020, 12, 31); let output = generate_test_output(true, &test_work); diff --git a/thoth-export-server/src/xml/onix3_project_muse.rs b/thoth-export-server/src/xml/onix3_project_muse.rs index 365a75b3e..29aec17f2 100644 --- a/thoth-export-server/src/xml/onix3_project_muse.rs +++ b/thoth-export-server/src/xml/onix3_project_muse.rs @@ -80,6 +80,7 @@ impl XmlElementBlock for Work { .and_then(|l| l.full_text_url.as_ref()) { let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { write_element_block("RecordReference", w, |w| { @@ -256,58 +257,63 @@ impl XmlElementBlock for Work { })?; Ok(()) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(labstract) = &self.long_abstract { - write_element_block("TextContent", w, |w| { - // 03 Description ("30 Abstract" not implemented in OAPEN) - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + if self.long_abstract.is_some() || self.toc.is_some() || is_open_access { + write_element_block("CollateralDetail", w, |w| { + if let Some(labstract) = &self.long_abstract { + write_element_block("TextContent", w, |w| { + // 03 Description ("30 Abstract" not implemented in OAPEN) + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) + }, + ) })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) })?; - write_full_element_block( - "Text", - Some(vec![("language", "eng")]), - w, - |w| { - w.write(XmlEvent::Characters(labstract)) + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open Access")) .map_err(|e| e.into()) - }, - ) - })?; - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - }) - })?; + } + Ok(()) + })?; + } write_element_block("PublishingDetail", w, |w| { write_element_block("Imprint", w, |w| { write_element_block("ImprintName", w, |w| { @@ -453,10 +459,47 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + let prices = self + .publications + .iter() + .find(|p| p.publication_type.eq(&PublicationType::PDF)) + .map(|p| p.prices.clone()) + .unwrap_or_default(); + if is_open_access || prices.is_empty() { + // 01 Free of charge + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters( + &price.currency_code.to_string(), + )) + .map_err(|e| e.into()) + })?; + write_element_block("Territory", w, |w| { + write_element_block("RegionsIncluded", w, |w| { + w.write(XmlEvent::Characters("WORLD")) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + } })?; } Ok(()) @@ -675,9 +718,10 @@ mod tests { use thoth_api::model::Isbn; use thoth_api::model::Orcid; use thoth_client::{ - ContributionType, LanguageCode, LanguageRelation, LocationPlatform, PublicationType, - WorkContributionsContributor, WorkFundings, WorkImprint, WorkImprintPublisher, WorkIssues, - WorkIssuesSeries, WorkPublicationsLocations, WorkStatus, WorkSubjects, WorkType, + ContributionType, CurrencyCode, LanguageCode, LanguageRelation, LocationPlatform, + PublicationType, WorkContributionsContributor, WorkFundings, WorkImprint, + WorkImprintPublisher, WorkIssues, WorkIssuesSeries, WorkPublicationsLocations, + WorkPublicationsPrices, WorkStatus, WorkSubjects, WorkType, }; use uuid::Uuid; @@ -897,7 +941,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 5.95, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 4.95, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1115,6 +1168,32 @@ mod tests { assert!(!output .contains(r#" 02"#)); assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); + // Absence of licence means we assume non-OA + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" Open Access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1128,10 +1207,10 @@ mod tests { // No TOC supplied assert!(!output.contains(r#" 04"#)); assert!(!output.contains(r#" 1. Chapter 1"#)); - // CollateralDetail block is still present as it always contains Open Access statement - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 00"#)); + // No items left to go in CollateralDetail block so it's omitted + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 00"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); // No publication date supplied @@ -1154,6 +1233,33 @@ mod tests { assert!(!output.contains(r#" B2"#)); assert!(!output.contains(r#" custom1"#)); + // Remove PDF prices but keep book "non-OA" (no licence) + test_work.publications[0].prices.clear(); + let output = generate_test_output(true, &test_work); + assert!(output.contains(r#" 01"#)); + assert!(!output.contains( + r#" + + 02 + 5.95 + EUR + + WORLD + + "# + )); + assert!(!output.contains( + r#" + + 02 + 4.95 + GBP + + WORLD + + "# + )); + // Remove the only remaining (BIC) subject // Result: error (can't generate Project MUSE ONIX without either a BIC or BISAC subject) test_work.subjects.clear(); diff --git a/thoth-export-server/src/xml/onix3_thoth.rs b/thoth-export-server/src/xml/onix3_thoth.rs index 0cc0b941a..10c74de16 100644 --- a/thoth-export-server/src/xml/onix3_thoth.rs +++ b/thoth-export-server/src/xml/onix3_thoth.rs @@ -78,6 +78,7 @@ impl XmlElementBlock for Work { )); } let work_id = format!("urn:uuid:{}", self.work_id); + let is_open_access = self.license.is_some(); let isbns: Vec = self .publications .iter() @@ -420,136 +421,152 @@ impl XmlElementBlock for Work { }) }) })?; - write_element_block("CollateralDetail", w, |w| { - if let Some(mut short_abstract) = self.short_abstract.clone() { - // Short description field may not exceed 350 characters. - // Ensure that the string is truncated at a valid UTF-8 boundary - // by finding the byte index of the 350th character and then truncating - // the string at that index, to avoid creating invalid UTF-8 sequences. - if let Some((byte_index, _)) = short_abstract.char_indices().nth(350) { - short_abstract.truncate(byte_index); - } - write_element_block("TextContent", w, |w| { - // 02 Short description - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(&short_abstract)) - .map_err(|e| e.into()) - }) - })?; - } - if let Some(long_abstract) = &self.long_abstract { - // 03 Description, 30 Abstract - for text_type in ["03", "30"] { + if self.short_abstract.is_some() + || self.long_abstract.is_some() + || self.toc.is_some() + || self.general_note.is_some() + || self.cover_url.is_some() + || is_open_access + { + write_element_block("CollateralDetail", w, |w| { + if let Some(mut short_abstract) = self.short_abstract.clone() { + // Short description field may not exceed 350 characters. + // Ensure that the string is truncated at a valid UTF-8 boundary + // by finding the byte index of the 350th character and then truncating + // the string at that index, to avoid creating invalid UTF-8 sequences. + if let Some((byte_index, _)) = short_abstract.char_indices().nth(350) { + short_abstract.truncate(byte_index); + } write_element_block("TextContent", w, |w| { + // 02 Short description write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters(text_type)) - .map_err(|e| e.into()) + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) })?; // 00 Unrestricted write_element_block("ContentAudience", w, |w| { w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(long_abstract)) + w.write(XmlEvent::Characters(&short_abstract)) .map_err(|e| e.into()) }) })?; } - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) - })?; - } - write_element_block("TextContent", w, |w| { - // 20 Open access statement - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_full_element_block("Text", Some(vec![("language", "eng")]), w, |w| { - w.write(XmlEvent::Characters("Open Access")) - .map_err(|e| e.into()) - }) - })?; - if let Some(general_note) = &self.general_note { - write_element_block("TextContent", w, |w| { - // 13 Publisher's notice - // "A statement included by a publisher in fulfillment of contractual obligations" - // Used in many different ways - closest approximation - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("13")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(general_note)) - .map_err(|e| e.into()) - }) - })?; - } - if let Some(cover_url) = &self.cover_url { - write_element_block("SupportingResource", w, |w| { - // 01 Front cover - write_element_block("ResourceContentType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - // 03 Image - write_element_block("ResourceMode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - if let Some(cover_caption) = &self.cover_caption { - write_element_block("ResourceFeature", w, |w| { - // 02 Caption - write_element_block("ResourceFeatureType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + if let Some(long_abstract) = &self.long_abstract { + // 03 Description, 30 Abstract + for text_type in ["03", "30"] { + write_element_block("TextContent", w, |w| { + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters(text_type)) + .map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - write_element_block("FeatureNote", w, |w| { - w.write(XmlEvent::Characters(cover_caption)) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(long_abstract)) .map_err(|e| e.into()) }) })?; } - write_element_block("ResourceVersion", w, |w| { - // 02 Downloadable file - write_element_block("ResourceForm", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + } + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) + })?; + } + if is_open_access { + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) })?; - write_element_block("ResourceLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block( + "Text", + Some(vec![("language", "eng")]), + w, + |w| { + w.write(XmlEvent::Characters("Open Access")) + .map_err(|e| e.into()) + }, + ) + })?; + } + if let Some(general_note) = &self.general_note { + write_element_block("TextContent", w, |w| { + // 13 Publisher's notice + // "A statement included by a publisher in fulfillment of contractual obligations" + // Used in many different ways - closest approximation + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("13")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(general_note)) .map_err(|e| e.into()) }) - }) - })?; - } - Ok(()) - })?; + })?; + } + if let Some(cover_url) = &self.cover_url { + write_element_block("SupportingResource", w, |w| { + // 01 Front cover + write_element_block("ResourceContentType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + // 03 Image + write_element_block("ResourceMode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + if let Some(cover_caption) = &self.cover_caption { + write_element_block("ResourceFeature", w, |w| { + // 02 Caption + write_element_block("ResourceFeatureType", w, |w| { + w.write(XmlEvent::Characters("02")) + .map_err(|e| e.into()) + })?; + write_element_block("FeatureNote", w, |w| { + w.write(XmlEvent::Characters(cover_caption)) + .map_err(|e| e.into()) + }) + })?; + } + write_element_block("ResourceVersion", w, |w| { + // 02 Downloadable file + write_element_block("ResourceForm", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("ResourceLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) + .map_err(|e| e.into()) + }) + }) + })?; + } + Ok(()) + })?; + } let chapter_relations: Vec = self .relations .clone() @@ -3125,6 +3142,15 @@ mod tests { 13 00 This is a general note + "# + )); + // No licence means we assume the title is non-OA + assert!(!output.contains( + r#" + + 20 + 00 + Open Access "# )); // SupportingResource block still present but ResourceFeature absent @@ -3248,6 +3274,8 @@ mod tests { "# )); + // Test truncation of short abstract + test_work.short_abstract = Some("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementum odio feugiat tempus. Etiam eu felis ac metus viverra ornare. In consectetur neque sed feugiat ornare. Mauris at purus fringilla orci tincidunt pulvinar sed a massa. Nullam vestibulum posuere augue, sit amet tincidunt nisl pulvinar ac.".to_string()); // Remove even more values test_work.edition = None; test_work.table_count = None; @@ -3283,14 +3311,14 @@ mod tests { )); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); - // No cover URL means no SupportingResource block - CollateralDetail only contains OA statement + // No cover URL means no SupportingResource block - CollateralDetail only contains short abstract assert!(output.contains( r#" - 20 + 02 00 - Open Access + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementu "# )); @@ -3336,8 +3364,8 @@ mod tests { test_work.relations[0].related_work.doi = None; // Remove remaining related work DOI: can't output RelatedMaterial block test_work.relations[1].related_work.doi = None; - // Test truncation of short abstract - test_work.short_abstract = Some("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementum odio feugiat tempus. Etiam eu felis ac metus viverra ornare. In consectetur neque sed feugiat ornare. Mauris at purus fringilla orci tincidunt pulvinar sed a massa. Nullam vestibulum posuere augue, sit amet tincidunt nisl pulvinar ac.".to_string()); + // Remove short abstract: can't output CollateralDetail block + test_work.short_abstract = None; // Reinstate landing page: supplier block for publisher now contains it test_work.landing_page = Some("https://www.book.com".to_string()); let output = generate_test_output(true, &test_work); @@ -3345,14 +3373,7 @@ mod tests { assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" "#)); - assert!(output.contains( - r#" - - 02 - 00 - Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vel libero eleifend, ultrices purus vitae, suscipit ligula. Aliquam ornare quam et nulla vestibulum, id euismod tellus malesuada. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nullam ornare bibendum ex nec dapibus. Proin porta risus elementu - "# - )); + assert!(!output.contains(r#" "#)); assert!(output.contains( r#" From 02112f93cb823ba5fc68e1d6c0df2160ebfd908b Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:02:59 +0100 Subject: [PATCH 3/7] Restrict OA-specific output fields to works with licences: ONIX 2.1 --- .../src/xml/onix21_ebsco_host.rs | 231 ++++++++++++------ .../src/xml/onix21_proquest_ebrary.rs | 216 +++++++++++----- 2 files changed, 315 insertions(+), 132 deletions(-) diff --git a/thoth-export-server/src/xml/onix21_ebsco_host.rs b/thoth-export-server/src/xml/onix21_ebsco_host.rs index e426ebc0b..72411587e 100644 --- a/thoth-export-server/src/xml/onix21_ebsco_host.rs +++ b/thoth-export-server/src/xml/onix21_ebsco_host.rs @@ -58,31 +58,39 @@ impl XmlSpecification for Onix21EbscoHost { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - // EBSCO Host can only accept PDFs and EPUBs, and can only - // process them as Open Access if they are unpriced - let pdf_url = self + // EBSCO Host can only accept PDFs and EPUBs + let pdf_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()); + let pdf_url = pdf_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - let epub_url = self + let epub_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::EPUB) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::EPUB) && !p.locations.is_empty()); + let epub_url = epub_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - if pdf_url.is_some() || epub_url.is_some() { + if pdf_url.is_none() && epub_url.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No PDF or EPUB URL".to_string(), + )); + } + // EBSCO Host can only process works as Open Access if they are unpriced + let is_open_access = self.license.is_some(); + if is_open_access && + // Thoth database only accepts non-zero prices + !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || + epub_publication.is_some_and(|p| p.prices.is_empty())) + { + Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), + )) + } else { let work_id = format!("urn:uuid:{}", self.work_id); let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { @@ -260,18 +268,20 @@ impl XmlElementBlock for Work { w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) }) })?; - write_element_block("OtherText", w, |w| { - // 47 Open access statement - // "Should always be accompanied by a link to the complete license (see code 46)" - // (not specified as required by EBSCO Host themselves) - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + // "Should always be accompanied by a link to the complete license (see code 46)" + // (not specified as required by EBSCO Host themselves) + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) - })?; + } if let Some(license) = &self.license { write_element_block("OtherText", w, |w| { // 46 License @@ -408,34 +418,58 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) - })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) - })?; - // EBSCO Host require the price point for Open Access titles to be listed as "0.01 USD". - write_element_block("Price", w, |w| { - // 01 RRP excluding tax (price code requested by EBSCO) - write_element_block("PriceTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) })?; - write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) + .map_err(|e| e.into()) })?; - write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + // EBSCO Host require the price point for unpriced/Open Access titles to be listed as "0.01 USD". + write_element_block("Price", w, |w| { + // 01 RRP excluding tax (price code requested by EBSCO) + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + }) }) - }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 01 RRP excluding tax (price code requested by EBSCO) + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) + }) + })?; + } + Ok(()) + } }) }) - } else { - Err(ThothError::IncompleteMetadataRecord( - ONIX_ERROR.to_string(), - "No unpriced PDF or EPUB URL".to_string(), - )) } } } @@ -960,7 +994,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 5.99, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 7.99, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1113,17 +1156,11 @@ mod tests { test_work.long_abstract = None; test_work.place = None; test_work.publication_date = None; - test_work.license = None; test_work.landing_page = None; test_work.cover_url = None; test_work.imprint.publisher.publisher_url = None; // Remove third (paperback) publication test_work.publications.pop(); - // Give PDF publication a positive price point - test_work.publications[1].prices = vec![WorkPublicationsPrices { - currency_code: CurrencyCode::USD, - unit_price: 7.99, - }]; let output = generate_test_output(true, &test_work); // Paperback publication removed, so its ISBN no longer appears // (either as the main ISBN or in RelatedProducts) @@ -1141,12 +1178,6 @@ mod tests { r#" Publisher's website: web shop"# )); assert!(!output.contains(r#" https://www.book.com"#)); - // PDF publication is no longer unpriced, hence no PDF URL, and EpubType changes - assert!( - !output.contains(r#" https://www.book.com/pdf_fulltext"#) - ); - assert!(!output.contains(r#" 002"#)); - assert!(output.contains(r#" 029"#)); // No page count supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 00"#)); @@ -1156,11 +1187,6 @@ mod tests { assert!(!output.contains(r#" 03"#)); assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); - // No licence supplied - assert!(!output.contains(r#" 46"#)); - assert!( - !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) - ); // No cover URL supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 04"#)); @@ -1176,13 +1202,78 @@ mod tests { // No publication date supplied assert!(!output.contains(r#" 19991231"#)); assert!(!output.contains(r#" 1999"#)); + // No licence supplied: assume non-OA, output real PDF price + assert!(!output.contains(r#" 47"#)); + assert!(!output.contains(r#" Open access - no commercial use"#)); + assert!(!output.contains(r#" 46"#)); + assert!( + !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) + ); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" R"#)); + assert!(!output + .contains(r#" Open access"#)); + assert!(!output.contains(r#" 0.01"#)); + assert!(!output.contains(r#" USD"#)); + assert!(output.contains(r#" 5.99"#)); + assert!(output.contains(r#" GBP"#)); + assert!(output.contains(r#" 7.99"#)); + assert!(output.contains(r#" EUR"#)); + + // Remove PDF location + test_work.publications[1].locations.clear(); + let output = generate_test_output(true, &test_work); + // PDF no longer has a URL, so EpubType changes, and EPUB price (unpriced) is output + assert!( + !output.contains(r#" https://www.book.com/pdf_fulltext"#) + ); + assert!(!output.contains(r#" 002"#)); + assert!(output.contains(r#" 029"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(output.contains(r#" 0.01"#)); + assert!(output.contains(r#" USD"#)); + + // Give EPUB a price + test_work.publications[0].prices = vec![WorkPublicationsPrices { + currency_code: CurrencyCode::AUD, + unit_price: 10.00, + }]; + let output = generate_test_output(true, &test_work); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(!output.contains(r#" 0.01"#)); + assert!(!output.contains(r#" USD"#)); + assert!(output.contains(r#" 10.00"#)); + assert!(output.contains(r#" AUD"#)); + + // Replace licence: error + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::ebsco_host: No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string() + ); // Remove the EPUB publication's only location: error test_work.publications[0].locations.clear(); let output = generate_test_output(false, &test_work); assert_eq!( output, - "Could not generate onix_2.1::ebsco_host: No unpriced PDF or EPUB URL".to_string() + "Could not generate onix_2.1::ebsco_host: No PDF or EPUB URL".to_string() + ); + + // This occurs whether or not work is OA/priced + test_work.license = None; + test_work.publications[0].prices.clear(); + test_work.publications[1].prices.clear(); + assert_eq!( + output, + "Could not generate onix_2.1::ebsco_host: No PDF or EPUB URL".to_string() ); } } diff --git a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs index 3c23d2981..3f7824a5b 100644 --- a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs +++ b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs @@ -60,31 +60,39 @@ impl XmlSpecification for Onix21ProquestEbrary { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - // ProQuest Ebrary can only accept PDFs and EPUBs, and can only - // process them as Open Access if they are unpriced - let pdf_url = self + // ProQuest Ebrary can only accept PDFs and EPUBs + let pdf_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::PDF) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::PDF) && !p.locations.is_empty()); + let pdf_url = pdf_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - let epub_url = self + let epub_publication = self .publications .iter() - .find(|p| { - p.publication_type.eq(&PublicationType::EPUB) - && !p.locations.is_empty() - // Thoth database only accepts non-zero prices - && p.prices.is_empty() - }) + .find(|p| p.publication_type.eq(&PublicationType::EPUB) && !p.locations.is_empty()); + let epub_url = epub_publication .and_then(|p| p.locations.iter().find(|l| l.canonical)) .and_then(|l| l.full_text_url.as_ref()); - if pdf_url.is_some() || epub_url.is_some() { + if pdf_url.is_none() && epub_url.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No PDF or EPUB URL".to_string(), + )); + } + // ProQuest Ebrary can only process works as Open Access if they are unpriced + let is_open_access = self.license.is_some(); + if is_open_access && + // Thoth database only accepts non-zero prices + !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || + epub_publication.is_some_and(|p| p.prices.is_empty())) + { + Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), + )) + } else { let work_id = format!("urn:uuid:{}", self.work_id); let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { @@ -261,16 +269,18 @@ impl XmlElementBlock for Work { w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) }) })?; - write_element_block("OtherText", w, |w| { - // 47 Open access statement - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) - })?; + } if let Some(license) = &self.license { write_element_block("OtherText", w, |w| { // 46 License @@ -402,25 +412,48 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) - })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) - })?; - // ProQuest Ebrary require Open Access titles to be listed as 01 Free of charge - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) + })?; + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) + .map_err(|e| e.into()) + })?; + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) + })?; + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) + }) + })?; + } + Ok(()) + } }) }) - } else { - Err(ThothError::IncompleteMetadataRecord( - ONIX_ERROR.to_string(), - "No unpriced PDF or EPUB URL".to_string(), - )) } } } @@ -939,7 +972,16 @@ mod tests { depth_in: None, weight_g: None, weight_oz: None, - prices: vec![], + prices: vec![ + WorkPublicationsPrices { + currency_code: CurrencyCode::GBP, + unit_price: 5.99, + }, + WorkPublicationsPrices { + currency_code: CurrencyCode::EUR, + unit_price: 7.99, + }, + ], locations: vec![WorkPublicationsLocations { landing_page: Some("https://www.book.com/pdf_landing".to_string()), full_text_url: Some("https://www.book.com/pdf_fulltext".to_string()), @@ -1088,17 +1130,11 @@ mod tests { test_work.long_abstract = None; test_work.place = None; test_work.publication_date = None; - test_work.license = None; test_work.landing_page = None; test_work.cover_url = None; test_work.imprint.publisher.publisher_url = None; // Remove third (paperback) publication test_work.publications.pop(); - // Give PDF publication a positive price point - test_work.publications[1].prices = vec![WorkPublicationsPrices { - currency_code: CurrencyCode::USD, - unit_price: 7.99, - }]; let output = generate_test_output(true, &test_work); // Paperback publication removed, so its ISBN no longer appears // (either as the main ISBN or in RelatedProducts) @@ -1116,12 +1152,6 @@ mod tests { r#" Publisher's website: web shop"# )); assert!(!output.contains(r#" https://www.book.com"#)); - // PDF publication is no longer unpriced, hence no PDF URL, and EpubType changes - assert!( - !output.contains(r#" https://www.book.com/pdf_fulltext"#) - ); - assert!(!output.contains(r#" 002"#)); - assert!(output.contains(r#" 029"#)); // No page count supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 00"#)); @@ -1131,11 +1161,6 @@ mod tests { assert!(!output.contains(r#" 03"#)); assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); - // No licence supplied - assert!(!output.contains(r#" 46"#)); - assert!( - !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) - ); // No cover URL supplied assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 04"#)); @@ -1151,13 +1176,80 @@ mod tests { // No publication date supplied assert!(!output.contains(r#" 19991231"#)); assert!(!output.contains(r#" 1999"#)); + // No licence supplied: assume non-OA, output real PDF prices + assert!(!output.contains(r#" 47"#)); + assert!(!output.contains(r#" Open access - no commercial use"#)); + assert!(!output.contains(r#" 46"#)); + assert!( + !output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#) + ); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" R"#)); + assert!(!output + .contains(r#" Open access"#)); + assert!(!output.contains(r#" 01"#)); + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 02"#)); + assert!(output.contains(r#" 5.99"#)); + assert!(output.contains(r#" GBP"#)); + assert!(output.contains(r#" 7.99"#)); + assert!(output.contains(r#" EUR"#)); + + // Remove PDF location + test_work.publications[1].locations.clear(); + let output = generate_test_output(true, &test_work); + // PDF no longer has a URL, so EpubType changes, and EPUB price (unpriced) is output + assert!( + !output.contains(r#" https://www.book.com/pdf_fulltext"#) + ); + assert!(!output.contains(r#" 002"#)); + assert!(output.contains(r#" 029"#)); + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" 02"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(output.contains(r#" 01"#)); + + // Give EPUB a price + test_work.publications[0].prices = vec![WorkPublicationsPrices { + currency_code: CurrencyCode::AUD, + unit_price: 10.00, + }]; + let output = generate_test_output(true, &test_work); + assert!(!output.contains(r#" 01"#)); + assert!(!output.contains(r#" 5.99"#)); + assert!(!output.contains(r#" GBP"#)); + assert!(!output.contains(r#" 7.99"#)); + assert!(!output.contains(r#" EUR"#)); + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 02"#)); + assert!(output.contains(r#" 10.00"#)); + assert!(output.contains(r#" AUD"#)); + + // Replace licence: error + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::proquest_ebrary: No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string() + ); // Remove the EPUB publication's only location: error test_work.publications[0].locations.clear(); let output = generate_test_output(false, &test_work); assert_eq!( output, - "Could not generate onix_2.1::proquest_ebrary: No unpriced PDF or EPUB URL".to_string() + "Could not generate onix_2.1::proquest_ebrary: No PDF or EPUB URL".to_string() + ); + + // This occurs whether or not work is OA/priced + test_work.license = None; + test_work.publications[0].prices.clear(); + test_work.publications[1].prices.clear(); + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_2.1::proquest_ebrary: No PDF or EPUB URL".to_string() ); } } From 54dafac8e90ca57ba1b936e5295dcd4283e74368 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:20:17 +0100 Subject: [PATCH 4/7] Don't output OAPEN ONIX for works without licences (platform is OA-only) --- thoth-export-server/src/xml/onix3_oapen.rs | 54 ++++++++++++---------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/thoth-export-server/src/xml/onix3_oapen.rs b/thoth-export-server/src/xml/onix3_oapen.rs index 16174d220..f484868f4 100644 --- a/thoth-export-server/src/xml/onix3_oapen.rs +++ b/thoth-export-server/src/xml/onix3_oapen.rs @@ -60,6 +60,13 @@ impl XmlSpecification for Onix3Oapen { impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { + // Don't output works with no licence, as we assume these are non-OA + if self.license.is_none() { + return Err(ThothError::IncompleteMetadataRecord( + ONIX_ERROR.to_string(), + "Missing License".to_string(), + )); + } // We can only generate the document if there's a PDF if let Some(pdf_url) = self .publications @@ -131,22 +138,21 @@ impl XmlElementBlock for Work { write_element_block("PrimaryContentType", w, |w| { w.write(XmlEvent::Characters("10")).map_err(|e| e.into()) })?; - if let Some(license) = &self.license { - write_element_block("EpubLicense", w, |w| { - write_element_block("EpubLicenseName", w, |w| { - w.write(XmlEvent::Characters("Creative Commons License")) - .map_err(|e| e.into()) + write_element_block("EpubLicense", w, |w| { + write_element_block("EpubLicenseName", w, |w| { + w.write(XmlEvent::Characters("Creative Commons License")) + .map_err(|e| e.into()) + })?; + write_element_block("EpubLicenseExpression", w, |w| { + write_element_block("EpubLicenseExpressionType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) })?; - write_element_block("EpubLicenseExpression", w, |w| { - write_element_block("EpubLicenseExpressionType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("EpubLicenseExpressionLink", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + write_element_block("EpubLicenseExpressionLink", w, |w| { + w.write(XmlEvent::Characters(self.license.as_ref().unwrap())) + .map_err(|e| e.into()) }) - })?; - } + }) + })?; for issue in &self.issues { XmlElementBlock::::xml_element(issue, w).ok(); } @@ -1141,7 +1147,6 @@ mod tests { // Remove some values to test non-output of optional blocks test_work.doi = None; - test_work.license = None; test_work.subtitle = None; test_work.page_count = None; test_work.long_abstract = None; @@ -1152,14 +1157,6 @@ mod tests { // No DOI supplied assert!(!output.contains(r#" 06"#)); assert!(!output.contains(r#" 10.00001/BOOK.0001"#)); - // No licence supplied - assert!(!output.contains(r#" "#)); - assert!(!output - .contains(r#" Creative Commons License"#)); - assert!(!output.contains(r#" "#)); - assert!(!output - .contains(r#" 02"#)); - assert!(!output.contains(r#" https://creativecommons.org/licenses/by/4.0/"#)); // No subtitle supplied (within Thoth UI this would automatically update full_title) assert!(!output.contains(r#" Book Subtitle"#)); // No page count supplied @@ -1241,8 +1238,17 @@ mod tests { assert!(!output .contains(r#" "https://www.book.com/cover""#)); - // Remove the only publication, which is the PDF + // Remove licence. Result: error + test_work.license = None; + let output = generate_test_output(false, &test_work); + assert_eq!( + output, + "Could not generate onix_3.0::oapen: Missing License".to_string() + ); + + // Replace licence, but remove the only publication, which is the PDF // Result: error (can't generate OAPEN ONIX without PDF URL) + test_work.license = Some("https://creativecommons.org/licenses/by/4.0/".to_string()); test_work.publications.clear(); let output = generate_test_output(false, &test_work); assert_eq!( From fde92de20acaa56b793c5d342414cebe97007dc4 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:26:33 +0100 Subject: [PATCH 5/7] Explicitly return from fast-fail checks to reduce branching --- .../src/xml/onix21_ebsco_host.rs | 643 +++++++++--------- .../src/xml/onix21_proquest_ebrary.rs | 621 +++++++++-------- .../src/xml/onix3_google_books.rs | 1 - 3 files changed, 631 insertions(+), 634 deletions(-) diff --git a/thoth-export-server/src/xml/onix21_ebsco_host.rs b/thoth-export-server/src/xml/onix21_ebsco_host.rs index 72411587e..9751ffea7 100644 --- a/thoth-export-server/src/xml/onix21_ebsco_host.rs +++ b/thoth-export-server/src/xml/onix21_ebsco_host.rs @@ -86,391 +86,390 @@ impl XmlElementBlock for Work { !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || epub_publication.is_some_and(|p| p.prices.is_empty())) { - Err(ThothError::IncompleteMetadataRecord( + return Err(ThothError::IncompleteMetadataRecord( ONIX_ERROR.to_string(), "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), - )) - } else { - let work_id = format!("urn:uuid:{}", self.work_id); - let (main_isbn, isbns) = get_publications_data(&self.publications); - write_element_block("Product", w, |w| { - write_element_block("RecordReference", w, |w| { + )); + } + let work_id = format!("urn:uuid:{}", self.work_id); + let (main_isbn, isbns) = get_publications_data(&self.publications); + write_element_block("Product", w, |w| { + write_element_block("RecordReference", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + })?; + // 03 Notification confirmed on publication + write_element_block("NotificationType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 01 Publisher + write_element_block("RecordSourceType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 01 Proprietary + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { w.write(XmlEvent::Characters(&work_id)) .map_err(|e| e.into()) + }) + })?; + if let Some(isbn) = &main_isbn { + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) + }) })?; - // 03 Notification confirmed on publication - write_element_block("NotificationType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 01 Publisher - write_element_block("RecordSourceType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; + } + if let Some(doi) = &self.doi { write_element_block("ProductIdentifier", w, |w| { - // 01 Proprietary write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + w.write(XmlEvent::Characters(&doi.to_string())) .map_err(|e| e.into()) }) })?; - if let Some(isbn) = &main_isbn { - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) - })?; - } - if let Some(doi) = &self.doi { - write_element_block("ProductIdentifier", w, |w| { - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - }) - })?; + } + // DG Electronic book text in proprietary or open standard format + write_element_block("ProductForm", w, |w| { + w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + })?; + write_element_block("EpubType", w, |w| { + // 002 PDF + let mut epub_type = "002"; + // We definitely have either a PDF URL or an EPUB URL (or both) + if pdf_url.is_none() { + // 029 EPUB + epub_type = "029"; } - // DG Electronic book text in proprietary or open standard format - write_element_block("ProductForm", w, |w| { - w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(epub_type)) + .map_err(|e| e.into()) + })?; + for issue in &self.issues { + XmlElementBlock::::xml_element(issue, w).ok(); + } + write_element_block("Title", w, |w| { + // 01 Distinctive title (book) + write_element_block("TitleType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("EpubType", w, |w| { - // 002 PDF - let mut epub_type = "002"; - // We definitely have either a PDF URL or an EPUB URL (or both) - if pdf_url.is_none() { - // 029 EPUB - epub_type = "029"; - } - w.write(XmlEvent::Characters(epub_type)) + write_element_block("TitleText", w, |w| { + w.write(XmlEvent::Characters(&self.title)) .map_err(|e| e.into()) })?; - for issue in &self.issues { - XmlElementBlock::::xml_element(issue, w).ok(); - } - write_element_block("Title", w, |w| { - // 01 Distinctive title (book) - write_element_block("TitleType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("TitleText", w, |w| { - w.write(XmlEvent::Characters(&self.title)) + if let Some(subtitle) = &self.subtitle { + write_element_block("Subtitle", w, |w| { + w.write(XmlEvent::Characters(subtitle)) .map_err(|e| e.into()) })?; - if let Some(subtitle) = &self.subtitle { - write_element_block("Subtitle", w, |w| { - w.write(XmlEvent::Characters(subtitle)) - .map_err(|e| e.into()) - })?; - } - Ok(()) + } + Ok(()) + })?; + write_element_block("WorkIdentifier", w, |w| { + // 01 Proprietary + write_element_block("WorkIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("WorkIdentifier", w, |w| { - // 01 Proprietary - write_element_block("WorkIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("IDTypeName", w, |w| { - w.write(XmlEvent::Characters("Thoth WorkID")) + write_element_block("IDTypeName", w, |w| { + w.write(XmlEvent::Characters("Thoth WorkID")) + .map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + }) + })?; + let mut websites: HashMap = HashMap::new(); + if let Some(pdf) = pdf_url { + websites.insert( + pdf.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(epub) = epub_url { + websites.insert( + epub.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(landing_page) = &self.landing_page { + websites.insert( + landing_page.to_string(), + ( + "02".to_string(), + "Publisher's website: web shop".to_string(), + ), + ); + } + for (url, description) in websites.iter() { + write_element_block("Website", w, |w| { + write_element_block("WebsiteRole", w, |w| { + w.write(XmlEvent::Characters(&description.0)) .map_err(|e| e.into()) })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + write_element_block("WebsiteDescription", w, |w| { + w.write(XmlEvent::Characters(&description.1)) .map_err(|e| e.into()) + })?; + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) }) })?; - let mut websites: HashMap = HashMap::new(); - if let Some(pdf) = pdf_url { - websites.insert( - pdf.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(epub) = epub_url { - websites.insert( - epub.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(landing_page) = &self.landing_page { - websites.insert( - landing_page.to_string(), - ( - "02".to_string(), - "Publisher's website: web shop".to_string(), - ), - ); - } - for (url, description) in websites.iter() { - write_element_block("Website", w, |w| { - write_element_block("WebsiteRole", w, |w| { - w.write(XmlEvent::Characters(&description.0)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteDescription", w, |w| { - w.write(XmlEvent::Characters(&description.1)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) - }) - })?; - } - for contribution in &self.contributions { - // A51 Research by is not supported in ONIX 2 - if contribution.contribution_type != ContributionType::RESEARCH_BY { - XmlElementBlock::::xml_element(contribution, w).ok(); - } - } - for language in &self.languages { - XmlElementBlock::::xml_element(language, w).ok(); + } + for contribution in &self.contributions { + // A51 Research by is not supported in ONIX 2 + if contribution.contribution_type != ContributionType::RESEARCH_BY { + XmlElementBlock::::xml_element(contribution, w).ok(); } - if let Some(page_count) = self.page_count { - write_element_block("Extent", w, |w| { - // 00 Main content - write_element_block("ExtentType", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("ExtentValue", w, |w| { - w.write(XmlEvent::Characters(&page_count.to_string())) - .map_err(|e| e.into()) - })?; - // 03 Pages - write_element_block("ExtentUnit", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - }) + } + for language in &self.languages { + XmlElementBlock::::xml_element(language, w).ok(); + } + if let Some(page_count) = self.page_count { + write_element_block("Extent", w, |w| { + // 00 Main content + write_element_block("ExtentType", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - } - for subject in &self.subjects { - XmlElementBlock::::xml_element(subject, w).ok(); - } - write_element_block("Audience", w, |w| { - // 01 ONIX audience codes - write_element_block("AudienceCodeType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("ExtentValue", w, |w| { + w.write(XmlEvent::Characters(&page_count.to_string())) + .map_err(|e| e.into()) })?; - // 06 Professional and scholarly - write_element_block("AudienceCodeValue", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + // 03 Pages + write_element_block("ExtentUnit", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) }) })?; - if is_open_access { - write_element_block("OtherText", w, |w| { - // 47 Open access statement - // "Should always be accompanied by a link to the complete license (see code 46)" - // (not specified as required by EBSCO Host themselves) - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) + } + for subject in &self.subjects { + XmlElementBlock::::xml_element(subject, w).ok(); + } + write_element_block("Audience", w, |w| { + // 01 ONIX audience codes + write_element_block("AudienceCodeType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 06 Professional and scholarly + write_element_block("AudienceCodeValue", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + }) + })?; + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + // "Should always be accompanied by a link to the complete license (see code 46)" + // (not specified as required by EBSCO Host themselves) + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) })?; - } - if let Some(license) = &self.license { - write_element_block("OtherText", w, |w| { - // 46 License - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) + })?; + } + if let Some(license) = &self.license { + write_element_block("OtherText", w, |w| { + // 46 License + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) })?; - } - if let Some(labstract) = &self.long_abstract { - write_element_block("OtherText", w, |w| { - // 03 Long description - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 06 Default text format - write_element_block("TextFormat", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(labstract)) - .map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) + }) + })?; + } + if let Some(labstract) = &self.long_abstract { + write_element_block("OtherText", w, |w| { + // 03 Long description + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) })?; - } - if let Some(cover_url) = &self.cover_url { - write_element_block("MediaFile", w, |w| { - // 04 Image: front cover - write_element_block("MediaFileTypeCode", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 01 URL - write_element_block("MediaFileLinkTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("MediaFileLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) - .map_err(|e| e.into()) - }) + // 06 Default text format + write_element_block("TextFormat", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; - } - write_element_block("Imprint", w, |w| { - write_element_block("ImprintName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(labstract)) .map_err(|e| e.into()) }) })?; - write_element_block("Publisher", w, |w| { - // 01 Publisher - write_element_block("PublishingRole", w, |w| { + } + if let Some(cover_url) = &self.cover_url { + write_element_block("MediaFile", w, |w| { + // 04 Image: front cover + write_element_block("MediaFileTypeCode", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + })?; + // 01 URL + write_element_block("MediaFileLinkTypeCode", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("PublisherName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + write_element_block("MediaFileLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) .map_err(|e| e.into()) - })?; - if let Some(publisher_url) = &self.imprint.publisher.publisher_url { - write_element_block("Website", w, |w| { - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(publisher_url)) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + }) })?; - if let Some(place) = &self.place { - write_element_block("CityOfPublication", w, |w| { - w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) - })?; - } - XmlElement::::xml_element(&self.work_status, w)?; - if let Some(date) = self.publication_date { - write_element_block("PublicationDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) - })?; - write_element_block("CopyrightYear", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y").to_string())) - .map_err(|e| e.into()) + } + write_element_block("Imprint", w, |w| { + write_element_block("ImprintName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + .map_err(|e| e.into()) + }) + })?; + write_element_block("Publisher", w, |w| { + // 01 Publisher + write_element_block("PublishingRole", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PublisherName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + if let Some(publisher_url) = &self.imprint.publisher.publisher_url { + write_element_block("Website", w, |w| { + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(publisher_url)) + .map_err(|e| e.into()) + }) })?; } - write_element_block("SalesRights", w, |w| { - // 02 For sale with non-exclusive rights in the specified countries or territories - write_element_block("SalesRightsType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("RightsTerritory", w, |w| { - w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) - }) + Ok(()) + })?; + if let Some(place) = &self.place { + write_element_block("CityOfPublication", w, |w| { + w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) + })?; + } + XmlElement::::xml_element(&self.work_status, w)?; + if let Some(date) = self.publication_date { + write_element_block("PublicationDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + write_element_block("CopyrightYear", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + .map_err(|e| e.into()) })?; - if !isbns.is_empty() { - for (publication_type, isbn) in &isbns { - let relation_code = match publication_type { - PublicationType::PAPERBACK | PublicationType::HARDBACK => "13", // Epublication based on (print product) - _ => "06", // Alternative format - }; + } + write_element_block("SalesRights", w, |w| { + // 02 For sale with non-exclusive rights in the specified countries or territories + write_element_block("SalesRightsType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("RightsTerritory", w, |w| { + w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + }) + })?; + if !isbns.is_empty() { + for (publication_type, isbn) in &isbns { + let relation_code = match publication_type { + PublicationType::PAPERBACK | PublicationType::HARDBACK => "13", // Epublication based on (print product) + _ => "06", // Alternative format + }; - write_element_block("RelatedProduct", w, |w| { - write_element_block("RelationCode", w, |w| { - w.write(XmlEvent::Characters(relation_code)) - .map_err(|e| e.into()) + write_element_block("RelatedProduct", w, |w| { + write_element_block("RelationCode", w, |w| { + w.write(XmlEvent::Characters(relation_code)) + .map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) }) - })?; - } - } - if let Some(date) = self.withdrawn_date { - write_element_block("OutofPrintDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) + }) })?; } - write_element_block("SupplyDetail", w, |w| { - write_element_block("SupplierName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - // 09 Publisher to end-customers - write_element_block("SupplierRole", w, |w| { - w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + } + if let Some(date) = self.withdrawn_date { + write_element_block("OutofPrintDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SupplyDetail", w, |w| { + write_element_block("SupplierName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + // 09 Publisher to end-customers + write_element_block("SupplierRole", w, |w| { + w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + })?; + // 99 Contact supplier + write_element_block("ProductAvailability", w, |w| { + w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + })?; + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) })?; - // 99 Contact supplier - write_element_block("ProductAvailability", w, |w| { - w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) + .map_err(|e| e.into()) })?; - if is_open_access { - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) + } + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + // EBSCO Host require the price point for unpriced/Open Access titles to be listed as "0.01 USD". + write_element_block("Price", w, |w| { + // 01 RRP excluding tax (price code requested by EBSCO) + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) })?; - } - let publication = match pdf_url.is_some() { - true => pdf_publication, - false => epub_publication, - }; - let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); - if is_open_access || prices.is_empty() { - // EBSCO Host require the price point for unpriced/Open Access titles to be listed as "0.01 USD". + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + }) + }) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); write_element_block("Price", w, |w| { // 01 RRP excluding tax (price code requested by EBSCO) write_element_block("PriceTypeCode", w, |w| { w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters("0.01")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) })?; write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters("USD")).map_err(|e| e.into()) + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) }) - }) - } else { - for price in prices { - let unit_price = price.unit_price; - let formatted_price = format!("{unit_price:.2}"); - write_element_block("Price", w, |w| { - // 01 RRP excluding tax (price code requested by EBSCO) - write_element_block("PriceTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters(&formatted_price)) - .map_err(|e| e.into()) - })?; - write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters(&price.currency_code.to_string())) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + })?; } - }) + Ok(()) + } }) - } + }) } } diff --git a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs index 3f7824a5b..cb24b2e00 100644 --- a/thoth-export-server/src/xml/onix21_proquest_ebrary.rs +++ b/thoth-export-server/src/xml/onix21_proquest_ebrary.rs @@ -88,373 +88,372 @@ impl XmlElementBlock for Work { !(pdf_publication.is_some_and(|p| p.prices.is_empty()) || epub_publication.is_some_and(|p| p.prices.is_empty())) { - Err(ThothError::IncompleteMetadataRecord( + return Err(ThothError::IncompleteMetadataRecord( ONIX_ERROR.to_string(), "No unpriced PDF or EPUB URL (must be supplied for OA works)".to_string(), - )) - } else { - let work_id = format!("urn:uuid:{}", self.work_id); - let (main_isbn, isbns) = get_publications_data(&self.publications); - write_element_block("Product", w, |w| { - write_element_block("RecordReference", w, |w| { + )); + } + let work_id = format!("urn:uuid:{}", self.work_id); + let (main_isbn, isbns) = get_publications_data(&self.publications); + write_element_block("Product", w, |w| { + write_element_block("RecordReference", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + })?; + // 03 Notification confirmed on publication + write_element_block("NotificationType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 01 Publisher + write_element_block("RecordSourceType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 01 Proprietary + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { w.write(XmlEvent::Characters(&work_id)) .map_err(|e| e.into()) + }) + })?; + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) })?; - // 03 Notification confirmed on publication - write_element_block("NotificationType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 01 Publisher - write_element_block("RecordSourceType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&main_isbn)) + .map_err(|e| e.into()) + }) + })?; + if let Some(doi) = &self.doi { write_element_block("ProductIdentifier", w, |w| { - // 01 Proprietary write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) + w.write(XmlEvent::Characters(&doi.to_string())) .map_err(|e| e.into()) }) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&main_isbn)) - .map_err(|e| e.into()) - }) + } + // DG Electronic book text in proprietary or open standard format + write_element_block("ProductForm", w, |w| { + w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + })?; + write_element_block("EpubType", w, |w| { + // 002 PDF + let mut epub_type = "002"; + // We definitely have either a PDF URL or an EPUB URL (or both) + if pdf_url.is_none() { + // 029 EPUB + epub_type = "029"; + } + w.write(XmlEvent::Characters(epub_type)) + .map_err(|e| e.into()) + })?; + for issue in &self.issues { + XmlElementBlock::::xml_element(issue, w).ok(); + } + write_element_block("Title", w, |w| { + // 01 Distinctive title (book) + write_element_block("TitleType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - if let Some(doi) = &self.doi { - write_element_block("ProductIdentifier", w, |w| { - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&doi.to_string())) - .map_err(|e| e.into()) - }) + write_element_block("TitleText", w, |w| { + w.write(XmlEvent::Characters(&self.title)) + .map_err(|e| e.into()) + })?; + if let Some(subtitle) = &self.subtitle { + write_element_block("Subtitle", w, |w| { + w.write(XmlEvent::Characters(subtitle)) + .map_err(|e| e.into()) })?; } - // DG Electronic book text in proprietary or open standard format - write_element_block("ProductForm", w, |w| { - w.write(XmlEvent::Characters("DG")).map_err(|e| e.into()) + Ok(()) + })?; + write_element_block("WorkIdentifier", w, |w| { + // 01 Proprietary + write_element_block("WorkIDType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - write_element_block("EpubType", w, |w| { - // 002 PDF - let mut epub_type = "002"; - // We definitely have either a PDF URL or an EPUB URL (or both) - if pdf_url.is_none() { - // 029 EPUB - epub_type = "029"; - } - w.write(XmlEvent::Characters(epub_type)) + write_element_block("IDTypeName", w, |w| { + w.write(XmlEvent::Characters("Thoth WorkID")) .map_err(|e| e.into()) })?; - for issue in &self.issues { - XmlElementBlock::::xml_element(issue, w).ok(); - } - write_element_block("Title", w, |w| { - // 01 Distinctive title (book) - write_element_block("TitleType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(&work_id)) + .map_err(|e| e.into()) + }) + })?; + let mut websites: HashMap = HashMap::new(); + if let Some(pdf) = pdf_url { + websites.insert( + pdf.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(epub) = epub_url { + websites.insert( + epub.to_string(), + ( + "29".to_string(), + "Publisher's website: download the title".to_string(), + ), + ); + } + if let Some(landing_page) = &self.landing_page { + websites.insert( + landing_page.to_string(), + ( + "02".to_string(), + "Publisher's website: web shop".to_string(), + ), + ); + } + for (url, description) in websites.iter() { + write_element_block("Website", w, |w| { + write_element_block("WebsiteRole", w, |w| { + w.write(XmlEvent::Characters(&description.0)) + .map_err(|e| e.into()) })?; - write_element_block("TitleText", w, |w| { - w.write(XmlEvent::Characters(&self.title)) + write_element_block("WebsiteDescription", w, |w| { + w.write(XmlEvent::Characters(&description.1)) .map_err(|e| e.into()) })?; - if let Some(subtitle) = &self.subtitle { - write_element_block("Subtitle", w, |w| { - w.write(XmlEvent::Characters(subtitle)) - .map_err(|e| e.into()) - })?; - } - Ok(()) + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) + }) })?; - write_element_block("WorkIdentifier", w, |w| { - // 01 Proprietary - write_element_block("WorkIDType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + } + for contribution in &self.contributions { + // A51 Research by is not supported in ONIX 2 + if contribution.contribution_type != ContributionType::RESEARCH_BY { + XmlElementBlock::::xml_element(contribution, w).ok(); + } + } + for language in &self.languages { + XmlElementBlock::::xml_element(language, w).ok(); + } + if let Some(page_count) = self.page_count { + write_element_block("Extent", w, |w| { + // 00 Main content + write_element_block("ExtentType", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - write_element_block("IDTypeName", w, |w| { - w.write(XmlEvent::Characters("Thoth WorkID")) + write_element_block("ExtentValue", w, |w| { + w.write(XmlEvent::Characters(&page_count.to_string())) .map_err(|e| e.into()) })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(&work_id)) - .map_err(|e| e.into()) + // 03 Pages + write_element_block("ExtentUnit", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) }) })?; - let mut websites: HashMap = HashMap::new(); - if let Some(pdf) = pdf_url { - websites.insert( - pdf.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(epub) = epub_url { - websites.insert( - epub.to_string(), - ( - "29".to_string(), - "Publisher's website: download the title".to_string(), - ), - ); - } - if let Some(landing_page) = &self.landing_page { - websites.insert( - landing_page.to_string(), - ( - "02".to_string(), - "Publisher's website: web shop".to_string(), - ), - ); - } - for (url, description) in websites.iter() { - write_element_block("Website", w, |w| { - write_element_block("WebsiteRole", w, |w| { - w.write(XmlEvent::Characters(&description.0)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteDescription", w, |w| { - w.write(XmlEvent::Characters(&description.1)) - .map_err(|e| e.into()) - })?; - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(url)).map_err(|e| e.into()) - }) + } + for subject in &self.subjects { + XmlElementBlock::::xml_element(subject, w).ok(); + } + write_element_block("Audience", w, |w| { + // 01 ONIX audience codes + write_element_block("AudienceCodeType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + // 06 Professional and scholarly + write_element_block("AudienceCodeValue", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + }) + })?; + if is_open_access { + write_element_block("OtherText", w, |w| { + // 47 Open access statement + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) })?; - } - for contribution in &self.contributions { - // A51 Research by is not supported in ONIX 2 - if contribution.contribution_type != ContributionType::RESEARCH_BY { - XmlElementBlock::::xml_element(contribution, w).ok(); - } - } - for language in &self.languages { - XmlElementBlock::::xml_element(language, w).ok(); - } - if let Some(page_count) = self.page_count { - write_element_block("Extent", w, |w| { - // 00 Main content - write_element_block("ExtentType", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("ExtentValue", w, |w| { - w.write(XmlEvent::Characters(&page_count.to_string())) - .map_err(|e| e.into()) - })?; - // 03 Pages - write_element_block("ExtentUnit", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - }) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open access - no commercial use")) + .map_err(|e| e.into()) + }) + })?; + } + if let Some(license) = &self.license { + write_element_block("OtherText", w, |w| { + // 46 License + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) })?; - } - for subject in &self.subjects { - XmlElementBlock::::xml_element(subject, w).ok(); - } - write_element_block("Audience", w, |w| { - // 01 ONIX audience codes - write_element_block("AudienceCodeType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) + }) + })?; + } + if let Some(labstract) = &self.long_abstract { + write_element_block("OtherText", w, |w| { + // 03 Long description + write_element_block("TextTypeCode", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) })?; - // 06 Professional and scholarly - write_element_block("AudienceCodeValue", w, |w| { + // 06 Default text format + write_element_block("TextFormat", w, |w| { w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(labstract)) + .map_err(|e| e.into()) }) })?; - if is_open_access { - write_element_block("OtherText", w, |w| { - // 47 Open access statement - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("47")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters("Open access - no commercial use")) - .map_err(|e| e.into()) - }) + } + if let Some(cover_url) = &self.cover_url { + write_element_block("MediaFile", w, |w| { + // 04 Image: front cover + write_element_block("MediaFileTypeCode", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) })?; - } - if let Some(license) = &self.license { - write_element_block("OtherText", w, |w| { - // 46 License - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("46")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - }) + // 01 URL + write_element_block("MediaFileLinkTypeCode", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - } - if let Some(labstract) = &self.long_abstract { - write_element_block("OtherText", w, |w| { - // 03 Long description - write_element_block("TextTypeCode", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 06 Default text format - write_element_block("TextFormat", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(labstract)) + write_element_block("MediaFileLink", w, |w| { + w.write(XmlEvent::Characters(cover_url)) + .map_err(|e| e.into()) + }) + })?; + } + write_element_block("Imprint", w, |w| { + write_element_block("ImprintName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.imprint_name)) + .map_err(|e| e.into()) + }) + })?; + write_element_block("Publisher", w, |w| { + // 01 Publisher + write_element_block("PublishingRole", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + })?; + write_element_block("PublisherName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) + })?; + if let Some(publisher_url) = &self.imprint.publisher.publisher_url { + write_element_block("Website", w, |w| { + write_element_block("WebsiteLink", w, |w| { + w.write(XmlEvent::Characters(publisher_url)) .map_err(|e| e.into()) }) })?; } - if let Some(cover_url) = &self.cover_url { - write_element_block("MediaFile", w, |w| { - // 04 Image: front cover - write_element_block("MediaFileTypeCode", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 01 URL - write_element_block("MediaFileLinkTypeCode", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) + Ok(()) + })?; + if let Some(place) = &self.place { + write_element_block("CityOfPublication", w, |w| { + w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) + })?; + } + XmlElement::::xml_element(&self.work_status, w)?; + if let Some(date) = self.publication_date { + write_element_block("PublicationDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) + })?; + write_element_block("CopyrightYear", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + .map_err(|e| e.into()) + })?; + } + write_element_block("SalesRights", w, |w| { + // 02 For sale with non-exclusive rights in the specified countries or territories + write_element_block("SalesRightsType", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("RightsTerritory", w, |w| { + w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + }) + })?; + if !isbns.is_empty() { + for isbn in &isbns { + write_element_block("RelatedProduct", w, |w| { + // 06 Alternative format + write_element_block("RelationCode", w, |w| { + w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) })?; - write_element_block("MediaFileLink", w, |w| { - w.write(XmlEvent::Characters(cover_url)) - .map_err(|e| e.into()) + write_element_block("ProductIdentifier", w, |w| { + // 15 ISBN-13 + write_element_block("ProductIDType", w, |w| { + w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) + })?; + write_element_block("IDValue", w, |w| { + w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) + }) }) })?; } - write_element_block("Imprint", w, |w| { - write_element_block("ImprintName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.imprint_name)) - .map_err(|e| e.into()) - }) + } + if let Some(date) = self.withdrawn_date { + write_element_block("OutofPrintDate", w, |w| { + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) + .map_err(|e| e.into()) })?; - write_element_block("Publisher", w, |w| { - // 01 Publisher - write_element_block("PublishingRole", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - })?; - write_element_block("PublisherName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - if let Some(publisher_url) = &self.imprint.publisher.publisher_url { - write_element_block("Website", w, |w| { - write_element_block("WebsiteLink", w, |w| { - w.write(XmlEvent::Characters(publisher_url)) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) + } + write_element_block("SupplyDetail", w, |w| { + write_element_block("SupplierName", w, |w| { + w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) + .map_err(|e| e.into()) })?; - if let Some(place) = &self.place { - write_element_block("CityOfPublication", w, |w| { - w.write(XmlEvent::Characters(place)).map_err(|e| e.into()) - })?; - } - XmlElement::::xml_element(&self.work_status, w)?; - if let Some(date) = self.publication_date { - write_element_block("PublicationDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) + // 09 Publisher to end-customers + write_element_block("SupplierRole", w, |w| { + w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) + })?; + // 99 Contact supplier + write_element_block("ProductAvailability", w, |w| { + w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) + })?; + if is_open_access { + // R Restrictions apply, see note + write_element_block("AudienceRestrictionFlag", w, |w| { + w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) })?; - write_element_block("CopyrightYear", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y").to_string())) + write_element_block("AudienceRestrictionNote", w, |w| { + w.write(XmlEvent::Characters("Open access")) .map_err(|e| e.into()) })?; } - write_element_block("SalesRights", w, |w| { - // 02 For sale with non-exclusive rights in the specified countries or territories - write_element_block("SalesRightsType", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("RightsTerritory", w, |w| { - w.write(XmlEvent::Characters("WORLD")).map_err(|e| e.into()) + let publication = match pdf_url.is_some() { + true => pdf_publication, + false => epub_publication, + }; + let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); + if is_open_access || prices.is_empty() { + write_element_block("UnpricedItemType", w, |w| { + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) }) - })?; - if !isbns.is_empty() { - for isbn in &isbns { - write_element_block("RelatedProduct", w, |w| { - // 06 Alternative format - write_element_block("RelationCode", w, |w| { - w.write(XmlEvent::Characters("06")).map_err(|e| e.into()) + } else { + for price in prices { + let unit_price = price.unit_price; + let formatted_price = format!("{unit_price:.2}"); + write_element_block("Price", w, |w| { + // 02 RRP including tax + write_element_block("PriceTypeCode", w, |w| { + w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) + })?; + write_element_block("PriceAmount", w, |w| { + w.write(XmlEvent::Characters(&formatted_price)) + .map_err(|e| e.into()) })?; - write_element_block("ProductIdentifier", w, |w| { - // 15 ISBN-13 - write_element_block("ProductIDType", w, |w| { - w.write(XmlEvent::Characters("15")).map_err(|e| e.into()) - })?; - write_element_block("IDValue", w, |w| { - w.write(XmlEvent::Characters(isbn)).map_err(|e| e.into()) - }) + write_element_block("CurrencyCode", w, |w| { + w.write(XmlEvent::Characters(&price.currency_code.to_string())) + .map_err(|e| e.into()) }) })?; } + Ok(()) } - if let Some(date) = self.withdrawn_date { - write_element_block("OutofPrintDate", w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) - .map_err(|e| e.into()) - })?; - } - write_element_block("SupplyDetail", w, |w| { - write_element_block("SupplierName", w, |w| { - w.write(XmlEvent::Characters(&self.imprint.publisher.publisher_name)) - .map_err(|e| e.into()) - })?; - // 09 Publisher to end-customers - write_element_block("SupplierRole", w, |w| { - w.write(XmlEvent::Characters("09")).map_err(|e| e.into()) - })?; - // 99 Contact supplier - write_element_block("ProductAvailability", w, |w| { - w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) - })?; - if is_open_access { - // R Restrictions apply, see note - write_element_block("AudienceRestrictionFlag", w, |w| { - w.write(XmlEvent::Characters("R")).map_err(|e| e.into()) - })?; - write_element_block("AudienceRestrictionNote", w, |w| { - w.write(XmlEvent::Characters("Open access")) - .map_err(|e| e.into()) - })?; - } - let publication = match pdf_url.is_some() { - true => pdf_publication, - false => epub_publication, - }; - let prices = publication.map(|p| p.prices.clone()).unwrap_or_default(); - if is_open_access || prices.is_empty() { - write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) - }) - } else { - for price in prices { - let unit_price = price.unit_price; - let formatted_price = format!("{unit_price:.2}"); - write_element_block("Price", w, |w| { - // 02 RRP including tax - write_element_block("PriceTypeCode", w, |w| { - w.write(XmlEvent::Characters("02")).map_err(|e| e.into()) - })?; - write_element_block("PriceAmount", w, |w| { - w.write(XmlEvent::Characters(&formatted_price)) - .map_err(|e| e.into()) - })?; - write_element_block("CurrencyCode", w, |w| { - w.write(XmlEvent::Characters(&price.currency_code.to_string())) - .map_err(|e| e.into()) - }) - })?; - } - Ok(()) - } - }) }) - } + }) } } diff --git a/thoth-export-server/src/xml/onix3_google_books.rs b/thoth-export-server/src/xml/onix3_google_books.rs index a02e1fb4b..f62af98bc 100644 --- a/thoth-export-server/src/xml/onix3_google_books.rs +++ b/thoth-export-server/src/xml/onix3_google_books.rs @@ -68,7 +68,6 @@ impl XmlElementBlock for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { // Don't output works with no BIC, BISAC or LCC subject code // Google Books can only ingest works which have at least one - if !self.subjects.iter().any(|s| { matches!( s.subject_type, From 499d724c49a4314f8523b953d3d675119fa35c90 Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:43:33 +0100 Subject: [PATCH 6/7] Don't output Crossref free_to_read tag if no licence supplied --- .../src/xml/doideposit_crossref.rs | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/thoth-export-server/src/xml/doideposit_crossref.rs b/thoth-export-server/src/xml/doideposit_crossref.rs index d90d52334..c98b89b2e 100644 --- a/thoth-export-server/src/xml/doideposit_crossref.rs +++ b/thoth-export-server/src/xml/doideposit_crossref.rs @@ -529,20 +529,20 @@ fn write_access_content( license: &Option, w: &mut EventWriter, ) -> ThothResult<()> { - write_full_element_block( - "ai:program", - Some(vec![("name", "AccessIndicators")]), - w, - |w| { - write_element_block("ai:free_to_read", w, |_w| Ok(()))?; - if let Some(license) = license { + // Assume works without licences are non-OA + if let Some(license) = license { + write_full_element_block( + "ai:program", + Some(vec![("name", "AccessIndicators")]), + w, + |w| { + write_element_block("ai:free_to_read", w, |_w| Ok(()))?; write_element_block("ai:license_ref", w, |w| { w.write(XmlEvent::Characters(license)).map_err(|e| e.into()) - })?; - } - Ok(()) - }, - )?; + }) + }, + )?; + } Ok(()) } @@ -1400,9 +1400,9 @@ mod tests { assert!(!output.contains(r#" 02"#)); assert!(!output.contains(r#" 28"#)); assert!(!output.contains(r#" 2000"#)); - // No licence supplied - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); + // No licence supplied: assume non-OA + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); assert!(!output.contains( r#" https://creativecommons.org/licenses/by/4.0/"# )); @@ -2001,9 +2001,9 @@ mod tests { assert!(!output.contains(r#" 978-1-4028-9462-6"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); - // No licence supplied - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); + // No licence supplied: assume non-OA + assert!(!output.contains(r#" "#)); + assert!(!output.contains(r#" "#)); assert!(!output.contains( r#" https://creativecommons.org/licenses/by/4.0/"# )); From 376b2ef70e293ed5c40009e7f6271c3863177c9e Mon Sep 17 00:00:00 2001 From: rhigman <73792779+rhigman@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:47:46 +0100 Subject: [PATCH 7/7] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee583e9ae..6522243da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [551](https://github.com/thoth-pub/thoth/issues/551) - Only include chapters in Crossref metadata output if they have DOIs - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade `paperclip` to v0.9.1 - [628](https://github.com/thoth-pub/thoth/pull/628) - Upgrade rust to `1.81.0` in production and development `Dockerfile` + - [544](https://github.com/thoth-pub/thoth/issues/544) - Implement non-OA metadata in export outputs ### Fixed - [565](https://github.com/thoth-pub/thoth/issues/565) - Don't generate Crossref metadata output if no DOIs (work or chapter) are present