Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/404 447 prevent output with no bisac #567

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Changed
- [499](https://github.com/thoth-pub/thoth/issues/499) - Default main\_contribution to true

### Fixed
- [447](https://github.com/thoth-pub/thoth/issues/447) - Prevents Google Books Onix3 format output from the Export API if the Thoth record doesn't contain at least one BIC, BISAC or LCC subject code
- [404](https://github.com/thoth-pub/thoth/issues/404) - Prevents JSTOR Onix3 format output from the Export API if the Thoth record doesn't contain at least one BISAC subject code

## [[0.11.16]](https://github.com/thoth-pub/thoth/releases/tag/v0.11.16) - 2024-02-19
### Changed
- [499](https://github.com/thoth-pub/thoth/issues/499) - Default main\_contribution to true
- [561](https://github.com/thoth-pub/thoth/issues/561) - Add "Publisher Website" as a location platform
- [553](https://github.com/thoth-pub/thoth/pull/553) - Upgrade rust to `1.76.0` in production and development `Dockerfile`
- [305](https://github.com/thoth-pub/thoth/issues/305) - Update rust edition to 2021
Expand Down
57 changes: 38 additions & 19 deletions thoth-export-server/src/xml/onix3_google_books.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,36 @@ impl XmlSpecification for Onix3GoogleBooks {

impl XmlElementBlock<Onix3GoogleBooks> for Work {
fn xml_element<W: Write>(&self, w: &mut EventWriter<W>) -> ThothResult<()> {
// Don't output works with no BIC, BISAC or LCC subject code
// (Google Books can only ingest works which have at least one of these)

if !self.subjects.iter().any(|s| {
matches!(
s.subject_type,
SubjectType::BISAC | SubjectType::BIC | SubjectType::LCC
)
}) {
return Err(ThothError::IncompleteMetadataRecord(
ONIX_ERROR.to_string(),
"No BIC, BISAC or LCC subject code".to_string(),
));
}
// Don't output works with no publication date (mandatory in Google Books)
if self.publication_date.is_none() {
Err(ThothError::IncompleteMetadataRecord(
return Err(ThothError::IncompleteMetadataRecord(
ONIX_ERROR.to_string(),
"Missing Publication Date".to_string(),
))
));
}
// Don't output works with no contributors (at least one required for Google Books)
} else if self.contributions.is_empty() {
Err(ThothError::IncompleteMetadataRecord(
if self.contributions.is_empty() {
return Err(ThothError::IncompleteMetadataRecord(
ONIX_ERROR.to_string(),
"No contributors supplied".to_string(),
))
));
}
// We can only generate the document if there's an EPUB or PDF
} else if let Some(main_publication) = self
if let Some(main_publication) = self
.publications
.iter()
// For preference, distribute the EPUB only
Expand Down Expand Up @@ -1050,7 +1066,6 @@ mod tests {
test_work.long_abstract = None;
test_work.publications[0].prices.pop();
test_work.publications[0].publication_type = PublicationType::EPUB;
test_work.subjects.clear();
let output = generate_test_output(true, &test_work);
// Ebook type changed
assert!(!output.contains(r#" <ProductFormDetail>E107</ProductFormDetail>"#));
Expand All @@ -1077,17 +1092,6 @@ mod tests {
assert!(!output.contains(r#" <Territory>"#));
assert!(!output.contains(r#" <RegionsIncluded>WORLD</RegionsIncluded>"#));
assert!(output.contains(r#" <UnpricedItemType>01</UnpricedItemType>"#));
// No subjects supplied
assert!(!output.contains(r#" <Subject>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>12</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>AAB</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>10</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>AAA000000</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>04</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>JA85</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>23</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>keyword1</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectCode>custom1</SubjectCode>"#));

// Replace long abstract but remove table of contents
// Result: CollateralDetail block still present, but now only contains long abstract
Expand All @@ -1114,7 +1118,22 @@ mod tests {
assert!(!output.contains(r#" <TextType>04</TextType>"#));
assert!(!output.contains(r#" <Text language="eng">1. Chapter 1</Text>"#));

// Remove publication date: result is error
// Remove all subjects
// Result: error (can't generate Google Books ONIX without a BIC, BISAC, or LCC subject)
test_work.subjects.clear();
let output = generate_test_output(false, &test_work);
assert_eq!(
output,
"Could not generate onix_3.0::google_books: No BIC, BISAC or LCC subject code"
.to_string()
);

// Reinstate the BIC subject but remove publication date: result is error
test_work.subjects = vec![WorkSubjects {
subject_code: "AAB".to_string(),
subject_type: SubjectType::BIC,
subject_ordinal: 1,
}];
test_work.publication_date = None;
let output = generate_test_output(false, &test_work);
assert_eq!(
Expand Down
47 changes: 29 additions & 18 deletions thoth-export-server/src/xml/onix3_jstor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,18 @@ impl XmlSpecification for Onix3Jstor {

impl XmlElementBlock<Onix3Jstor> for Work {
fn xml_element<W: Write>(&self, w: &mut EventWriter<W>) -> ThothResult<()> {
// Don't output works with no BISAC subject code
// (JSTOR can only ingest works which have at least one)
if !self
.subjects
.iter()
.any(|s| matches!(s.subject_type, SubjectType::BISAC))
{
return Err(ThothError::IncompleteMetadataRecord(
ONIX_ERROR.to_string(),
"No BISAC subject code".to_string(),
));
}
// We can only generate the document if there's a PDF
if let Some(pdf_url) = self
.publications
Expand Down Expand Up @@ -969,7 +981,6 @@ mod tests {
test_work.place = None;
test_work.publication_date = None;
test_work.landing_page = None;
test_work.subjects.clear();
test_work.publications[0].publication_type = PublicationType::XML;
let output = generate_test_output(true, &test_work);
// No DOI supplied
Expand Down Expand Up @@ -1012,30 +1023,30 @@ mod tests {
r#" <WebsiteDescription>Publisher's website: web shop</WebsiteDescription>"#
));
assert!(!output.contains(r#" <WebsiteLink>https://www.book.com</WebsiteLink>"#));
// No subjects supplied
assert!(!output.contains(r#" <Subject>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>12</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>AAB</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>10</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>AAA000000</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>04</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>JA85</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>93</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>JWA</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>20</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>keyword1</SubjectCode>"#));
assert!(!output.contains(r#" <SubjectSchemeIdentifier>B2</SubjectSchemeIdentifier>"#));
assert!(!output.contains(r#" <SubjectCode>custom1</SubjectCode>"#));
// No print ISBN supplied
assert!(!output.contains(r#" <RelatedProduct>"#));
assert!(!output.contains(r#" <ProductRelationCode>13</ProductRelationCode>"#));
assert!(!output.contains(r#" <ProductIdentifier>"#));
assert!(!output.contains(r#" <ProductIDType>15</ProductIDType>"#));
assert!(!output.contains(r#" <IDValue>9781402894626</IDValue>"#));

// Remove the last publication, which is the PDF
// Result: error (can't generate OAPEN ONIX without PDF URL)
test_work.publications.pop();
// Remove all subjects
// Result: error (can't generate JSTOR ONIX without a BISAC subject)
test_work.subjects.clear();
let output = generate_test_output(false, &test_work);
assert_eq!(
output,
"Could not generate onix_3.0::jstor: No BISAC subject code".to_string()
);

// Reinstate the BISAC subject but remove the only publication, which is the PDF
// Result: error (can't generate JSTOR ONIX without PDF URL)
test_work.subjects = vec![WorkSubjects {
subject_code: "AAA000000".to_string(),
subject_type: SubjectType::BISAC,
subject_ordinal: 1,
}];
test_work.publications.clear();
let output = generate_test_output(false, &test_work);
assert_eq!(
output,
Expand Down
Loading