Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2308_PubBacklistMapping #460

Merged
merged 6 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions mappings/UofM.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,6 @@ def formatSubjects(self):
return subjectList

def formatRights(self):
'''
The pipe delimiter is to separate the Rights table attributes into this format:
source|license|reason|statement|date
which makes it easy to place the right data into the columns when clustered
'''

if not self.record.rights:
return None

Expand All @@ -102,9 +96,3 @@ def formatRights(self):
return 'UofM|{}||{}|'.format('public_domain', 'Public Domain')

return None






96 changes: 96 additions & 0 deletions mappings/publisher_backlist.py
mitri-slory marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from .json import JSONMapping

class PublisherBacklistMapping(JSONMapping):
def __init__(self, source):
    '''
    Initialise the mapping for one Publisher Backlist record.

    The parent class is given an empty mapping dict; the real mapping is
    attached afterwards from createMapping().
    '''
    placeholder_mapping = {}
    super().__init__(source, placeholder_mapping)

    self.mapping = self.createMapping()

def createMapping(self):
    '''
    Build the field mapping for Publisher Backlist records.

    Each value is a (source column name, format string) tuple, or a list of
    such tuples. How tuple vs. list entries are interpreted is decided by the
    parent mapping class, which is not visible here -- TODO confirm.
    '''
    # This column feeds both `publisher` and `publisher_project_source`.
    publisher_column = 'Publisher (from Projects)'

    identifier_rules = [
        ('ISBN', '{0}|isbn'),
        ('OCLC', '{0}|oclc'),
    ]

    mapping = {
        'title': ('Title', '{0}'),
        'authors': ('Author(s)', '{0}'),
        'dates': [('Pub Date', '{0}|publication_date')],
        'publisher': [(publisher_column, '{0}||')],
        'identifiers': identifier_rules,
        'rights': ('DRB Rights Classification', '{0}||||'),
        'contributors': [('Contributors', '{0}|||contributor')],
        'subjects': ('Subject 1', '{0}'),
        'source': ('Project Name (from Projects)', '{0}'),
        'source_id': ('DRB Record_ID', '{0}'),
        'publisher_project_source': (publisher_column, '{0}'),
    }
    return mapping

def applyFormatting(self):
    '''
    Normalise the freshly mapped record in place.

    - has_part is reset to an empty list.
    - source is reduced to the first space-separated word of its first item.
    - publisher_project_source is reduced to its first item.
    - authors, subjects and identifiers are reformatted when present.
    - rights is always recomputed; this runs last, after source has been
      reduced above.
    '''
    record = self.record
    record.has_part = []

    if record.source:
        # Keep only the leading word of the first source entry.
        record.source = record.source[0].split(' ')[0]

    if record.publisher_project_source:
        record.publisher_project_source = record.publisher_project_source[0]

    if record.authors:
        record.authors = self.format_authors()

    if record.subjects:
        record.subjects = self.format_subjects()

    if record.identifiers:
        record.identifiers = self.format_identifiers()

    record.rights = self.format_rights()

def format_authors(self):
    '''
    Turn the raw author value into a list of 'name|||true' strings.

    A value containing ';' is treated as a semicolon-separated list of names;
    each name is trimmed and blank entries are dropped. Any other value is
    emitted as a single entry.

    Returns:
        list of strings, one per author, each ending in '|||true'.
    '''
    authors = self.record.authors

    if ';' in authors:
        # Split on the bare semicolon and trim, so 'A;B' and 'A; B' both
        # separate correctly (the check and the split used to disagree).
        names = [name.strip() for name in authors.split(';') if name.strip()]
    else:
        names = [authors]

    # The single-author branch used to emit a stray ')' after 'true'.
    return [f'{name}|||true' for name in names]


def format_identifiers(self):
    '''
    Expand a multi-valued ISBN entry into one 'value|isbn' string per ISBN.

    Only the first identifier is inspected. When it is an ISBN entry whose
    value (the text before the first '|') contains ';', the value is split
    into individual ISBNs; the second identifier is carried over when it is
    an OCLC entry. In every other case the identifiers are returned
    unchanged.

    Returns:
        list of 'value|type' identifier strings.
    '''
    identifiers = self.record.identifiers

    if 'isbn' not in identifiers[0]:
        return identifiers

    isbn_string = identifiers[0].split('|')[0]
    if ';' not in isbn_string:
        return identifiers

    # Split on the bare semicolon and trim so '1;2' and '1; 2' behave alike.
    formatted_isbns = [
        f'{isbn.strip()}|isbn'
        for isbn in isbn_string.split(';')
        if isbn.strip()
    ]

    if len(identifiers) > 1 and 'oclc' in identifiers[1]:
        formatted_isbns.append(identifiers[1])

    return formatted_isbns

def format_subjects(self):
    '''
    Turn the raw subject value into a list of 'subject||' strings.

    A value containing '|' is split on that character; otherwise the whole
    value becomes the only entry.
    '''
    raw_subjects = self.record.subjects

    if '|' in raw_subjects:
        headings = raw_subjects.split('|')
    else:
        headings = [raw_subjects]

    return [f'{heading}||' for heading in headings]

def format_rights(self):
    '''
    Build the pipe-delimited rights statement for the record.

    The output layout is source|license|reason|statement|date, with reason
    and date left empty. The status is read from the text before the first
    '|' of record.rights and compared case-insensitively after trimming.

    Returns:
        The rights string for 'in copyright' or 'public domain' records,
        or None when rights is empty or the status is not recognised.
    '''
    if not self.record.rights:
        return None

    rights_status = self.record.rights.split('|')[0].strip().lower()

    # status -> (license code, human-readable statement)
    known_rights = {
        'in copyright': ('in_copyright', 'In Copyright'),
        'public domain': ('public_domain', 'Public Domain'),
    }

    if rights_status not in known_rights:
        return None

    license_code, statement = known_rights[rights_status]

    # Interpolate the record's source value; the string literal
    # 'self.record.source' used to be written into the output instead.
    return '{}|{}||{}|'.format(self.record.source, license_code, statement)
1 change: 0 additions & 1 deletion processes/ingest/publisher_backlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def runProcess(self):
else:
logger.warning(f'Unknown Publisher Backlist ingestion process type {self.process}')
return

for record in records:
self.addDCDWToUpdateList(record)

Expand Down
11 changes: 5 additions & 6 deletions services/sources/publisher_backlist_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import requests
import json
import urllib.parse
from typing import Optional, Dict
from typing import Optional

from logger import create_log
from mappings.UofM import UofMMapping
from mappings.publisher_backlist import PublisherBacklistMapping
from .source_service import SourceService

logger = create_log(__name__)
Expand All @@ -23,14 +23,14 @@ def get_records(
start_timestamp: datetime=None,
offset: Optional[int]=None,
limit: Optional[int]=None
) -> list[UofMMapping]:
) -> list[PublisherBacklistMapping]:
array_json_records = self.get_records_json(full_import, start_timestamp, offset, limit)

for json_dict in array_json_records:
for records_value in json_dict['records']:
try:
record_metadata_dict = records_value
record = UofMMapping(record_metadata_dict)
record_metadata_dict = records_value['fields']
record = PublisherBacklistMapping(record_metadata_dict)
record.applyMapping()
except Exception:
logger.exception(f'Failed to process Publisher Backlist record')
Expand Down Expand Up @@ -89,7 +89,6 @@ def get_records_array(self,
pub_backlist_records_response = requests.get(url, headers=headers)
pub_backlist_records_response_json = pub_backlist_records_response.json()
array_json = [pub_backlist_records_response_json]

while 'offset' in pub_backlist_records_response_json:
next_page_url = url + f"&offset={pub_backlist_records_response_json['offset']}"
pub_backlist_records_response = requests.get(next_page_url, headers=headers)
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_pub_backlist_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pytest

from mappings.publisher_backlist import PublisherBacklistMapping

class TestPublisherBacklistMapping:
    '''Unit tests for PublisherBacklistMapping.'''

    @pytest.fixture
    def test_mapping(self):
        '''Mapping instance whose constructor only sets `mapping` to None.'''
        class StubbedPublisherBacklistMapping(PublisherBacklistMapping):
            def __init__(self):
                # Deliberately does not call the parent constructor.
                self.mapping = None

        return StubbedPublisherBacklistMapping()

    @pytest.fixture
    def testRecordStandard(self, mocker):
        '''Mock record pre-populated with one value per mapped field.'''
        # NOTE(review): the mapping key is 'contributors' but this fixture
        # sets 'contributor' -- looks like a typo; confirm before relying on it.
        record_fields = dict(
            title='testTitle',
            authors=['testAuthor|||true'],
            dates=['testDate|publication_date'],
            publisher=['testPublisher||'],
            identifiers=['testISBN|isbn', 'testOCLC|oclc'],
            rights='in copyright||||',
            contributor=['testContributor|||contributor'],
            subjects='testSubject',
            source=['UofMichigan Backlist'],
            source_id='testSourceID',
            publisher_project_source=['University of Michigan Press'],
        )
        return mocker.MagicMock(**record_fields)

    def test_createMapping(self, test_mapping):
        '''createMapping exposes the expected fields in the expected order.'''
        expected_fields = [
            'title', 'authors', 'dates', 'publisher',
            'identifiers', 'rights', 'contributors', 'subjects',
            'source', 'source_id', 'publisher_project_source'
        ]

        mapping = test_mapping.createMapping()

        assert list(mapping.keys()) == expected_fields
        assert mapping['title'] == ('Title', '{0}')

    def test_applyFormatting_standard(self, test_mapping, testRecordStandard):
        '''applyFormatting normalises a standard record in place.'''
        test_mapping.record = testRecordStandard

        test_mapping.applyFormatting()

        formatted = test_mapping.record
        assert formatted.has_part == []
        assert formatted.source == 'UofMichigan'
        assert formatted.identifiers == ['testISBN|isbn', 'testOCLC|oclc']
        assert formatted.source_id == 'testSourceID'
        assert formatted.publisher == ['testPublisher||']
        assert formatted.publisher_project_source == 'University of Michigan Press'
        assert formatted.subjects == ['testSubject||']
Loading