Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2288 Querying by OCLC Number uses metadata API MARCXML endpoint #464

Merged
merged 5 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion managers/oclc_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class OCLCAuthManager:
_metadata_token = None
_metadata_token_expires_at = None
OCLC_SEARCH_AUTH_URL = 'https://oauth.oclc.org/token?scope=wcapi&grant_type=client_credentials'
OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI&grant_type=client_credentials'
OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI:view_marc_bib&grant_type=client_credentials'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!


TIME_TO_REFRESH_IN_SECONDS = 60

Expand Down
30 changes: 3 additions & 27 deletions managers/oclc_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,21 @@


class OCLCCatalogManager:
CATALOG_URL = 'http://www.worldcat.org/webservices/catalog/content/{}?wskey={}'
METADATA_BIB_URL = 'https://metadata.api.oclc.org/worldcat/manage/bibs/{}'
OCLC_SEARCH_URL = 'https://americas.discovery.api.oclc.org/worldcat/search/v2/'
ITEM_TYPES = ['archv', 'audiobook', 'book', 'encyc', 'jrnl']
LIMIT = 50
MAX_NUMBER_OF_RECORDS = 100
BEST_MATCH = 'bestMatch'

def __init__(self):
self.oclc_key = os.environ['OCLC_API_KEY']

def query_catalog_v2(self, oclc_no):
catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key)
def query_catalog(self, oclc_no):
catalog_query = self.METADATA_BIB_URL.format(oclc_no)

for _ in range(0, 3):
try:
token = OCLCAuthManager.get_metadata_token()
headers = { 'Authorization': f'Bearer {token}' }

catalog_response = requests.get(catalog_query, headers=headers, timeout=3)

if catalog_response.status_code != 200:
Expand All @@ -42,27 +38,7 @@ def query_catalog_v2(self, oclc_no):
except Exception as e:
logger.error(f'Failed to query catalog with query {catalog_query} due to {e}')
return None

return None

def query_catalog(self, oclc_no):
catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key)

for _ in range(0, 3):
try:
catalog_response = requests.get(catalog_query, timeout=3)

if catalog_response.status_code != 200:
logger.warning(f'OCLC catalog request failed with status {catalog_response.status_code}')
return None

return catalog_response.text
except (Timeout, ConnectionError):
logger.warning(f'Could not connect to {catalog_query} or timed out')
except Exception as e:
logger.error(f'Failed to query catalog with query {catalog_query} due to {e}')
return None

return None

def get_related_oclc_numbers(self, oclc_number: int) -> list[int]:
Expand Down
8 changes: 6 additions & 2 deletions mappings/oclcCatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ def __init__(self, source, namespace, constants):
super(CatalogMapping, self).__init__(source, namespace, constants)
self.mapping = self.createMapping()

def remove_oclc_prefixes(self, oclc_id):
    """Return ``oclc_id`` with known OCLC prefixes stripped from its start.

    The prefixes ``(OCoLC)``, ``on``, ``ocn`` and ``ocm`` are tried in that
    order, each at most once, and each only when it appears at the very
    start of the progressively shortened string. A value that begins with
    none of them is returned unchanged.
    """
    stripped = oclc_id
    # Order matters: the parenthesised marker may be followed by one of the
    # short prefixes, e.g. '(OCoLC)on48542660'.
    for prefix in ('(OCoLC)', 'on', 'ocn', 'ocm'):
        stripped = stripped.removeprefix(prefix)
    return stripped
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend pulling up prefixes into a set or list above as a constant, e.g. OCLC_PREFIXES = ['(OCoLC)', 'on', 'ocn', 'ocm'].

That way we can just do:

for prefix in OCLC_PREFIXES:
  oclc_id = oclc_id.removeprefix(prefix)

In theory ha

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated!


def createMapping(self):
return {
'title': ('//oclc:datafield[@tag=\'245\']/oclc:subfield[@code=\'a\' or @code=\'b\']/text()', '{0} {1}'),
Expand Down Expand Up @@ -158,12 +161,13 @@ def createMapping(self):

def applyFormatting(self):
self.record.source = 'oclcCatalog'
self.record.identifiers[0] = self.remove_oclc_prefixes(self.record.identifiers[0])
self.record.source_id = self.record.identifiers[0]
self.record.frbr_status = 'complete'

_, _, lang_3, *_ = tuple(self.record.languages[0].split('|'))
self.record.languages = [('||{}'.format(lang_3[35:38]))]

self.record.has_part = self.record.has_part[:10]

self.record.has_part = list(filter(None, [
Expand Down
27 changes: 18 additions & 9 deletions tests/unit/test_oclcCatalog_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,25 @@
class TestOCLCCatalogManager:
@pytest.fixture
def testInstance(self, mocker):
mocker.patch.dict('os.environ', {'OCLC_API_KEY': 'test_api_key'})

return OCLCCatalogManager()

def test_initializer(self, testInstance):
assert testInstance.oclc_key == 'test_api_key'

def test_query_catalog_success(self, testInstance, mocker):
mockResponse = mocker.MagicMock()
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.return_value = mockResponse

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 200
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == 'testClassifyRecord'
mockRequest.get.assert_called_once_with(
'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key',
'https://metadata.api.oclc.org/worldcat/manage/bibs/1',
headers={'Authorization': 'Bearer foo'},
timeout=3
)

Expand All @@ -36,14 +35,18 @@ def test_query_catalog_error(self, testInstance, mocker):
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.return_value = mockResponse

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 500
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == None
mockRequest.get.assert_called_once_with(
'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key',
'https://metadata.api.oclc.org/worldcat/manage/bibs/1',
headers={'Authorization': 'Bearer foo'},
timeout=3
)

Expand All @@ -52,29 +55,35 @@ def test_query_catalog_single_retry_then_success(self, testInstance, mocker):
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.side_effect = [ConnectionError, mockResponse]

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 200
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == 'testClassifyRecord'
mockRequest.get.assert_has_calls(
[mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 2
[mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 2
)

def test_query_catalog_exhaust_retries(self, testInstance, mocker):
mockResponse = mocker.MagicMock()
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.side_effect = [ConnectionError, ConnectionError, Timeout]

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 200
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == None
mockRequest.get.assert_has_calls(
[mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 3
[mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 3
)

def test_generate_search_query_w_identifier(self, testInstance):
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/test_oclcCatalog_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ def testRecord_standard(self, mocker):
has_part=['1|uri|test|text/html|{}', '1|uri|bad|text/html|{}']
)

def test_remove_oclc_prefixes(self, testMapping):
    """Known OCLC prefixes are stripped; an unrecognised prefix is left as-is."""
    # Raw identifier -> value expected back from remove_oclc_prefixes.
    expected_by_input = {
        'on48542660': '48542660',
        'ocm48542660': '48542660',
        '(OCoLC)on48542660': '48542660',
        'foo48542660': 'foo48542660',
    }

    for raw_id, expected in expected_by_input.items():
        assert testMapping.remove_oclc_prefixes(raw_id) == expected

def test_createMapping(self, testMapping):
recordMapping = testMapping.createMapping()

Expand Down
Loading