diff --git a/managers/oclc_auth.py b/managers/oclc_auth.py index 7626e74f11..454f789d45 100644 --- a/managers/oclc_auth.py +++ b/managers/oclc_auth.py @@ -16,7 +16,7 @@ class OCLCAuthManager: _metadata_token = None _metadata_token_expires_at = None OCLC_SEARCH_AUTH_URL = 'https://oauth.oclc.org/token?scope=wcapi&grant_type=client_credentials' - OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI&grant_type=client_credentials' + OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI:view_marc_bib&grant_type=client_credentials' TIME_TO_REFRESH_IN_SECONDS = 60 diff --git a/managers/oclc_catalog.py b/managers/oclc_catalog.py index 3b5069d456..a3e35b8121 100644 --- a/managers/oclc_catalog.py +++ b/managers/oclc_catalog.py @@ -11,7 +11,6 @@ class OCLCCatalogManager: - CATALOG_URL = 'http://www.worldcat.org/webservices/catalog/content/{}?wskey={}' METADATA_BIB_URL = 'https://metadata.api.oclc.org/worldcat/manage/bibs/{}' OCLC_SEARCH_URL = 'https://americas.discovery.api.oclc.org/worldcat/search/v2/' ITEM_TYPES = ['archv', 'audiobook', 'book', 'encyc', 'jrnl'] @@ -19,17 +18,14 @@ class OCLCCatalogManager: MAX_NUMBER_OF_RECORDS = 100 BEST_MATCH = 'bestMatch' - def __init__(self): - self.oclc_key = os.environ['OCLC_API_KEY'] - - def query_catalog_v2(self, oclc_no): - catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key) + def query_catalog(self, oclc_no): + catalog_query = self.METADATA_BIB_URL.format(oclc_no) for _ in range(0, 3): try: token = OCLCAuthManager.get_metadata_token() headers = { 'Authorization': f'Bearer {token}' } - + catalog_response = requests.get(catalog_query, headers=headers, timeout=3) if catalog_response.status_code != 200: @@ -42,27 +38,7 @@ def query_catalog_v2(self, oclc_no): except Exception as e: logger.error(f'Failed to query catalog with query {catalog_query} due to {e}') return None - - return None - - def query_catalog(self, oclc_no): - catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key) - - for _ in range(0, 3): - try: - catalog_response = requests.get(catalog_query, timeout=3) - if catalog_response.status_code != 200: - logger.warning(f'OCLC catalog request failed with status {catalog_response.status_code}') - return None - - return catalog_response.text - except (Timeout, ConnectionError): - logger.warning(f'Could not connect to {catalog_query} or timed out') - except Exception as e: - logger.error(f'Failed to query catalog with query {catalog_query} due to {e}') - return None - return None def get_related_oclc_numbers(self, oclc_number: int) -> list[int]: diff --git a/mappings/oclcCatalog.py b/mappings/oclcCatalog.py index 16c2906993..e4d9d62e80 100644 --- a/mappings/oclcCatalog.py +++ b/mappings/oclcCatalog.py @@ -16,10 +16,18 @@ class CatalogMapping(XMLMapping): 'hathitrust': r'catalog.hathitrust.org\/api\/volumes\/([a-z]{3,6}\/[a-zA-Z0-9]+)\.html' } + OCLC_PREFIXES = ['(OCoLC)', 'on', 'ocn', 'ocm'] + def __init__(self, source, namespace, constants): super(CatalogMapping, self).__init__(source, namespace, constants) self.mapping = self.createMapping() + def remove_oclc_prefixes(self, oclc_id): + for prefix in self.OCLC_PREFIXES: + oclc_id = oclc_id.removeprefix(prefix) + return oclc_id + + def createMapping(self): return { 'title': ('//oclc:datafield[@tag=\'245\']/oclc:subfield[@code=\'a\' or @code=\'b\']/text()', '{0} {1}'), @@ -158,12 +166,13 @@ def createMapping(self): def applyFormatting(self): self.record.source = 'oclcCatalog' + self.record.identifiers[0] = self.remove_oclc_prefixes(self.record.identifiers[0]) self.record.source_id = self.record.identifiers[0] self.record.frbr_status = 'complete' - + _, _, lang_3, *_ = tuple(self.record.languages[0].split('|')) self.record.languages = [('||{}'.format(lang_3[35:38]))] - + self.record.has_part = self.record.has_part[:10] self.record.has_part = list(filter(None, [ diff --git a/tests/unit/test_oclcCatalog_manager.py b/tests/unit/test_oclcCatalog_manager.py index 83ed226ff9..2a7dcb2de1 100644 --- a/tests/unit/test_oclcCatalog_manager.py +++ b/tests/unit/test_oclcCatalog_manager.py @@ -8,18 +8,16 @@ class TestOCLCCatalogManager: @pytest.fixture def testInstance(self, mocker): - mocker.patch.dict('os.environ', {'OCLC_API_KEY': 'test_api_key'}) - return OCLCCatalogManager() - def test_initializer(self, testInstance): - assert testInstance.oclc_key == 'test_api_key' - def test_query_catalog_success(self, testInstance, mocker): mockResponse = mocker.MagicMock() mockRequest = mocker.patch('managers.oclc_catalog.requests') mockRequest.get.return_value = mockResponse + mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token') + mock_auth.return_value = 'foo' + mockResponse.status_code = 200 mockResponse.text = 'testClassifyRecord' @@ -27,7 +25,8 @@ def test_query_catalog_success(self, testInstance, mocker): assert testResponse == 'testClassifyRecord' mockRequest.get.assert_called_once_with( - 'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', + 'https://metadata.api.oclc.org/worldcat/manage/bibs/1', + headers={'Authorization': 'Bearer foo'}, timeout=3 ) @@ -36,6 +35,9 @@ def test_query_catalog_error(self, testInstance, mocker): mockRequest = mocker.patch('managers.oclc_catalog.requests') mockRequest.get.return_value = mockResponse + mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token') + mock_auth.return_value = 'foo' + mockResponse.status_code = 500 mockResponse.text = 'testClassifyRecord' @@ -43,7 +45,8 @@ def test_query_catalog_error(self, testInstance, mocker): assert testResponse == None mockRequest.get.assert_called_once_with( - 'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', + 'https://metadata.api.oclc.org/worldcat/manage/bibs/1', + headers={'Authorization': 'Bearer foo'}, timeout=3 ) @@ -52,6 +55,9 @@ def test_query_catalog_single_retry_then_success(self, testInstance, mocker): mockRequest = mocker.patch('managers.oclc_catalog.requests') mockRequest.get.side_effect = [ConnectionError, mockResponse] + mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token') + mock_auth.return_value = 'foo' + mockResponse.status_code = 200 mockResponse.text = 'testClassifyRecord' @@ -59,7 +65,7 @@ def test_query_catalog_single_retry_then_success(self, testInstance, mocker): assert testResponse == 'testClassifyRecord' mockRequest.get.assert_has_calls( - [mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 2 + [mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 2 ) def test_query_catalog_exhaust_retries(self, testInstance, mocker): @@ -67,6 +73,9 @@ def test_query_catalog_exhaust_retries(self, testInstance, mocker): mockRequest = mocker.patch('managers.oclc_catalog.requests') mockRequest.get.side_effect = [ConnectionError, ConnectionError, Timeout] + mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token') + mock_auth.return_value = 'foo' + mockResponse.status_code = 200 mockResponse.text = 'testClassifyRecord' @@ -74,7 +83,7 @@ def test_query_catalog_exhaust_retries(self, testInstance, mocker): assert testResponse == None mockRequest.get.assert_has_calls( - [mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 3 + [mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 3 ) def test_generate_search_query_w_identifier(self, testInstance): diff --git a/tests/unit/test_oclcCatalog_mapping.py b/tests/unit/test_oclcCatalog_mapping.py index c56426366b..25f9df8b8c 100644 --- a/tests/unit/test_oclcCatalog_mapping.py +++ b/tests/unit/test_oclcCatalog_mapping.py @@ -28,6 +28,12 @@ def testRecord_standard(self, mocker): has_part=['1|uri|test|text/html|{}', '1|uri|bad|text/html|{}'] ) + def test_remove_oclc_prefixes(self, testMapping): + assert testMapping.remove_oclc_prefixes('on48542660') == '48542660' + assert testMapping.remove_oclc_prefixes('ocm48542660') == '48542660' + assert testMapping.remove_oclc_prefixes('(OCoLC)on48542660') == '48542660' + assert testMapping.remove_oclc_prefixes('foo48542660') == 'foo48542660' + def test_createMapping(self, testMapping): recordMapping = testMapping.createMapping()