NYPL · Apophenia · Dec 5, 2024 · Dec 3, 2024 · Dec 3, 2024 · Dec 3, 2024
diff --git a/managers/oclc_auth.py b/managers/oclc_auth.py
@@ -16,7 +16,7 @@ class OCLCAuthManager:
     _metadata_token = None
     _metadata_token_expires_at = None
     OCLC_SEARCH_AUTH_URL = 'https://oauth.oclc.org/token?scope=wcapi&grant_type=client_credentials'
-    OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI&grant_type=client_credentials'
+    OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI:view_marc_bib&grant_type=client_credentials'
 
     TIME_TO_REFRESH_IN_SECONDS = 60
 

diff --git a/managers/oclc_catalog.py b/managers/oclc_catalog.py
@@ -11,25 +11,21 @@
 
 
 class OCLCCatalogManager:
-    CATALOG_URL = 'http://www.worldcat.org/webservices/catalog/content/{}?wskey={}'
     METADATA_BIB_URL = 'https://metadata.api.oclc.org/worldcat/manage/bibs/{}'
     OCLC_SEARCH_URL = 'https://americas.discovery.api.oclc.org/worldcat/search/v2/'
     ITEM_TYPES = ['archv', 'audiobook', 'book', 'encyc', 'jrnl']
     LIMIT = 50
     MAX_NUMBER_OF_RECORDS = 100
     BEST_MATCH = 'bestMatch'
 
-    def __init__(self):
-        self.oclc_key = os.environ['OCLC_API_KEY']
-
-    def query_catalog_v2(self, oclc_no):
-        catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key)
+    def query_catalog(self, oclc_no):
+        catalog_query = self.METADATA_BIB_URL.format(oclc_no)
 
         for _ in range(0, 3):
             try:
                 token = OCLCAuthManager.get_metadata_token()
                 headers = { 'Authorization': f'Bearer {token}' }
-                
+
                 catalog_response = requests.get(catalog_query, headers=headers, timeout=3)
 
                 if catalog_response.status_code != 200:
@@ -42,27 +38,7 @@ def query_catalog_v2(self, oclc_no):
             except Exception as e:
                 logger.error(f'Failed to query catalog with query {catalog_query} due to {e}')
                 return None
-
-        return None
-
-    def query_catalog(self, oclc_no):
-        catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key)
-
-        for _ in range(0, 3):
-            try:
-                catalog_response = requests.get(catalog_query, timeout=3)
 
-                if catalog_response.status_code != 200:
-                    logger.warning(f'OCLC catalog request failed with status {catalog_response.status_code}')
-                    return None
-
-                return catalog_response.text
-            except (Timeout, ConnectionError):
-                logger.warning(f'Could not connect to {catalog_query} or timed out')
-            except Exception as e:
-                logger.error(f'Failed to query catalog with query {catalog_query} due to {e}')
-                return None
-
         return None
 
     def get_related_oclc_numbers(self, oclc_number: int) -> list[int]:

diff --git a/mappings/oclcCatalog.py b/mappings/oclcCatalog.py
@@ -20,6 +20,9 @@ def __init__(self, source, namespace, constants):
         super(CatalogMapping, self).__init__(source, namespace, constants)
         self.mapping = self.createMapping()
 
+    def remove_oclc_prefixes(self, oclc_id: str) -> str:  # Strip a leading '(OCoLC)', then 'on', 'ocn', 'ocm' -- each at most once, in that order.
+        return oclc_id.removeprefix('(OCoLC)').removeprefix('on').removeprefix('ocn').removeprefix('ocm')  # removeprefix returns the string unchanged when the prefix is absent
+
     def createMapping(self):
         return {
             'title': ('//oclc:datafield[@tag=\'245\']/oclc:subfield[@code=\'a\' or @code=\'b\']/text()', '{0} {1}'),
@@ -158,12 +161,13 @@ def createMapping(self):
 
     def applyFormatting(self):
         self.record.source = 'oclcCatalog'
+        self.record.identifiers[0] = self.remove_oclc_prefixes(self.record.identifiers[0])
         self.record.source_id = self.record.identifiers[0]
         self.record.frbr_status = 'complete'
-        
+
         _, _, lang_3, *_ = tuple(self.record.languages[0].split('|'))
         self.record.languages = [('||{}'.format(lang_3[35:38]))]
-        
+
         self.record.has_part = self.record.has_part[:10]
 
         self.record.has_part = list(filter(None, [

diff --git a/tests/unit/test_oclcCatalog_manager.py b/tests/unit/test_oclcCatalog_manager.py
@@ -8,26 +8,25 @@
 class TestOCLCCatalogManager:
     @pytest.fixture
     def testInstance(self, mocker):
-        mocker.patch.dict('os.environ', {'OCLC_API_KEY': 'test_api_key'})
-
         return OCLCCatalogManager()
 
-    def test_initializer(self, testInstance):
-        assert testInstance.oclc_key == 'test_api_key'
-
     def test_query_catalog_success(self, testInstance, mocker):
         mockResponse = mocker.MagicMock()
         mockRequest = mocker.patch('managers.oclc_catalog.requests')
         mockRequest.get.return_value = mockResponse
 
+        mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')  # replace the token lookup with a mock
+        mock_auth.return_value = 'foo'  # fixed token, so the expected header below is 'Bearer foo'
+
         mockResponse.status_code = 200
         mockResponse.text = 'testClassifyRecord'
 
         testResponse = testInstance.query_catalog(1)
 
         assert testResponse == 'testClassifyRecord'
         mockRequest.get.assert_called_once_with(
-            'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key',
+            'https://metadata.api.oclc.org/worldcat/manage/bibs/1',  # METADATA_BIB_URL formatted with the oclc_no passed above (1)
+            headers={'Authorization': 'Bearer foo'},
             timeout=3
         )
 
@@ -36,14 +35,18 @@ def test_query_catalog_error(self, testInstance, mocker):
         mockRequest = mocker.patch('managers.oclc_catalog.requests')
         mockRequest.get.return_value = mockResponse
 
+        mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
+        mock_auth.return_value = 'foo'
+
         mockResponse.status_code = 500
         mockResponse.text = 'testClassifyRecord'
 
         testResponse = testInstance.query_catalog(1)
 
         assert testResponse == None
         mockRequest.get.assert_called_once_with(
-            'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key',
+            'https://metadata.api.oclc.org/worldcat/manage/bibs/1',
+            headers={'Authorization': 'Bearer foo'},
             timeout=3
         )
 
@@ -52,29 +55,35 @@ def test_query_catalog_single_retry_then_success(self, testInstance, mocker):
         mockRequest = mocker.patch('managers.oclc_catalog.requests')
         mockRequest.get.side_effect = [ConnectionError, mockResponse]
 
+        mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
+        mock_auth.return_value = 'foo'
+
         mockResponse.status_code = 200
         mockResponse.text = 'testClassifyRecord'
 
         testResponse = testInstance.query_catalog(1)
 
         assert testResponse == 'testClassifyRecord'
         mockRequest.get.assert_has_calls(
-            [mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 2
+            [mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 2
         )
 
     def test_query_catalog_exhaust_retries(self, testInstance, mocker):
         mockResponse = mocker.MagicMock()
         mockRequest = mocker.patch('managers.oclc_catalog.requests')
         mockRequest.get.side_effect = [ConnectionError, ConnectionError, Timeout]
 
+        mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')  # replace the token lookup with a mock
+        mock_auth.return_value = 'foo'  # fixed token, so each request carries 'Bearer foo'
+
         mockResponse.status_code = 200
         mockResponse.text = 'testClassifyRecord'
 
         testResponse = testInstance.query_catalog(1)
 
-        assert testResponse == None
+        assert testResponse is None  # identity check for the None singleton (PEP 8); all three attempts raised
         mockRequest.get.assert_has_calls(
-            [mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 3
+            [mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 3
         )
 
     def test_generate_search_query_w_identifier(self, testInstance):

diff --git a/tests/unit/test_oclcCatalog_mapping.py b/tests/unit/test_oclcCatalog_mapping.py
@@ -28,6 +28,12 @@ def testRecord_standard(self, mocker):
             has_part=['1|uri|test|text/html|{}', '1|uri|bad|text/html|{}']
         )
 
+    def test_remove_oclc_prefixes(self, testMapping):
+        assert testMapping.remove_oclc_prefixes('on48542660') == '48542660'  # 'on' prefix stripped
+        assert testMapping.remove_oclc_prefixes('ocm48542660') == '48542660'  # 'ocm' prefix stripped
+        assert testMapping.remove_oclc_prefixes('(OCoLC)on48542660') == '48542660'  # '(OCoLC)' and 'on' both stripped
+        assert testMapping.remove_oclc_prefixes('foo48542660') == 'foo48542660'  # unrecognized prefix is left untouched
+
     def test_createMapping(self, testMapping):
         recordMapping = testMapping.createMapping()