-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
210 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build" | |
|
||
[project] | ||
name = "nypl_py_utils" | ||
version = "1.0.1" | ||
version = "1.0.3" | ||
authors = [ | ||
{ name="Aaron Friedman", email="[email protected]" }, | ||
] | ||
|
@@ -63,8 +63,11 @@ config-helper = [ | |
obfuscation-helper = [ | ||
"bcrypt>=4.0.1" | ||
] | ||
research-catalog-identifier-helper = [ | ||
"requests>=2.28.1" | ||
] | ||
development = [ | ||
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper]", | ||
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper,research-catalog-identifier-helper]", | ||
"flake8>=6.0.0", | ||
"freezegun>=1.2.2", | ||
"mock>=4.0.3", | ||
|
108 changes: 108 additions & 0 deletions
108
src/nypl_py_utils/functions/research_catalog_identifier_helper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
import os | ||
import re | ||
import requests | ||
from requests.exceptions import JSONDecodeError, RequestException | ||
|
||
# Module-level cache of values fetched over HTTP, keyed by a short name
# (nypl_core_source_mapping stores its result here).
CACHE: dict = {}
|
||
|
||
def parse_research_catalog_identifier(identifier: str):
    """
    Given a RC identifier (e.g. "b1234", "pb9876", "pi4567"), returns a dict
    defining:
     - nyplSource: One of sierra-nypl, recap-pul, recap-cul, or recap-hl (at
                   writing)
     - nyplType: One of bib, holding, or item
     - id: The identifier with its leading prefix removed (the numeric
           string id for well-formed identifiers)

    Raises ResearchCatalogIdentifierError if the identifier is not a string,
    does not start with a lowercase alphabetic prefix, or its prefix is not
    found in the nypl-core source mapping.
    """
    if not isinstance(identifier, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')

    # Extract prefix from the identifier:
    match = re.match(r'^([a-z]+)', identifier)
    if match is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')
    prefix = match[0]

    # The id is the identifier without the leading prefix. Slice it off
    # rather than using str.replace, which would also strip any later
    # occurrence of the prefix characters from the rest of the identifier.
    record_id = identifier[len(prefix):]
    nyplType = None
    nyplSource = None

    # Look up nyplType and nyplSource in nypl-core based on the prefix:
    for _nyplSource, mapping in nypl_core_source_mapping().items():
        if mapping.get('bibPrefix') == prefix:
            nyplType = 'bib'
        elif mapping.get('itemPrefix') == prefix:
            nyplType = 'item'
        elif mapping.get('holdingPrefix') == prefix:
            nyplType = 'holding'
        if nyplType is not None:
            nyplSource = _nyplSource
            break

    if nyplSource is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')

    return {
        'nyplSource': nyplSource,
        'nyplType': nyplType,
        'id': record_id
    }
|
||
|
||
def research_catalog_id_prefix(nyplSource: str, nyplType='bib'):
    """
    Returns the RC identifier prefix (e.g. 'i') registered in the nypl-core
    source mapping for the given nyplSource (e.g. 'sierra-nypl') and nyplType
    (e.g. 'item'; defaults to 'bib').

    Raises ResearchCatalogIdentifierError if the nyplSource is not in the
    mapping, if nyplType is not a string, or if the source has no prefix for
    that nyplType.
    """
    source_entry = nypl_core_source_mapping().get(nyplSource)
    if source_entry is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplSource: {nyplSource}')

    if not isinstance(nyplType, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplType: {nyplType}')

    # Mapping entries name their prefixes "<type>Prefix", e.g. "bibPrefix"
    prefix = source_entry.get(f'{nyplType}Prefix')
    if prefix is None:
        raise ResearchCatalogIdentifierError(f'Invalid nyplType: {nyplType}')

    return prefix
|
||
|
||
def nypl_core_source_mapping():
    """
    Builds a nypl-source-mapping by retrieving the mapping from NYPL-Core

    The mapping is fetched from the URL in the NYPL_CORE_SOURCE_MAPPING_URL
    environment variable (falling back to the nypl-core master branch on
    GitHub) and stored in the module-level CACHE, so later calls return the
    cached value without making another HTTP request.

    Raises ResearchCatalogIdentifierError if the file cannot be retrieved
    (connection failure, timeout or error status) or if the response body is
    not valid JSON.
    """
    name = 'nypl-core-source-mapping'
    cached = CACHE.get(name)
    if cached is not None:
        return cached

    url = os.environ.get('NYPL_CORE_SOURCE_MAPPING_URL',
                         'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json')  # noqa
    try:
        # requests applies no timeout by default, so an unresponsive host
        # would block the caller indefinitely. Timeouts are raised as
        # RequestException subclasses and handled below.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except RequestException as e:
        raise ResearchCatalogIdentifierError(
            'Failed to retrieve nypl-core source-mapping file from {url}:'
            ' {errorType} {errorMessage}'
            .format(url=url, errorType=type(e), errorMessage=e)) from None

    try:
        mapping = response.json()
    except JSONDecodeError as e:
        raise ResearchCatalogIdentifierError(
            'Failed to parse nypl-core source-mapping file: {errorType}'
            ' {errorMessage}'
            .format(errorType=type(e), errorMessage=e)) from None

    # Only cache once the body has parsed successfully
    CACHE[name] = mapping
    return mapping
|
||
|
||
class ResearchCatalogIdentifierError(Exception):
    """
    Raised when a Research Catalog identifier, nyplSource or nyplType is
    invalid, or when the nypl-core source mapping cannot be loaded.

    The message is available both as the `message` attribute and through the
    standard exception machinery (str(e), e.args, tracebacks).
    """

    def __init__(self, message=None):
        # Hand the message to Exception so it shows up in str(e), e.args and
        # tracebacks; without this the error prints as an empty string.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)
        self.message = message
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import pytest | ||
import json | ||
|
||
from nypl_py_utils.functions.research_catalog_identifier_helper import ( | ||
parse_research_catalog_identifier, research_catalog_id_prefix, | ||
ResearchCatalogIdentifierError) | ||
|
||
_TEST_MAPPING = { | ||
'sierra-nypl': { | ||
'organization': 'nyplOrg:0001', | ||
'bibPrefix': 'b', | ||
'holdingPrefix': 'h', | ||
'itemPrefix': 'i' | ||
}, | ||
'recap-pul': { | ||
'organization': 'nyplOrg:0003', | ||
'bibPrefix': 'pb', | ||
'itemPrefix': 'pi' | ||
}, | ||
'recap-cul': { | ||
'organization': 'nyplOrg:0002', | ||
'bibPrefix': 'cb', | ||
'itemPrefix': 'ci' | ||
}, | ||
'recap-hl': { | ||
'organization': 'nyplOrg:0004', | ||
'bibPrefix': 'hb', | ||
'itemPrefix': 'hi' | ||
} | ||
} | ||
|
||
|
||
class TestResearchCatalogIdentifierHelper:
    """
    Tests for parse_research_catalog_identifier and
    research_catalog_id_prefix, run against the canned _TEST_MAPPING instead
    of the live nypl-core file.
    """

    @pytest.fixture(autouse=True)
    def test_instance(self, requests_mock):
        # Answer requests for the default mapping URL with the canned mapping
        mapping_url = 'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json'  # noqa
        requests_mock.get(mapping_url, text=json.dumps(_TEST_MAPPING))

    def test_parse_research_catalog_identifier_parses_valid(self):
        # (identifier, expected nyplSource, expected nyplType)
        cases = [
            ('b1234', 'sierra-nypl', 'bib'),
            ('cb1234', 'recap-cul', 'bib'),
            ('pi1234', 'recap-pul', 'item'),
            ('h1234', 'sierra-nypl', 'holding'),
        ]
        for identifier, source, record_type in cases:
            assert parse_research_catalog_identifier(identifier) == {
                'id': '1234', 'nyplSource': source, 'nyplType': record_type}

    def test_parse_research_catalog_identifier_fails_nonsense(self):
        # Non-strings, an unknown prefix, and a missing prefix are rejected
        for bad_identifier in [None, 1234, 'z1234', '1234']:
            with pytest.raises(ResearchCatalogIdentifierError):
                parse_research_catalog_identifier(bad_identifier)

    def test_research_catalog_id_prefix_parses_valid(self, mocker):
        # nyplType defaults to 'bib' when omitted
        assert research_catalog_id_prefix('sierra-nypl') == 'b'

        # (nyplSource, nyplType, expected prefix)
        cases = [
            ('sierra-nypl', 'bib', 'b'),
            ('sierra-nypl', 'item', 'i'),
            ('sierra-nypl', 'holding', 'h'),
            ('recap-pul', 'bib', 'pb'),
            ('recap-hl', 'bib', 'hb'),
            ('recap-hl', 'item', 'hi'),
            ('recap-pul', 'item', 'pi'),
        ]
        for source, record_type, expected in cases:
            assert research_catalog_id_prefix(source, record_type) == expected

    def test_research_catalog_id_prefix_fails_nonsense(self, mocker):
        for bad_source in ['sierra-cul', None, 'recap-nypl']:
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix(bad_source)
        for bad_type in [None, '...']:
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix('sierra-nypl', bad_type)