Merge branch 'main' into qa

NYPL · May 19, 2023 · 139a550 · 139a550
2 parents f41143d + 40a97cc
commit 139a550
Show file tree

Hide file tree

Showing 5 changed files with 210 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## v1.0.3 - 5/19/23
+- Add research_catalog_identifier_helper function
+
+## v1.0.2 - 5/18/23
+- Identical to v1.0.1 -- this was mistakenly deployed to QA without any changes
+
 ## v1.0.1 - 4/3/23
 - Add transaction support to RedshiftClient
 

diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@ This package contains common Python utility classes and functions.
 * Reading a YAML config file and putting the contents in os.environ
 * Creating a logger in the appropriate format
 * Obfuscating a value using bcrypt
+* Parsing/building Research Catalog identifiers
 
 ## Usage
 ```python
@@ -57,7 +58,22 @@ When a new client or helper file is created, a new optional dependency set shoul
 The optional dependency sets also give the developer the option to manually list out the dependencies of the clients rather than relying upon what the package thinks is required, which can be beneficial in certain circumstances. For instance, AWS lambda functions come with `boto3` and `botocore` pre-installed, so it's not necessary to include these (rather hefty) dependencies in the lambda deployment package.
 
 ### Troubleshooting
-If running `main.py` in this virtual environment produces the following error:
+#### Using PostgreSQLClient in an AWS Lambda
+Because `psycopg` requires a statically linked version of the `libpq` library, the `PostgreSQLClient` cannot be installed as-is in an AWS Lambda function. Instead, it must be packaged as follows:
+```bash
+pip install --target ./package nypl-py-utils[postgresql-client]==1.0.1
+
+pip install \
+    --platform manylinux2014_x86_64 \
+    --target=./package \
+    --implementation cp \
+    --python 3.9 \
+    --only-binary=:all: --upgrade \
+    'psycopg[binary]'
+```
+
+#### Using PostgreSQLClient locally
+If using the `PostgreSQLClient` produces the following error locally:
 ```
 ImportError: no pq wrapper available.
 Attempts made:
@@ -67,7 +83,7 @@ Attempts made:
 ```
 
 then try running:
-```
+```bash
 pip uninstall psycopg
 pip install "psycopg[c]"
 ```
@@ -81,6 +97,7 @@ This repo uses the [Main-QA-Production](https://github.com/NYPL/engineering-gene
 - Cut a feature branch off of `main`
 - Commit changes to your feature branch
 - File a pull request against `main` and assign a reviewer (who must be an owner)
+  - Include relevant updates to pyproject.toml and README
   - In order for the PR to be accepted, it must pass all unit tests, have no lint issues, and update the CHANGELOG (or contain the `Skip-Changelog` label in GitHub)
 - After the PR is accepted, merge into `main`
 - Merge `main` > `qa`

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "nypl_py_utils"
-version = "1.0.1"
+version = "1.0.3"
 authors = [
   { name="Aaron Friedman", email="[email protected]" },
 ]
@@ -63,8 +63,11 @@ config-helper = [
 obfuscation-helper = [
     "bcrypt>=4.0.1"
 ]
+research-catalog-identifier-helper = [
+    "requests>=2.28.1"
+]
 development = [
-    "nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper]",
+    "nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper,research-catalog-identifier-helper]",
     "flake8>=6.0.0",
     "freezegun>=1.2.2",
     "mock>=4.0.3",

diff --git a/src/nypl_py_utils/functions/research_catalog_identifier_helper.py b/src/nypl_py_utils/functions/research_catalog_identifier_helper.py
@@ -0,0 +1,108 @@
+import os
+import re
+import requests
+from requests.exceptions import JSONDecodeError, RequestException
+
+# Module-level cache. nypl_core_source_mapping() stores the fetched mapping
+# here under the key 'nypl-core-source-mapping' and returns it on later calls.
+CACHE = {}
+
+
+def parse_research_catalog_identifier(identifier: str):
+    """
+    Parses a Research Catalog (RC) identifier into its component parts.
+
+    Given a RC identifier (e.g. "b1234", "pb9876", "pi4567"), returns a dict
+    defining:
+     - nyplSource: One of sierra-nypl, recap-pul, recap-cul, or recap-hl (at
+       time of writing)
+     - nyplType: One of bib, holding, or item
+     - id: The identifier with its leading prefix removed
+
+    Raises ResearchCatalogIdentifierError if the identifier is not a string,
+    does not start with a lowercase-letter prefix, or has a prefix that does
+    not appear in the nypl-core source mapping.
+    """
+    if not isinstance(identifier, str):
+        raise ResearchCatalogIdentifierError(
+            f'Invalid RC identifier: {identifier}')
+
+    # Extract the leading run of lowercase letters as the prefix:
+    match = re.match(r'^([a-z]+)', identifier)
+    if match is None:
+        raise ResearchCatalogIdentifierError(
+            f'Invalid RC identifier: {identifier}')
+    prefix = match[0]
+
+    # The id is everything after the prefix. Slice instead of str.replace so
+    # that only the leading prefix is removed; replace would also strip any
+    # later occurrence of the same characters inside the id.
+    record_id = identifier[len(prefix):]
+
+    # Mapping keys to check, in order, and the nyplType each one implies:
+    prefix_keys = (
+        ('bibPrefix', 'bib'),
+        ('itemPrefix', 'item'),
+        ('holdingPrefix', 'holding'),
+    )
+
+    # Look up nyplType and nyplSource in nypl-core based on the prefix. The
+    # first source with a matching prefix wins:
+    for nypl_source, mapping in nypl_core_source_mapping().items():
+        for prefix_key, nypl_type in prefix_keys:
+            if mapping.get(prefix_key) == prefix:
+                return {
+                    'nyplSource': nypl_source,
+                    'nyplType': nypl_type,
+                    'id': record_id
+                }
+
+    # No source declares this prefix:
+    raise ResearchCatalogIdentifierError(
+        f'Invalid RC identifier: {identifier}')
+
+
+def research_catalog_id_prefix(nyplSource: str, nyplType='bib'):
+    """
+    Returns the RC identifier prefix for a given source and record type.
+
+    Given a nyplSource (e.g. 'sierra-nypl') and nyplType (e.g. 'item'), looks
+    up the '<nyplType>Prefix' entry for that source in the nypl-core source
+    mapping and returns it (e.g. 'i').
+
+    Raises ResearchCatalogIdentifierError if the nyplSource is not in the
+    mapping, if nyplType is not a string, or if the source has no prefix for
+    that nyplType.
+    """
+    source_entry = nypl_core_source_mapping().get(nyplSource)
+    if source_entry is None:
+        raise ResearchCatalogIdentifierError(
+                f'Invalid nyplSource: {nyplSource}')
+
+    if not isinstance(nyplType, str):
+        raise ResearchCatalogIdentifierError(
+            f'Invalid nyplType: {nyplType}')
+
+    # Mapping keys take the form 'bibPrefix', 'itemPrefix', 'holdingPrefix'
+    found_prefix = source_entry.get(f'{nyplType}Prefix')
+    if found_prefix is None:
+        raise ResearchCatalogIdentifierError(f'Invalid nyplType: {nyplType}')
+
+    return found_prefix
+
+
+def nypl_core_source_mapping():
+    """
+    Returns the nypl-source-mapping retrieved from NYPL-Core.
+
+    The mapping is fetched from the URL in the NYPL_CORE_SOURCE_MAPPING_URL
+    environment variable (defaulting to the file in the NYPL/nypl-core GitHub
+    repo) and stored in the module-level CACHE, so later calls return the
+    cached value without making another request.
+
+    Raises ResearchCatalogIdentifierError if the file cannot be retrieved
+    (including when the request times out) or cannot be parsed as JSON.
+    """
+    name = 'nypl-core-source-mapping'
+    cached = CACHE.get(name)
+    if cached is not None:
+        return cached
+
+    default_url = 'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json'  # noqa
+    url = os.environ.get('NYPL_CORE_SOURCE_MAPPING_URL', default_url)
+    try:
+        # Always pass a timeout: without one, requests can wait indefinitely
+        # on an unresponsive server. A timeout raises a RequestException
+        # subclass, so it is reported through the handler below.
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+    except RequestException as e:
+        raise ResearchCatalogIdentifierError(
+            'Failed to retrieve nypl-core source-mapping file from {url}:'
+            ' {errorType} {errorMessage}'
+            .format(url=url, errorType=type(e), errorMessage=e)) from None
+
+    try:
+        mapping = response.json()
+    except (JSONDecodeError, KeyError) as e:
+        raise ResearchCatalogIdentifierError(
+            'Failed to parse nypl-core source-mapping file: {errorType}'
+            ' {errorMessage}'
+            .format(errorType=type(e), errorMessage=e)) from None
+
+    # Only cache after a successful fetch and parse:
+    CACHE[name] = mapping
+    return mapping
+
+
+class ResearchCatalogIdentifierError(Exception):
+    """
+    Raised when a Research Catalog identifier, nyplSource, or nyplType is
+    invalid, or when the nypl-core source mapping cannot be loaded.
+
+    The message is available as the `message` attribute and via str().
+    """
+
+    def __init__(self, message=None):
+        # Pass the message to Exception so that str(error), error.args, and
+        # tracebacks include it; leave args empty when no message is given.
+        if message is None:
+            super().__init__()
+        else:
+            super().__init__(message)
+        self.message = message
diff --git a/tests/test_research_catalog_identifier_helper.py b/tests/test_research_catalog_identifier_helper.py
@@ -0,0 +1,72 @@
+import pytest
+import json
+
+from nypl_py_utils.functions.research_catalog_identifier_helper import (
+    parse_research_catalog_identifier, research_catalog_id_prefix,
+    ResearchCatalogIdentifierError)
+
+# Stand-in for the nypl-core source mapping. The autouse fixture in the test
+# class below serves this (JSON-encoded) from the mocked mapping URL. Only
+# 'sierra-nypl' defines a holdingPrefix.
+_TEST_MAPPING = {
+  'sierra-nypl': {
+    'organization': 'nyplOrg:0001',
+    'bibPrefix': 'b',
+    'holdingPrefix': 'h',
+    'itemPrefix': 'i'
+  },
+  'recap-pul': {
+    'organization': 'nyplOrg:0003',
+    'bibPrefix': 'pb',
+    'itemPrefix': 'pi'
+  },
+  'recap-cul': {
+    'organization': 'nyplOrg:0002',
+    'bibPrefix': 'cb',
+    'itemPrefix': 'ci'
+  },
+  'recap-hl': {
+    'organization': 'nyplOrg:0004',
+    'bibPrefix': 'hb',
+    'itemPrefix': 'hi'
+  }
+}
+
+
+class TestResearchCatalogIdentifierHelper:
+    """Tests for the Research Catalog identifier helper functions."""
+
+    @pytest.fixture(autouse=True)
+    def test_instance(self, requests_mock):
+        # Serve the canned mapping in place of the real nypl-core file
+        mapping_url = 'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json'  # noqa
+        requests_mock.get(mapping_url, text=json.dumps(_TEST_MAPPING))
+
+    def test_parse_research_catalog_identifier_parses_valid(self):
+        # identifier -> (expected nyplSource, expected nyplType)
+        expectations = [
+            ('b1234', 'sierra-nypl', 'bib'),
+            ('cb1234', 'recap-cul', 'bib'),
+            ('pi1234', 'recap-pul', 'item'),
+            ('h1234', 'sierra-nypl', 'holding'),
+        ]
+        for identifier, source, record_type in expectations:
+            parsed = parse_research_catalog_identifier(identifier)
+            assert parsed == {
+                'id': '1234',
+                'nyplSource': source,
+                'nyplType': record_type,
+            }
+
+    def test_parse_research_catalog_identifier_fails_nonsense(self):
+        # Non-strings, an unknown prefix, and a missing prefix must all fail
+        for bad_identifier in (None, 1234, 'z1234', '1234'):
+            with pytest.raises(ResearchCatalogIdentifierError):
+                parse_research_catalog_identifier(bad_identifier)
+
+    def test_research_catalog_id_prefix_parses_valid(self, mocker):
+        # (positional args, expected prefix); first case uses the default type
+        cases = [
+            (('sierra-nypl',), 'b'),
+            (('sierra-nypl', 'bib'), 'b'),
+            (('sierra-nypl', 'item'), 'i'),
+            (('sierra-nypl', 'holding'), 'h'),
+            (('recap-pul', 'bib'), 'pb'),
+            (('recap-hl', 'bib'), 'hb'),
+            (('recap-hl', 'item'), 'hi'),
+            (('recap-pul', 'item'), 'pi'),
+        ]
+        for args, expected_prefix in cases:
+            assert research_catalog_id_prefix(*args) == expected_prefix
+
+    def test_research_catalog_id_prefix_fails_nonsense(self, mocker):
+        # Unknown sources first, then a valid source with unknown types
+        bad_calls = [
+            ('sierra-cul',),
+            (None,),
+            ('recap-nypl',),
+            ('sierra-nypl', None),
+            ('sierra-nypl', '...'),
+        ]
+        for args in bad_calls:
+            with pytest.raises(ResearchCatalogIdentifierError):
+                research_catalog_id_prefix(*args)