Skip to content

Commit

Permalink
Merge branch 'main' into qa
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronfriedman6 committed May 19, 2023
2 parents f41143d + 40a97cc commit 139a550
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## v1.0.3 - 5/19/23
- Add research_catalog_identifier_helper function

## v1.0.2 - 5/18/23
- Identical to v1.0.1 -- this was mistakenly deployed to QA without any changes

## v1.0.1 - 4/3/23
- Add transaction support to RedshiftClient

Expand Down
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ This package contains common Python utility classes and functions.
* Reading a YAML config file and putting the contents in os.environ
* Creating a logger in the appropriate format
* Obfuscating a value using bcrypt
* Parsing/building Research Catalog identifiers

## Usage
```python
Expand Down Expand Up @@ -57,7 +58,22 @@ When a new client or helper file is created, a new optional dependency set shoul
The optional dependency sets also give the developer the option to manually list out the dependencies of the clients rather than relying upon what the package thinks is required, which can be beneficial in certain circumstances. For instance, AWS lambda functions come with `boto3` and `botocore` pre-installed, so it's not necessary to include these (rather hefty) dependencies in the lambda deployment package.

### Troubleshooting
If running `main.py` in this virtual environment produces the following error:
#### Using PostgreSQLClient in an AWS Lambda
Because `psycopg` requires a statically linked version of the `libpq` library, the `PostgreSQLClient` cannot be installed as-is in an AWS Lambda function. Instead, it must be packaged as follows:
```bash
pip install --target ./package 'nypl-py-utils[postgresql-client]==1.0.1'

pip install \
--platform manylinux2014_x86_64 \
--target=./package \
--implementation cp \
--python 3.9 \
--only-binary=:all: --upgrade \
'psycopg[binary]'
```

#### Using PostgreSQLClient locally
If using the `PostgreSQLClient` produces the following error locally:
```
ImportError: no pq wrapper available.
Attempts made:
Expand All @@ -67,7 +83,7 @@ Attempts made:
```

then try running:
```
```bash
pip uninstall psycopg
pip install "psycopg[c]"
```
Expand All @@ -81,6 +97,7 @@ This repo uses the [Main-QA-Production](https://github.com/NYPL/engineering-gene
- Cut a feature branch off of `main`
- Commit changes to your feature branch
- File a pull request against `main` and assign a reviewer (who must be an owner)
- Include relevant updates to pyproject.toml and README
- In order for the PR to be accepted, it must pass all unit tests, have no lint issues, and update the CHANGELOG (or contain the `Skip-Changelog` label in GitHub)
- After the PR is accepted, merge into `main`
- Merge `main` > `qa`
Expand Down
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "nypl_py_utils"
version = "1.0.1"
version = "1.0.3"
authors = [
{ name="Aaron Friedman", email="[email protected]" },
]
Expand Down Expand Up @@ -63,8 +63,11 @@ config-helper = [
obfuscation-helper = [
"bcrypt>=4.0.1"
]
research-catalog-identifier-helper = [
"requests>=2.28.1"
]
development = [
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper]",
"nypl_py_utils[avro-encoder,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,postgresql-pool-client,redshift-client,s3-client,config-helper,obfuscation-helper,research-catalog-identifier-helper]",
"flake8>=6.0.0",
"freezegun>=1.2.2",
"mock>=4.0.3",
Expand Down
108 changes: 108 additions & 0 deletions src/nypl_py_utils/functions/research_catalog_identifier_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import os
import re
import requests
from requests.exceptions import JSONDecodeError, RequestException

# Module-level cache keyed by name. nypl_core_source_mapping() stores the
# parsed source mapping here so later calls can reuse it instead of fetching
# the file again.
CACHE = {}


def parse_research_catalog_identifier(identifier: str):
    """
    Parse a Research Catalog (RC) identifier into its component parts.

    Given a RC identifier (e.g. "b1234", "pb9876", "pi4567"), returns a dict
    defining:
     - nyplSource: One of sierra-nypl, recap-pul, recap-cul, or recap-hl (at
                   writing)
     - nyplType: One of bib, holding, or item
     - id: The identifier with its leading prefix removed

    Raises ResearchCatalogIdentifierError if the identifier is not a string,
    does not begin with a lowercase alphabetic prefix, or has a prefix that
    is not present in the nypl-core source mapping.
    """
    if not isinstance(identifier, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')

    # The prefix is the leading run of lowercase letters:
    match = re.match(r'^([a-z]+)', identifier)
    if match is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid RC identifier: {identifier}')
    prefix = match[0]

    # Strip only the leading prefix. (str.replace would also delete any later
    # occurrence of the prefix text from the id itself.)
    record_id = identifier[len(prefix):]

    # Mapping keys paired with the nyplType they denote, in lookup order:
    prefix_key_types = (
        ('bibPrefix', 'bib'),
        ('itemPrefix', 'item'),
        ('holdingPrefix', 'holding'),
    )

    # Look up nyplType and nyplSource in nypl-core based on the prefix; the
    # first source with a matching prefix wins:
    for nyplSource, mapping in nypl_core_source_mapping().items():
        for prefix_key, nyplType in prefix_key_types:
            if mapping.get(prefix_key) == prefix:
                return {
                    'nyplSource': nyplSource,
                    'nyplType': nyplType,
                    'id': record_id
                }

    raise ResearchCatalogIdentifierError(
        f'Invalid RC identifier: {identifier}')


def research_catalog_id_prefix(nyplSource: str, nyplType='bib'):
    """
    Given a nyplSource (e.g. 'sierra-nypl') and nyplType (e.g. 'item'), returns
    the relevant prefix used in the RC identifier (e.g. 'i')

    Raises ResearchCatalogIdentifierError if nyplSource is not a key of the
    nypl-core source mapping, if nyplType is not a string, or if the source
    has no '<nyplType>Prefix' entry.
    """
    # Fetch the mapping once so every check below reads the same data:
    source_mapping = nypl_core_source_mapping().get(nyplSource)
    if source_mapping is None:
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplSource: {nyplSource}')

    if not isinstance(nyplType, str):
        raise ResearchCatalogIdentifierError(
            f'Invalid nyplType: {nyplType}')

    prefix = source_mapping.get(f'{nyplType}Prefix')
    if prefix is None:
        raise ResearchCatalogIdentifierError(f'Invalid nyplType: {nyplType}')

    return prefix


def nypl_core_source_mapping():
    """
    Builds a nypl-source-mapping by retrieving the mapping from NYPL-Core

    The file is fetched from the URL in the NYPL_CORE_SOURCE_MAPPING_URL
    environment variable, falling back to the nypl-core file on GitHub. The
    parsed result is stored in CACHE and reused by subsequent calls.

    Raises ResearchCatalogIdentifierError if the file cannot be retrieved or
    cannot be parsed as JSON.
    """
    name = 'nypl-core-source-mapping'
    cached = CACHE.get(name)
    if cached is not None:
        return cached

    url = os.environ.get(
        'NYPL_CORE_SOURCE_MAPPING_URL',
        'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json')  # noqa
    try:
        # Without a timeout requests will wait indefinitely on a stalled
        # connection. Timeouts are RequestException subclasses, so they are
        # reported through the handler below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except RequestException as e:
        raise ResearchCatalogIdentifierError(
            f'Failed to retrieve nypl-core source-mapping file from {url}:'
            f' {type(e)} {e}') from None

    try:
        mapping = response.json()
    except (JSONDecodeError, KeyError) as e:
        raise ResearchCatalogIdentifierError(
            f'Failed to parse nypl-core source-mapping file: {type(e)}'
            f' {e}') from None

    CACHE[name] = mapping
    return mapping


class ResearchCatalogIdentifierError(Exception):
    """
    Error raised by the Research Catalog identifier helpers.

    The text passed to the constructor is available as the `message`
    attribute and through str().
    """

    def __init__(self, message=None):
        # Forward the message to Exception so it reaches `args` (and therefore
        # str() and pickling) even when passed by keyword. With no message,
        # keep args empty so str() remains ''.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)
        self.message = message
72 changes: 72 additions & 0 deletions tests/test_research_catalog_identifier_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import pytest
import json

from nypl_py_utils.functions.research_catalog_identifier_helper import (
parse_research_catalog_identifier, research_catalog_id_prefix,
ResearchCatalogIdentifierError)

# Stand-in for the nypl-core source mapping. The autouse fixture in the test
# class below serves it, JSON-encoded, from the mocked mapping URL. Only
# 'sierra-nypl' defines a holdingPrefix here.
_TEST_MAPPING = {
    'sierra-nypl': {
        'organization': 'nyplOrg:0001',
        'bibPrefix': 'b',
        'holdingPrefix': 'h',
        'itemPrefix': 'i'
    },
    'recap-pul': {
        'organization': 'nyplOrg:0003',
        'bibPrefix': 'pb',
        'itemPrefix': 'pi'
    },
    'recap-cul': {
        'organization': 'nyplOrg:0002',
        'bibPrefix': 'cb',
        'itemPrefix': 'ci'
    },
    'recap-hl': {
        'organization': 'nyplOrg:0004',
        'bibPrefix': 'hb',
        'itemPrefix': 'hi'
    }
}


class TestResearchCatalogIdentifierHelper:
    """Tests for the Research Catalog identifier helper functions."""

    @pytest.fixture(autouse=True)
    def test_instance(self, requests_mock):
        # Serve the stub mapping from the default mapping URL for every test.
        mapping_url = 'https://raw.githubusercontent.com/NYPL/nypl-core/master/mappings/recap-discovery/nypl-source-mapping.json'  # noqa
        requests_mock.get(mapping_url, text=json.dumps(_TEST_MAPPING))

    def test_parse_research_catalog_identifier_parses_valid(self):
        # Each identifier paired with the parse result it must produce.
        expectations = (
            ('b1234',
             {'id': '1234', 'nyplSource': 'sierra-nypl', 'nyplType': 'bib'}),
            ('cb1234',
             {'id': '1234', 'nyplSource': 'recap-cul', 'nyplType': 'bib'}),
            ('pi1234',
             {'id': '1234', 'nyplSource': 'recap-pul', 'nyplType': 'item'}),
            ('h1234',
             {'id': '1234', 'nyplSource': 'sierra-nypl',
              'nyplType': 'holding'}),
        )
        for identifier, expected in expectations:
            assert parse_research_catalog_identifier(identifier) == expected

    def test_parse_research_catalog_identifier_fails_nonsense(self):
        # Non-strings, unknown prefixes and prefix-less ids must all raise.
        for bad_identifier in (None, 1234, 'z1234', '1234'):
            with pytest.raises(ResearchCatalogIdentifierError):
                parse_research_catalog_identifier(bad_identifier)

    def test_research_catalog_id_prefix_parses_valid(self, mocker):
        # nyplType is optional; omitting it yields the bib prefix.
        assert research_catalog_id_prefix('sierra-nypl') == 'b'

        expectations = (
            ('sierra-nypl', 'bib', 'b'),
            ('sierra-nypl', 'item', 'i'),
            ('sierra-nypl', 'holding', 'h'),
            ('recap-pul', 'bib', 'pb'),
            ('recap-hl', 'bib', 'hb'),
            ('recap-hl', 'item', 'hi'),
            ('recap-pul', 'item', 'pi'),
        )
        for source, record_type, expected_prefix in expectations:
            assert research_catalog_id_prefix(source, record_type) == \
                expected_prefix

    def test_research_catalog_id_prefix_fails_nonsense(self, mocker):
        for bad_source in ('sierra-cul', None, 'recap-nypl'):
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix(bad_source)

        for bad_type in (None, '...'):
            with pytest.raises(ResearchCatalogIdentifierError):
                research_catalog_id_prefix('sierra-nypl', bad_type)

0 comments on commit 139a550

Please sign in to comment.