-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
NO-REF: Refactor NYPL Process (#433)
- Loading branch information
1 parent
b5381ce
commit 454b342
Showing
9 changed files
with
265 additions
and
414 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .sources.nypl_bib_service import NYPLBibService |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
from datetime import datetime, timedelta, timezone | ||
import os | ||
import requests | ||
from typing import Optional | ||
|
||
from logger import createLog | ||
from managers.db import DBManager | ||
from static.manager import StaticManager | ||
from managers.nyplApi import NyplApiManager | ||
from mappings.nypl import NYPLMapping | ||
from .source_service import SourceService | ||
from sqlalchemy import text | ||
|
||
|
||
logger = createLog(__name__) | ||
|
||
|
||
class NYPLBibService(SourceService):
    """Source service that reads rows from the NYPL `bib` table and maps them.

    Rows are selected from the bib database, filtered with
    `is_pd_research_bib`, enriched with their items from the NYPL API and
    converted into `NYPLMapping` records.
    """

    # Upper bound, in seconds, for the plain `requests` calls made by this
    # class. `requests` applies no timeout by default, so without one an
    # unresponsive host would block the whole import indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        """Open the bib database engine, get an API token and load lookups.

        Reads NYPL_BIB_USER, NYPL_BIB_PSWD, NYPL_BIB_HOST, NYPL_BIB_PORT,
        NYPL_BIB_NAME, NYPL_LOCATIONS_BY_CODE and BARDO_CCE_API from the
        environment.

        Raises:
            KeyError: if any of the required environment variables is unset.
        """
        self.bib_db_connection = DBManager(
            user=os.environ['NYPL_BIB_USER'],
            pswd=os.environ['NYPL_BIB_PSWD'],
            host=os.environ['NYPL_BIB_HOST'],
            port=os.environ['NYPL_BIB_PORT'],
            db=os.environ['NYPL_BIB_NAME']
        )
        self.bib_db_connection.generateEngine()

        self.nypl_api_manager = NyplApiManager()
        self.nypl_api_manager.generateAccessToken()

        self.location_codes = self.load_location_codes()
        self.cce_api = os.environ['BARDO_CCE_API']

        self.static_manager = StaticManager()

    def get_records(
        self,
        full_import: bool=False,
        start_timestamp: Optional[datetime]=None,
        offset: Optional[int]=None,
        limit: Optional[int]=None
    ) -> list[NYPLMapping]:
        """Query the bib table and return the successfully mapped records.

        Args:
            full_import: when true, no `updated_date` filter is applied.
            start_timestamp: lower bound (exclusive) for `updated_date`; only
                used when `full_import` is false. Defaults to 24 hours before
                the current UTC time.
            offset: number of rows to skip; a falsy value (None or 0) adds no
                OFFSET clause.
            limit: maximum number of rows to read; a falsy value (None or 0)
                adds no LIMIT clause.

        Returns:
            One `NYPLMapping` per row that has `var_fields` and for which
            `parse_nypl_bib` returned a record.
        """
        nypl_bib_query = 'SELECT * FROM bib WHERE publish_year <= 1965'
        # Values are passed as bound parameters rather than formatted into the
        # SQL text, so quoting and escaping are left to the database driver.
        query_params = {}

        if not full_import:
            if not start_timestamp:
                # Naive UTC "now minus one day", truncated to whole seconds.
                start_timestamp = (
                    datetime.now(timezone.utc).replace(tzinfo=None, microsecond=0)
                    - timedelta(hours=24)
                )

            nypl_bib_query += ' and updated_date > :start_timestamp'
            query_params['start_timestamp'] = start_timestamp

        if offset:
            nypl_bib_query += ' OFFSET :offset'
            query_params['offset'] = offset

        if limit:
            nypl_bib_query += ' LIMIT :limit'
            query_params['limit'] = limit

        with self.bib_db_connection.engine.connect() as db_connection:
            bib_results = db_connection.execution_options(stream_results=True).execute(
                text(nypl_bib_query),
                query_params
            )

            # Rows are copied out before the connection closes so the slower
            # per-row API calls below do not hold it open. Rows without
            # var_fields are dropped while streaming instead of being kept in
            # memory first.
            bib_rows = [
                bib_row for bib_row in bib_results.mappings()
                if bib_row['var_fields'] is not None
            ]

        records = []

        for bib_row in bib_rows:
            nypl_bib_record = self.parse_nypl_bib(bib_row)

            if nypl_bib_record:
                records.append(nypl_bib_record)

        return records

    def parse_nypl_bib(self, bib) -> Optional[NYPLMapping]:
        """Build a mapped record for one bib row.

        Args:
            bib: mapping-like row from the bib table.

        Returns:
            The mapped `NYPLMapping`, or None when the row does not pass
            `is_pd_research_bib` or when any step raises (the failure is
            logged and swallowed so one bad row does not stop the import).
        """
        try:
            # One plain-dict copy is shared by both helpers.
            bib_fields = dict(bib)

            if not self.is_pd_research_bib(bib_fields):
                return None

            bib_items = self.fetch_bib_items(bib_fields)

            nypl_record = NYPLMapping(bib, bib_items, self.static_manager.statics, self.location_codes)
            nypl_record.applyMapping()

            return nypl_record
        except Exception:
            logger.exception('Failed to parse NYPL bib {}'.format(bib.get('id')))
            return None

    def fetch_bib_items(self, bib):
        """Return the `data` list of the bib's items endpoint ([] if absent)."""
        bib_endpoint = 'bibs/{}/{}/items'.format(bib['nypl_source'], bib['id'])

        return self.nypl_api_manager.queryApi(bib_endpoint).get('data', [])

    def load_location_codes(self):
        """Download and decode the JSON document at NYPL_LOCATIONS_BY_CODE.

        Raises:
            requests.HTTPError: when the server answers with an error status,
                so an error body is never mistaken for the location table.
        """
        locations_response = requests.get(
            os.environ['NYPL_LOCATIONS_BY_CODE'],
            timeout=self.REQUEST_TIMEOUT_SECONDS
        )
        locations_response.raise_for_status()

        return locations_response.json()

    def is_pd_research_bib(self, bib):
        """Decide whether a bib should be imported.

        The name suggests "public-domain research bib" - TODO confirm.

        Rules, in order:
          * an unreadable `publish_year` is treated as the current year;
          * years after 1965 are rejected;
          * years within the last 95 years must pass `get_copyright_status`;
          * whatever remains must be flagged `isResearch` by the NYPL API.
        """
        current_year = datetime.today().year

        try:
            pub_year = int(bib['publish_year'])
        except (KeyError, TypeError, ValueError):
            # Missing, null or non-numeric year: fall back to a value that is
            # rejected by the check below.
            pub_year = current_year

        if pub_year > 1965:
            return False
        elif pub_year > current_year - 95:
            if not self.get_copyright_status(bib['var_fields']):
                return False

        bib_status = self.nypl_api_manager.queryApi(
            'bibs/{}/{}/is-research'.format(bib['nypl_source'], bib['id'])
        )

        return bib_status.get('isResearch', False) is True

    def get_copyright_status(self, var_fields):
        """Look the bib's LCCN up in the service configured by BARDO_CCE_API.

        Args:
            var_fields: list of field dicts; the LCCN is read from the first
                subfield of the single field whose `marcTag` is '010'.

        Returns:
            True only when the lookup succeeds and its first result lists no
            renewals. False when there is not exactly one 010 field, the
            response status is not 200, or there are no results.
        """
        lccn_fields = [field for field in var_fields if field.get('marcTag') == '010']

        if len(lccn_fields) != 1:
            return False

        lccn_no = lccn_fields[0]['subfields'][0]['content'].replace('sn', '').strip()

        copyright_response = requests.get(
            f'{self.cce_api}/lccn/{lccn_no}',
            timeout=self.REQUEST_TIMEOUT_SECONDS
        )

        if copyright_response.status_code != 200:
            return False

        copyright_results = copyright_response.json()['data']['results']

        if len(copyright_results) > 0:
            return len(copyright_results[0]['renewals']) == 0

        return False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from abc import ABC, abstractmethod | ||
from datetime import datetime | ||
from typing import Optional | ||
|
||
from mappings.abstractMapping import AbstractMapping | ||
|
||
class SourceService(ABC):
    """Abstract base class for services that supply mapped source records.

    Concrete subclasses must implement `get_records`.
    """

    @abstractmethod
    def get_records(
        self,
        full_import: bool = False,
        start_timestamp: Optional[datetime] = None,
        offset: Optional[int] = None,
        limit: Optional[int] = None,
    ) -> list[AbstractMapping]:
        """Return the records this source currently offers.

        The base implementation does nothing and returns None; how the
        arguments are interpreted is left to each subclass.

        Args:
            full_import: flag for importing everything instead of a subset.
            start_timestamp: optional point in time to import from.
            offset: optional number of records to skip.
            limit: optional maximum number of records to read.
        """
Empty file.
Empty file.
19 changes: 19 additions & 0 deletions
19
tests/integration/services/sources/test_nypl_bib_service.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from datetime import datetime, timezone, timedelta | ||
import pytest | ||
|
||
from load_env import load_env_file | ||
from services import NYPLBibService | ||
|
||
class TestNYPLBibService:
    """Integration test for NYPLBibService.

    Builds a real service from the local configuration file, so it relies on
    whatever `NYPLBibService()` contacts when it is constructed and queried.
    """

    @pytest.fixture
    def test_instance(self):
        """Load the local environment file and build the service under test."""
        load_env_file('local', file_string='config/local.yaml')
        return NYPLBibService()

    def test_get_records(self, test_instance: NYPLBibService):
        """Fetch up to 100 rows updated in the last 24 hours and check the result."""
        record_limit = 100

        records = test_instance.get_records(
            start_timestamp=datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=24),
            limit=record_limit
        )

        # get_records always returns a list, so a bare "is not None" check
        # could never fail. Check the type and that the row limit was honoured.
        assert isinstance(records, list)
        assert len(records) <= record_limit
Oops, something went wrong.