Skip to content

Commit

Permalink
Merge branch 'main' into SFR-1304-add-highlighting-to-api
Browse files Browse the repository at this point in the history
  • Loading branch information
mwbenowitz authored Oct 4, 2021
2 parents d3a92f5 + d305cb2 commit e3b7a29
Show file tree
Hide file tree
Showing 11 changed files with 102 additions and 44 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ jobs:
run: |
docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:latest
docker push $ECR_REGISTRY/$ECR_REPOSITORY:latest
docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:production
docker push $ECR_REGISTRY/$ECR_REPOSITORY:production
- name: Force ECS Update
run: |
Expand Down
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CHANGELOG

## unreleased -- v0.9.2
## 2021-10-04 -- v0.9.2
### Added
- Detect file types in s3 process and specify during storage process
- `readerVersion` parameter for `/search`, `/work` and `/edition` endpoints to control media types returned
Expand All @@ -9,6 +9,9 @@
- Improve clustering stability by improving individual error handling
- Handle relative links from redirects in proxy endpoint
- Add `embed` flag for HTML links
- Extended settings for `utils/proxy` endpoint to be more flexible
- Resolve issue with display of links when filtering by format
- Release stability via distinct production tag

## 2021-09-09 -- v0.9.1
### Fixed
Expand Down
2 changes: 1 addition & 1 deletion api/blueprints/drbUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def totalCounts():
return APIUtils.formatResponseObject(200, 'totalCounts', totalsSummary)


@utils.route('/proxy', methods=['GET', 'POST', 'PUT', 'HEAD'])
@utils.route('/proxy', methods=['GET', 'POST', 'PUT', 'HEAD', 'OPTIONS'])
@cross_origin(origins=os.environ.get('API_PROXY_CORS_ALLOWED', '*'))
def getProxyResponse():
proxyUrl = request.args.get('proxy_url')
Expand Down
39 changes: 24 additions & 15 deletions api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ class APIUtils():
]

FORMAT_CROSSWALK = {
'epub_zip': ['application/epub+zip', 'application/epub+xml', 'application/webpub+json'],
'epub_xml': ['application/epub+zip', 'application/epub+xml', 'application/webpub+json'],
'epub_zip': ['application/epub+zip', 'application/epub+xml'],
'epub_xml': ['application/epub+zip', 'application/epub+xml'],
'html': ['text/html'],
'html_edd': ['application/html+edd', 'application/x.html+edd'],
'pdf': ['application/pdf', 'application/webpub+json'],
'pdf': ['application/pdf'],
'webpub_json': ['application/webpub+json']
}

Expand Down Expand Up @@ -207,18 +207,13 @@ def formatEdition(

itemDict['links'] = []

validLinks = list(filter(lambda x: x.media_type in formats, item.links))\
if formats else item.links

if (
len(validLinks) < 1
or (
formats
and len(validLinks) == 1
and validLinks[0].media_type == 'application/webpub+json'
)
):
continue
if formats:
formats.append('application/webpub+json')
validLinks = list(filter(
lambda x: x.media_type in formats, item.links
))
else:
validLinks = item.links

for link in validLinks:
flags = link.flags
Expand All @@ -243,6 +238,8 @@ def formatEdition(
'flags': flags
})

itemDict['links'].sort(key=cls.sortByMediaType)

itemDict['rights'] = [
{
'source': rights.source,
Expand Down Expand Up @@ -273,6 +270,18 @@ def formatEdition(

return editionDict

@staticmethod
def sortByMediaType(link):
    """Return the sort rank of a formatted link based on its media type.

    Intended for use as a ``key`` function with ``list.sort``/``sorted``;
    links with a lower rank are ordered first.

    Args:
        link: mapping with a ``'mediaType'`` key holding the link's media
            type string.

    Returns:
        int: the rank assigned to the media type. Media types without an
        explicit rank receive a rank one greater than the highest known
        rank, so they sort last instead of raising ``KeyError``.

    Raises:
        KeyError: if ``link`` has no ``'mediaType'`` key.
    """
    scores = {
        'application/epub+xml': 1, 'application/epub+zip': 1,
        'text/html': 2,
        'application/pdf': 3,
        # Both EDD media type spellings share a rank
        'application/html+edd': 4, 'application/x.html+edd': 4,
        'application/webpub+json': 5
    }

    # An unranked media type must not abort the sort of an otherwise valid
    # set of links, so it is placed after every known type.
    unknownRank = max(scores.values()) + 1

    return scores.get(link['mediaType'], unknownRank)

@classmethod
def formatRecord(cls, record, itemsByLink):
outRecord = {
Expand Down
61 changes: 43 additions & 18 deletions config/development.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,51 @@
# LOGGING
LOG_LEVEL: info

# POSTGRES CONNECTION DETAILS
# POSTGRES_USER, POSTGRES_PSWD, POSTGRES_ADMIN_USER and POSTGRES_ADMIN_PSWD must be configured in secrets file
# POSTGRES_HOST: ''
# POSTGRES_NAME: ''
# POSTGRES_PORT: ''
POSTGRES_HOST: sfr-new-metadata-production-cluster.cluster-cvy7z512hcjg.us-east-1.rds.amazonaws.com
POSTGRES_NAME: dcdw_qa
POSTGRES_PORT: '5432'

# REDIS CONFIGURATION
# REDIS_HOST: ''
# REDIS_PORT: ''
# REDIS_HOST configured as part of ECS deployment
REDIS_PORT: '6379'

# ELASTICSEARCH CONFIGURATION
# ELASTICSEARCH_INDEX, ELASTICSEARCH_HOST must be configured in secrets file
# ELASTICSEARCH_PORT: ''
# ELASTICSEARCH_TIMEOUT: ''
ELASTICSEARCH_PORT: '443'
ELASTICSEARCH_TIMEOUT: '5'

# RABBITMQ CONFIGURATION
# RABBIT_HOST: ''
# RABBIT_PORT: ''
OCLC_QUEUE: oclc_catalog
EPUB_QUEUE: epub_files
# RABBIT_USER and RABBIT_PSWD must be configured in secrets file
RABBIT_HOST: qa.rmq.aws.nypl.org
RABBIT_PORT: '5672'
RABBIT_VIRTUAL_HOST: /sfr
RABBIT_EXCHANGE: sfrIngestExchange
OCLC_QUEUE: sfrOCLCCatalog
OCLC_ROUTING_KEY: sfrOCLCCatalog
FILE_QUEUE: sfrS3Files
FILE_ROUTING_KEY: sfrS3Files

# HATHITRUST CONFIGURATION
# HATHI_API_KEY and HATHI_API_SECRET must be configured as secrets
HATHI_DATAFILES: https://www.hathitrust.org/filebrowser/download/244651
HATHI_API_ROOT: https://babel.hathitrust.org/cgi/htd

# OCLC CONFIGURATION
# OCLC_API_KEY must be configured in secrets file
OCLC_QUERY_LIMIT: '390000'

# AWS CONFIGURATION
# AWS_ACCESS and AWS_SECRET must be configured in secrets file
AWS_REGION: 'us-east-1'
EPUB_BUCKET: 'sfr_files'
AWS_REGION: us-east-1
FILE_BUCKET: drb-files-qa

# NYPL BIB REPLICA DB CONNECTION
# NYPL_BIB_USER, NYPL_BIB_PSWD must be configured in secrets file
# NYPL_BIB_HOST: ''
# NYPL_BIB_NAME: ''
# NYPL_BIB_PORT: ''
# NYPL_BIB_USER and NYPL_BIB_PSWD must be configured in secrets file
NYPL_BIB_HOST: bib-service-production-rep.cvy7z512hcjg.us-east-1.rds.amazonaws.com
NYPL_BIB_NAME: bib_service_production
NYPL_BIB_PORT: '5432'

# NYPL Location Code Lookup
NYPL_LOCATIONS_BY_CODE: https://nypl-core-objects-mapping-qa.s3.amazonaws.com/by_sierra_location.json
Expand All @@ -52,9 +63,23 @@ BARDO_CCE_API: http://sfr-bardo-copyright-development.us-east-1.elasticbeanstalk

# Project MUSE MARC endpoint
MUSE_MARC_URL: https://about.muse.jhu.edu/lib/metadata?format=marc&content=book&include=oa&filename=open_access_books&no_auth=1
MUSE_CSV_URL: https://about.muse.jhu.edu/static/org/local/holdings/muse_book_metadata.csv

# DOAB OAI-PMH endpoint
DOAB_OAI_URL: http://www.doabooks.org/oai?
DOAB_OAI_URL: https://directory.doabooks.org/oai/request?

# Google Books API
# GOOGLE_BOOKS_KEY must be configured as a secret

# ContentCafe2 API
# CONTENT_CAFE_USER and CONTENT_CAFE_PSWD must be configured as secrets

# SmartSheet API
# SMARTSHEET_API_TOKEN must be configured as a secret
SMARTSHEET_SHEET_ID: '3683038090553220'

# Default Cover Image for OPDS2 Feed
DEFAULT_COVER_URL: https://drb-files-qa.s3.amazonaws.com/covers/default/defaultCover.png

# ePub-to-Webpub Conversion Service
WEBPUB_CONVERSION_URL: https://epub-to-webpub.vercel.app
Expand Down
2 changes: 1 addition & 1 deletion managers/parsers/mdpiParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def generatePDFLinks(self, s3Root):

return [
(manifestURI, {'reader': True}, 'application/webpub+json', (manifestPath, manifestJSON), None),
(pdfSourceURI, {'download': True}, self.mediaType, None, None)
(pdfSourceURI, {'download': True}, 'application/pdf', None, None)
]

def generateManifest(self, sourceURI, manifestURI):
Expand Down
2 changes: 1 addition & 1 deletion swagger.v4.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"swagger": "2.0",
"info": {
"version": "v0.9.0",
"version": "v0.9.2",
"title": "Digital Research Books Search API",
"description": "RESTful API for the Digital Research Books Project"
},
Expand Down
4 changes: 2 additions & 2 deletions task-definition.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
"essential": true,
"command": [
"--process", "APIProcess",
"--environment", "qa"
"--environment", "development"
],
"environment": [
{
"name": "ENVIRONMENT",
"value": "qa"
"value": "development"
},
{
"name": "ELASTICSEARCH_HOST",
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_api_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,14 +665,14 @@ def test_createFilterClausesAndAggregations_w_format(self, testInstance, mocker)
mocker.call('exists', field='editions.formats'),
mocker.call(
'terms',
editions__formats=['application/pdf', 'application/webpub+json', 'application/html+edd', 'application/x.html+edd']
editions__formats=['application/pdf', 'application/html+edd', 'application/x.html+edd']
)
])
mockAgg.assert_has_calls([
mocker.call('filter', exists={'field': 'editions.formats'}),
mocker.call(
'filter',
terms={'editions.formats': ['application/pdf', 'application/webpub+json', 'application/html+edd', 'application/x.html+edd']}
terms={'editions.formats': ['application/pdf', 'application/html+edd', 'application/x.html+edd']}
)
])

Expand Down
21 changes: 20 additions & 1 deletion tests/unit/test_api_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from hashlib import scrypt
import pytest
from random import shuffle

from api.utils import APIUtils

Expand Down Expand Up @@ -566,4 +567,22 @@ def test_addWorkMeta(self):
APIUtils.addWorkMeta(testWork, field1='value1', field2=['value2'])

assert testWork['_meta']['field1'] == 'value1'
assert testWork['_meta']['field2'] == ['value2']
assert testWork['_meta']['field2'] == ['value2']

def test_sortByMediaType(self):
    """Links sort into media-type rank order whatever their starting order."""
    # (expected position, media type) pairs; both epub types share position 1
    rankedTypes = [
        (2, 'text/html'),
        (1, 'application/epub+xml'),
        (4, 'application/html+edd'),
        (1, 'application/epub+zip'),
        (5, 'application/webpub+json'),
        (3, 'application/pdf'),
    ]
    testList = [
        {'id': position, 'mediaType': mediaType}
        for position, mediaType in rankedTypes
    ]

    # Run against two independent random orderings of the same links
    for _ in range(2):
        shuffle(testList)
        testList.sort(key=APIUtils.sortByMediaType)

        sortedIDs = [entry['id'] for entry in testList]
        assert sortedIDs == [1, 1, 2, 3, 4, 5]
2 changes: 2 additions & 0 deletions tests/unit/test_api_utils_blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def test_getProxyResponse_direct_success(self, testApp, mocker):
assert testAPIResponse.status_code == 200
assert testAPIResponse.response == [b'Test Content']
assert testAPIResponse.headers['Media-Type'] == 'allow'
assert testAPIResponse.headers['Access-Control-Allow-Origin'] ==\
'*'

mockHead.assert_called_once_with(
'https://www.testURL.com',
Expand Down

0 comments on commit e3b7a29

Please sign in to comment.