diff --git a/.github/workflows/build-production.yaml b/.github/workflows/build-production.yaml index 59b558ff39..98b535d08b 100644 --- a/.github/workflows/build-production.yaml +++ b/.github/workflows/build-production.yaml @@ -32,8 +32,8 @@ jobs: run: | docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:latest + docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:production + docker push $ECR_REGISTRY/$ECR_REPOSITORY:production - name: Force ECS Update run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index e047147cfc..dd45387d3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # CHANGELOG -## unreleased -- v0.9.2 +## 2021-10-04 -- v0.9.2 ### Added - Detect file types in s3 process and specify during storage process - `readerVersion` parameter for `/search`, `/work` and `/edition` endpoints to control media types returned @@ -9,6 +9,9 @@ - Improve clustering stability by improving individual error handling - Handle relative links from redirects in proxy endpoint - Add `embed` flag for HTML links +- Extended settings for `utils/proxy` endpoint to be more flexible +- Resolve issue with display of links when filtering by format +- Release stability via distinct production tag ## 2021-09-09 -- v0.9.1 ### Fixed diff --git a/api/blueprints/drbUtils.py b/api/blueprints/drbUtils.py index 509a08c871..1bb8c90dbd 100644 --- a/api/blueprints/drbUtils.py +++ b/api/blueprints/drbUtils.py @@ -47,7 +47,7 @@ def totalCounts(): return APIUtils.formatResponseObject(200, 'totalCounts', totalsSummary) -@utils.route('/proxy', methods=['GET', 'POST', 'PUT', 'HEAD']) +@utils.route('/proxy', methods=['GET', 'POST', 'PUT', 'HEAD', 'OPTIONS']) @cross_origin(origins=os.environ.get('API_PROXY_CORS_ALLOWED', '*')) def getProxyResponse(): proxyUrl = 
request.args.get('proxy_url') diff --git a/api/utils.py b/api/utils.py index f87227a7e7..b1f645a123 100644 --- a/api/utils.py +++ b/api/utils.py @@ -13,11 +13,11 @@ class APIUtils(): ] FORMAT_CROSSWALK = { - 'epub_zip': ['application/epub+zip', 'application/epub+xml', 'application/webpub+json'], - 'epub_xml': ['application/epub+zip', 'application/epub+xml', 'application/webpub+json'], + 'epub_zip': ['application/epub+zip', 'application/epub+xml'], + 'epub_xml': ['application/epub+zip', 'application/epub+xml'], 'html': ['text/html'], 'html_edd': ['application/html+edd', 'application/x.html+edd'], - 'pdf': ['application/pdf', 'application/webpub+json'], + 'pdf': ['application/pdf'], 'webpub_json': ['application/webpub+json'] } @@ -207,18 +207,13 @@ def formatEdition( itemDict['links'] = [] - validLinks = list(filter(lambda x: x.media_type in formats, item.links))\ - if formats else item.links - - if ( - len(validLinks) < 1 - or ( - formats - and len(validLinks) == 1 - and validLinks[0].media_type == 'application/webpub+json' - ) - ): - continue + if formats: + formats.append('application/webpub+json') + validLinks = list(filter( + lambda x: x.media_type in formats, item.links + )) + else: + validLinks = item.links for link in validLinks: flags = link.flags @@ -243,6 +238,8 @@ def formatEdition( 'flags': flags }) + itemDict['links'].sort(key=cls.sortByMediaType) + itemDict['rights'] = [ { 'source': rights.source, @@ -273,6 +270,18 @@ def formatEdition( return editionDict + @staticmethod + def sortByMediaType(link): + scores = { + 'application/epub+xml': 1, 'application/epub+zip': 1, + 'text/html': 2, + 'application/pdf': 3, + 'application/html+edd': 4, + 'application/webpub+json': 5 + } + + return scores[link['mediaType']] + @classmethod def formatRecord(cls, record, itemsByLink): outRecord = { diff --git a/config/development.yaml b/config/development.yaml index b87c250d5e..baa32c0308 100644 --- a/config/development.yaml +++ b/config/development.yaml @@ -1,40 +1,51 @@ 
+# LOGGING +LOG_LEVEL: info + # POSTGRES CONNECTION DETAILS # POSTGRES_USER, POSTGRES_PSWD, POSTGRES_ADMIN_USER and POSTGRES_ADMIN_PSWD must be configured in secrets file -# POSTGRES_HOST: '' -# POSTGRES_NAME: '' -# POSTGRES_PORT: '' +POSTGRES_HOST: sfr-new-metadata-production-cluster.cluster-cvy7z512hcjg.us-east-1.rds.amazonaws.com +POSTGRES_NAME: dcdw_qa +POSTGRES_PORT: '5432' # REDIS CONFIGURATION -# REDIS_HOST: '' -# REDIS_PORT: '' +# REDIS_HOST configured as part of ECS deployment +REDIS_PORT: '6379' # ELASTICSEARCH CONFIGURATION # ELASTICSEARCH_INDEX, ELASTICSEARCH_HOST must be configured in secrets file -# ELASTICSEARCH_PORT: '' -# ELASTICSEARCH_TIMEOUT: '' +ELASTICSEARCH_PORT: '443' +ELASTICSEARCH_TIMEOUT: '5' # RABBITMQ CONFIGURATION -# RABBIT_HOST: '' -# RABBIT_PORT: '' -OCLC_QUEUE: oclc_catalog -EPUB_QUEUE: epub_files +# RABBIT_USER and RABBIT_PSWD must be configured in secrets file +RABBIT_HOST: qa.rmq.aws.nypl.org +RABBIT_PORT: '5672' +RABBIT_VIRTUAL_HOST: /sfr +RABBIT_EXCHANGE: sfrIngestExchange +OCLC_QUEUE: sfrOCLCCatalog +OCLC_ROUTING_KEY: sfrOCLCCatalog +FILE_QUEUE: sfrS3Files +FILE_ROUTING_KEY: sfrS3Files # HATHITRUST CONFIGURATION +# HATHI_API_KEY and HATHI_API_SECRET must be configured as secrets HATHI_DATAFILES: https://www.hathitrust.org/filebrowser/download/244651 +HATHI_API_ROOT: https://babel.hathitrust.org/cgi/htd # OCLC CONFIGURATION # OCLC_API_KEY must be configured in secrets file +OCLC_QUERY_LIMIT: '390000' # AWS CONFIGURATION # AWS_ACCESS and AWS_SECRET must be configured in secrets file -AWS_REGION: 'us-east-1' -EPUB_BUCKET: 'sfr_files' +AWS_REGION: us-east-1 +FILE_BUCKET: drb-files-qa # NYPL BIB REPLICA DB CONNECTION -# NYPL_BIB_USER, NYPL_BIB_PSWD must be configured in secrets file -# NYPL_BIB_HOST: '' -# NYPL_BIB_NAME: '' -# NYPL_BIB_PORT: '' +# NYPL_BIB_USER and NYPL_BIB_PSWD must be configured in secrets file +NYPL_BIB_HOST: bib-service-production-rep.cvy7z512hcjg.us-east-1.rds.amazonaws.com +NYPL_BIB_NAME: 
bib_service_production +NYPL_BIB_PORT: '5432' # NYPL Location Code Lookup NYPL_LOCATIONS_BY_CODE: https://nypl-core-objects-mapping-qa.s3.amazonaws.com/by_sierra_location.json @@ -52,9 +63,23 @@ BARDO_CCE_API: http://sfr-bardo-copyright-development.us-east-1.elasticbeanstalk # Project MUSE MARC endpoint MUSE_MARC_URL: https://about.muse.jhu.edu/lib/metadata?format=marc&content=book&include=oa&filename=open_access_books&no_auth=1 +MUSE_CSV_URL: https://about.muse.jhu.edu/static/org/local/holdings/muse_book_metadata.csv # DOAB OAI-PMH endpoint -DOAB_OAI_URL: http://www.doabooks.org/oai? +DOAB_OAI_URL: https://directory.doabooks.org/oai/request? + +# Google Books API +# GOOGLE_BOOKS_KEY must be configured as a secret + +# ContentCafe2 API +# CONTENT_CAFE_USER and CONTENT_CAFE_PSWD must be configured as secrets + +# SmartSheet API +# SMARTSHEET_API_TOKEN must be configured as a secret +SMARTSHEET_SHEET_ID: '3683038090553220' + +# Default Cover Image for OPDS2 Feed +DEFAULT_COVER_URL: https://drb-files-qa.s3.amazonaws.com/covers/default/defaultCover.png # ePub-to-Webpub Conversion Service WEBPUB_CONVERSION_URL: https://epub-to-webpub.vercel.app diff --git a/managers/parsers/mdpiParser.py b/managers/parsers/mdpiParser.py index 0b287129ae..ed69ddff44 100644 --- a/managers/parsers/mdpiParser.py +++ b/managers/parsers/mdpiParser.py @@ -28,7 +28,7 @@ def generatePDFLinks(self, s3Root): return [ (manifestURI, {'reader': True}, 'application/webpub+json', (manifestPath, manifestJSON), None), - (pdfSourceURI, {'download': True}, self.mediaType, None, None) + (pdfSourceURI, {'download': True}, 'application/pdf', None, None) ] def generateManifest(self, sourceURI, manifestURI): diff --git a/swagger.v4.json b/swagger.v4.json index 92628aca21..166dcebe0f 100644 --- a/swagger.v4.json +++ b/swagger.v4.json @@ -1,7 +1,7 @@ { "swagger": "2.0", "info": { - "version": "v0.9.0", + "version": "v0.9.2", "title": "Digital Research Books Search API", "description": "RESTful API for the Digital 
Research Books Project" }, diff --git a/task-definition.json b/task-definition.json index cea88e719c..fd4deb559b 100644 --- a/task-definition.json +++ b/task-definition.json @@ -17,12 +17,12 @@ "essential": true, "command": [ "--process", "APIProcess", - "--environment", "qa" + "--environment", "development" ], "environment": [ { "name": "ENVIRONMENT", - "value": "qa" + "value": "development" }, { "name": "ELASTICSEARCH_HOST", diff --git a/tests/unit/test_api_es.py b/tests/unit/test_api_es.py index 173d3fb65f..fc3f9e4830 100644 --- a/tests/unit/test_api_es.py +++ b/tests/unit/test_api_es.py @@ -665,14 +665,14 @@ def test_createFilterClausesAndAggregations_w_format(self, testInstance, mocker) mocker.call('exists', field='editions.formats'), mocker.call( 'terms', - editions__formats=['application/pdf', 'application/webpub+json', 'application/html+edd', 'application/x.html+edd'] + editions__formats=['application/pdf', 'application/html+edd', 'application/x.html+edd'] ) ]) mockAgg.assert_has_calls([ mocker.call('filter', exists={'field': 'editions.formats'}), mocker.call( 'filter', - terms={'editions.formats': ['application/pdf', 'application/webpub+json', 'application/html+edd', 'application/x.html+edd']} + terms={'editions.formats': ['application/pdf', 'application/html+edd', 'application/x.html+edd']} ) ]) diff --git a/tests/unit/test_api_utils.py b/tests/unit/test_api_utils.py index 1587c64b72..3772684433 100644 --- a/tests/unit/test_api_utils.py +++ b/tests/unit/test_api_utils.py @@ -1,5 +1,6 @@ from hashlib import scrypt import pytest +from random import shuffle from api.utils import APIUtils @@ -566,4 +567,22 @@ def test_addWorkMeta(self): APIUtils.addWorkMeta(testWork, field1='value1', field2=['value2']) assert testWork['_meta']['field1'] == 'value1' - assert testWork['_meta']['field2'] == ['value2'] \ No newline at end of file + assert testWork['_meta']['field2'] == ['value2'] + + def test_sortByMediaType(self): + testList = [ + {'id': 2, 'mediaType': 
'text/html'}, + {'id': 1, 'mediaType': 'application/epub+xml'}, + {'id': 4, 'mediaType': 'application/html+edd'}, + {'id': 1, 'mediaType': 'application/epub+zip'}, + {'id': 5, 'mediaType': 'application/webpub+json'}, + {'id': 3, 'mediaType': 'application/pdf'} + ] + + shuffle(testList) + testList.sort(key=APIUtils.sortByMediaType) + assert [i['id'] for i in testList] == [1, 1, 2, 3, 4, 5] + + shuffle(testList) + testList.sort(key=APIUtils.sortByMediaType) + assert [i['id'] for i in testList] == [1, 1, 2, 3, 4, 5] diff --git a/tests/unit/test_api_utils_blueprint.py b/tests/unit/test_api_utils_blueprint.py index e943267db5..9ec94e894c 100644 --- a/tests/unit/test_api_utils_blueprint.py +++ b/tests/unit/test_api_utils_blueprint.py @@ -91,6 +91,8 @@ def test_getProxyResponse_direct_success(self, testApp, mocker): assert testAPIResponse.status_code == 200 assert testAPIResponse.response == [b'Test Content'] assert testAPIResponse.headers['Media-Type'] == 'allow' + assert testAPIResponse.headers['Access-Control-Allow-Origin'] ==\ + '*' mockHead.assert_called_once_with( 'https://www.testURL.com',