From 4075859ba311fb6f0efbc0c6da6657b8fa543b96 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Mon, 30 Dec 2024 14:29:08 -0500 Subject: [PATCH 01/17] Parse access types and set flags --- .../sources/publisher_backlist_service.py | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 4752188863..38c982af6d 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -26,9 +26,10 @@ def __init__(self): self.s3_manager.createS3Client() self.title_prefix = 'titles/publisher_backlist' self.file_bucket = os.environ['FILE_BUCKET'] - + self.limited_file_bucket = self.build_limited_bucket(self.file_bucket) + self.drive_service = GoogleDriveService() - + self.db_manager = DBManager() self.db_manager.generateEngine() @@ -215,31 +216,31 @@ def get_publisher_backlist_records(self, records_response = requests.get(url, headers=headers) records_response_json = records_response.json() - + publisher_backlist_records.extend(records_response_json.get('records', [])) - + while 'offset' in records_response_json: next_page_url = url + f"&offset={records_response_json['offset']}" - + records_response = requests.get(next_page_url, headers=headers) records_response_json = records_response.json() - + publisher_backlist_records.extend(records_response_json.get('records', [])) return publisher_backlist_records - - def add_has_part_mapping(self, s3_url: str, record: Record): + + def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: bool=False, is_login_limited: bool=True): item_no = '1' - media_tpye = 'application/pdf' + media_type = 'application/pdf' flags = { 'catalog': False, - 'download': True, + 'download': is_downloadable, 'reader': False, 'embed': False, - **({'nypl_login': True} if 'in_copyright' in record.rights else {}) + 'nypl_login': is_login_limited, } - record.has_part.append('|'.join([item_no, s3_url, record.source, media_tpye, json.dumps(flags)])) + record.has_part.append('|'.join([item_no, s3_url, record.source, media_type, json.dumps(flags)])) def store_pdf_manifest(self, record: Record): for link in record.has_part: @@ -283,3 +284,22 @@ def generate_manifest(record, source_url, manifest_url): }) return manifest.toJson() + + @staticmethod + def parse_permissions(permissions: str) -> dict: + if permissions == 'Full access': + return {'is_downloadable': True, 'is_login_limited': False} + if permissions == 'Partial access/read only/no download/no login': + return {'is_downloadable': False, 'is_login_limited': False} + if permissions == 'Limited Access/login for read/no download': + return {'is_downloadable': False, 'is_login_limited': True} + else: + return {'is_downloadable': False, 'is_login_limited': True} + + @staticmethod + def build_limited_bucket(bucket: str) -> str: + split_bucket = os.environ['FILE_BUCKET'].rsplit('-', 1) + split_bucket.insert(len(split_bucket)-1, '-limited-') + return ''.join(split_bucket) + + From 725dbe81dba3b38709a2f3982993332797173ebb Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Mon, 30 Dec 2024 14:35:42 -0500 Subject: [PATCH 02/17] newline --- services/sources/publisher_backlist_service.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 38c982af6d..4cbe4e0e9a 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -301,5 +301,3 @@ def build_limited_bucket(bucket: str) -> str: split_bucket = os.environ['FILE_BUCKET'].rsplit('-', 1) split_bucket.insert(len(split_bucket)-1, '-limited-') return ''.join(split_bucket) - - From bc8ed675b0ca6aacbf8b71582e069d6e651150d5 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Fri, 3 Jan 2025 10:30:46 -0500 Subject: [PATCH 03/17] Default to least permissive limited access option --- services/sources/publisher_backlist_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 4cbe4e0e9a..87eef8d3d0 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -291,8 +291,8 @@ def parse_permissions(permissions: str) -> dict: return {'is_downloadable': True, 'is_login_limited': False} if permissions == 'Partial access/read only/no download/no login': return {'is_downloadable': False, 'is_login_limited': False} - if permissions == 'Limited Access/login for read/no download': - return {'is_downloadable': False, 'is_login_limited': True} + if permissions == 'Limited Access/login for read & download': + return {'is_downloadable': True, 'is_login_limited': True} else: return {'is_downloadable': False, 'is_login_limited': True} From 1e7c7f150d1e16bd0731110466aff24b10954943 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Fri, 3 Jan 2025 15:14:04 -0500 Subject: [PATCH 04/17] Bucket logic and publisher->project to match manifest path --- services/sources/publisher_backlist_service.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 87eef8d3d0..86e930a34a 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -150,8 +150,11 @@ def get_records( for record in records: try: record_metadata = record.get('fields') - - file_id = f'{self.drive_service.id_from_url(record_metadata.get("DRB_File Location"))}' + try: + file_id = f'{self.drive_service.id_from_url(record_metadata.get("DRB_File Location"))}' + except Exception: + logger.error(f'Could not extract a Drive identifier from {record_metadata.get("DRB_Record ID")}') + continue file_name = self.drive_service.get_file_metadata(file_id).get('name') file = self.drive_service.get_drive_file(file_id) @@ -159,6 +162,11 @@ def get_records( logger.error(f'Failed to retrieve file for {record_metadata.get("DRB_Record ID")} from Google Drive') continue + record_permissions = self.parse_permissions(record_metadata.get('Access type in DRB (from Access types)')[0]) + if not record_permissions['is_login_limited']: + bucket = self.file_bucket + else: + bucket = self.limited_file_bucket bucket = self.file_bucket # TODO: if record is limited access, upload to limited access bucket s3_path = f'{self.title_prefix}/{record_metadata["Publisher (from Projects)"][0]}/{file_name}' s3_response = self.s3_manager.putObjectInBucket(file.getvalue(), s3_path, bucket) From e6547a936f9ccf62fa323ebf3b2188723c78fe20 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Fri, 3 Jan 2025 15:24:28 -0500 Subject: [PATCH 05/17] Fix commit error --- services/sources/publisher_backlist_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 86e930a34a..d377ea4adb 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -167,7 +167,6 @@ def get_records( bucket = self.file_bucket else: bucket = self.limited_file_bucket - bucket = self.file_bucket # TODO: if record is limited access, upload to limited access bucket s3_path = f'{self.title_prefix}/{record_metadata["Publisher (from Projects)"][0]}/{file_name}' s3_response = self.s3_manager.putObjectInBucket(file.getvalue(), s3_path, bucket) From ec9ef1f68184a6503ccbbdf74e4d5b6c7a8bed82 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Fri, 3 Jan 2025 15:30:42 -0500 Subject: [PATCH 06/17] Build limited bucket name from string; remove static method --- services/sources/publisher_backlist_service.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index d377ea4adb..59fd457233 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -26,7 +26,7 @@ def __init__(self): self.s3_manager.createS3Client() self.title_prefix = 'titles/publisher_backlist' self.file_bucket = os.environ['FILE_BUCKET'] - self.limited_file_bucket = self.build_limited_bucket(self.file_bucket) + self.limited_file_bucket = f'drb-files-limited-{os.environment.get('ENVIRONMENT', 'qa')}' self.drive_service = GoogleDriveService() @@ -302,9 +302,3 @@ def parse_permissions(permissions: str) -> dict: return {'is_downloadable': True, 'is_login_limited': True} else: return {'is_downloadable': False, 'is_login_limited': True} - - @staticmethod - def build_limited_bucket(bucket: str) -> str: - split_bucket = os.environ['FILE_BUCKET'].rsplit('-', 1) - split_bucket.insert(len(split_bucket)-1, '-limited-') - return ''.join(split_bucket) From f84ec29008b931df899c0d74fc09a3dfc9da6ee3 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Fri, 3 Jan 2025 15:35:11 -0500 Subject: [PATCH 07/17] Quote error --- services/sources/publisher_backlist_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 59fd457233..df44b8eb6e 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -26,7 +26,7 @@ def __init__(self): self.s3_manager.createS3Client() self.title_prefix = 'titles/publisher_backlist' self.file_bucket = os.environ['FILE_BUCKET'] - self.limited_file_bucket = f'drb-files-limited-{os.environment.get('ENVIRONMENT', 'qa')}' + self.limited_file_bucket = f'drb-files-limited-{os.environment.get("ENVIRONMENT", "qa")}' self.drive_service = GoogleDriveService() From ccbc0a39f50f794d4a62844ece2f300f5072473c Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Mon, 6 Jan 2025 12:18:53 -0500 Subject: [PATCH 08/17] Fix environ typo and standardize to sentence case for LA keys --- services/sources/publisher_backlist_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index df44b8eb6e..0a0f614b11 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -26,7 +26,7 @@ def __init__(self): self.s3_manager.createS3Client() self.title_prefix = 'titles/publisher_backlist' self.file_bucket = os.environ['FILE_BUCKET'] - self.limited_file_bucket = f'drb-files-limited-{os.environment.get("ENVIRONMENT", "qa")}' + self.limited_file_bucket = f'drb-files-limited-{os.environ.get("ENVIRONMENT", "qa")}' self.drive_service = GoogleDriveService() @@ -298,7 +298,7 @@ def parse_permissions(permissions: str) -> dict: return {'is_downloadable': True, 'is_login_limited': False} if permissions == 'Partial access/read only/no download/no login': return {'is_downloadable': False, 'is_login_limited': False} - if permissions == 'Limited Access/login for read & download': + if permissions == 'Limited access/login for read & download': return {'is_downloadable': True, 'is_login_limited': True} else: return {'is_downloadable': False, 'is_login_limited': True} From 7075c1b3277aaf2374cd56f9e4cbc198a726c1d3 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Mon, 6 Jan 2025 13:59:54 -0500 Subject: [PATCH 09/17] Use entire project name instead of first word Use entire project name instead of first word and build buckets from project name instead of publisher name --- mappings/publisher_backlist.py | 3 +-- services/sources/publisher_backlist_service.py | 2 +- tests/unit/test_pub_backlist_mapping.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mappings/publisher_backlist.py b/mappings/publisher_backlist.py index ad04934b90..28f6677f64 100644 --- a/mappings/publisher_backlist.py +++ b/mappings/publisher_backlist.py @@ -26,8 +26,7 @@ def createMapping(self): def applyFormatting(self): self.record.has_part = [] if self.record.source: - source_list = self.record.source[0].split(' ') - self.record.source = source_list[0] + self.record.source = self.record.source[0] if self.record.publisher_project_source: publisher_source = self.record.publisher_project_source[0] diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 0a0f614b11..eda84fe5ca 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -167,7 +167,7 @@ def get_records( bucket = self.file_bucket else: bucket = self.limited_file_bucket - s3_path = f'{self.title_prefix}/{record_metadata["Publisher (from Projects)"][0]}/{file_name}' + s3_path = f'{self.title_prefix}/{record_metadata["Project Name (from Projects)"][0]}/{file_name}' s3_response = self.s3_manager.putObjectInBucket(file.getvalue(), s3_path, bucket) if not s3_response.get('ResponseMetadata').get('HTTPStatusCode') == 200: diff --git a/tests/unit/test_pub_backlist_mapping.py b/tests/unit/test_pub_backlist_mapping.py index 3f81ec75b7..928798d1be 100644 --- a/tests/unit/test_pub_backlist_mapping.py +++ b/tests/unit/test_pub_backlist_mapping.py @@ -43,7 +43,7 @@ def test_applyFormatting_standard(self, test_mapping, testRecordStandard): test_mapping.applyFormatting() assert test_mapping.record.has_part == [] - assert test_mapping.record.source == 'UofMichigan' + assert test_mapping.record.source == 'UofMichigan Backlist' assert test_mapping.record.identifiers == ['testISBN|isbn', 'testOCLC|oclc'] assert test_mapping.record.source_id == 'testSourceID' assert test_mapping.record.publisher == ['testPublisher||'] From 4b03d86e79ca9f72c622a039c66882615087ffec Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Mon, 6 Jan 2025 19:06:43 -0500 Subject: [PATCH 10/17] Projects -> Project in airtable mapping naming --- mappings/publisher_backlist.py | 6 +++--- services/sources/publisher_backlist_service.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mappings/publisher_backlist.py b/mappings/publisher_backlist.py index 28f6677f64..c5ad30cf81 100644 --- a/mappings/publisher_backlist.py +++ b/mappings/publisher_backlist.py @@ -10,7 +10,7 @@ def createMapping(self): 'title': ('Title', '{0}'), 'authors': ('Author(s)', '{0}'), 'dates': [('Pub Date', '{0}|publication_date')], - 'publisher': ('Publisher (from Projects)', '{0}||'), + 'publisher': ('Publisher (from Project)', '{0}||'), 'identifiers': [ ('ISBN', '{0}|isbn'), ('OCLC', '{0}|oclc') @@ -18,9 +18,9 @@ def createMapping(self): 'rights': ('DRB Rights Classification', '{0}||||'), 'contributors': [('Contributors', '{0}|||contributor')], 'subjects': ('Subject 1', '{0}'), - 'source': ('Project Name (from Projects)', '{0}'), + 'source': ('Project Name (from Project)', '{0}'), 'source_id': ('DRB_Record ID', '{0}'), - 'publisher_project_source': ('Publisher (from Projects)', '{0}') + 'publisher_project_source': ('Publisher (from Project)', '{0}') } def applyFormatting(self): diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index eda84fe5ca..90c4b6602f 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -167,7 +167,7 @@ def get_records( bucket = self.file_bucket else: bucket = self.limited_file_bucket - s3_path = f'{self.title_prefix}/{record_metadata["Project Name (from Projects)"][0]}/{file_name}' + s3_path = f'{self.title_prefix}/{record_metadata["Project Name (from Project)"][0]}/{file_name}' s3_response = self.s3_manager.putObjectInBucket(file.getvalue(), s3_path, bucket) if not s3_response.get('ResponseMetadata').get('HTTPStatusCode') == 200: From 8f1345f432048110fd611a827c187b798cdb12d1 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Tue, 7 Jan 2025 17:23:13 -0500 Subject: [PATCH 11/17] Add limited-local bucket for local development --- localstack/init-localstack-resources.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/localstack/init-localstack-resources.sh b/localstack/init-localstack-resources.sh index 501bc91076..a3cb83befe 100755 --- a/localstack/init-localstack-resources.sh +++ b/localstack/init-localstack-resources.sh @@ -1,4 +1,5 @@ #!/bin/bash awslocal s3 mb s3://drb-files-local +awslocal s3 mb s3://drb-files-limited-local awslocal s3 mb s3://ump-pdf-repository-local From 985bacff0e1b67c04fcf35d577005b6edf54dfec Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Wed, 8 Jan 2025 10:39:14 -0500 Subject: [PATCH 12/17] Add fields from airtable to constants --- services/sources/publisher_backlist_service.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 90c4b6602f..7080f17b45 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -3,6 +3,7 @@ import os import requests import urllib.parse +from enum import Enum from typing import Optional from model import Record, Work, Edition, Item from sqlalchemy.orm import joinedload @@ -19,6 +20,13 @@ BASE_URL = "https://api.airtable.com/v0/appBoLf4lMofecGPU/Publisher%20Backlists%20%26%20Collections%20%F0%9F%93%96?view=All%20Lists" +SOURCE_FIELD = "Project Name (from Project)" + +class LimitedAccessPermissions(Enum): + FULL_ACCESS = 'Full access' + PARTIAL_ACCESS = 'Partial access/read only/no download/no login' + LIMITED_DOWNLOADABLE = 'Limited access/login for read & download' + LIMITED_WITHOUT_DOWNLOAD = 'Limited access/login for read/no download' class PublisherBacklistService(SourceService): def __init__(self): @@ -167,7 +175,7 @@ def get_records( bucket = self.file_bucket else: bucket = self.limited_file_bucket - s3_path = f'{self.title_prefix}/{record_metadata["Project Name (from Project)"][0]}/{file_name}' + s3_path = f'{self.title_prefix}/{record_metadata[SOURCE_FIELD][0]}/{file_name}' s3_response = self.s3_manager.putObjectInBucket(file.getvalue(), s3_path, bucket) if not s3_response.get('ResponseMetadata').get('HTTPStatusCode') == 200: @@ -294,11 +302,11 @@ def generate_manifest(record, source_url, manifest_url): @staticmethod def parse_permissions(permissions: str) -> dict: - if permissions == 'Full access': + if permissions == LimitedAccessPermissions.FULL_ACCESS.value: return {'is_downloadable': True, 'is_login_limited': False} - if permissions == 'Partial access/read only/no download/no login': + if permissions == LimitedAccessPermissions.PARTIAL_ACCESS.value: return {'is_downloadable': False, 'is_login_limited': False} - if permissions == 'Limited access/login for read & download': + if permissions == LimitedAccessPermissions.LIMITED_DOWNLOADABLE.value: return {'is_downloadable': True, 'is_login_limited': True} else: return {'is_downloadable': False, 'is_login_limited': True} From 785db744d20d7da082ac93d700bfe44f8b2d2e8b Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Wed, 8 Jan 2025 10:51:58 -0500 Subject: [PATCH 13/17] Rename --- services/sources/publisher_backlist_service.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 7080f17b45..65bd3ce892 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -171,7 +171,7 @@ def get_records( continue record_permissions = self.parse_permissions(record_metadata.get('Access type in DRB (from Access types)')[0]) - if not record_permissions['is_login_limited']: + if not record_permissions['requires_login']: bucket = self.file_bucket else: bucket = self.limited_file_bucket @@ -252,7 +252,7 @@ def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: boo 'download': is_downloadable, 'reader': False, 'embed': False, - 'nypl_login': is_login_limited, + 'nypl_login': requires_login, } record.has_part.append('|'.join([item_no, s3_url, record.source, media_type, json.dumps(flags)])) @@ -303,10 +303,10 @@ def generate_manifest(record, source_url, manifest_url): @staticmethod def parse_permissions(permissions: str) -> dict: if permissions == LimitedAccessPermissions.FULL_ACCESS.value: - return {'is_downloadable': True, 'is_login_limited': False} + return {'is_downloadable': True, 'requires_login': False} if permissions == LimitedAccessPermissions.PARTIAL_ACCESS.value: - return {'is_downloadable': False, 'is_login_limited': False} + return {'is_downloadable': False, 'requires_login': False} if permissions == LimitedAccessPermissions.LIMITED_DOWNLOADABLE.value: - return {'is_downloadable': True, 'is_login_limited': True} + return {'is_downloadable': True, 'requires_login': True} else: - return {'is_downloadable': False, 'is_login_limited': True} + return {'is_downloadable': False, 'requires_login': True} From bf46f63a2bfb5177605b6db5af420436115d09c5 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Wed, 8 Jan 2025 11:00:42 -0500 Subject: [PATCH 14/17] Fix requires_login & change bucket assignment to ternary --- services/sources/publisher_backlist_service.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 65bd3ce892..553c568072 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -171,10 +171,7 @@ def get_records( continue record_permissions = self.parse_permissions(record_metadata.get('Access type in DRB (from Access types)')[0]) - if not record_permissions['requires_login']: - bucket = self.file_bucket - else: - bucket = self.limited_file_bucket + bucket = self.file_bucket if not record_permissions['requires_login'] else self.limited_file_bucket s3_path = f'{self.title_prefix}/{record_metadata[SOURCE_FIELD][0]}/{file_name}' s3_response = self.s3_manager.putObjectInBucket(file.getvalue(), s3_path, bucket) @@ -241,10 +238,9 @@ def get_publisher_backlist_records(self, records_response_json = records_response.json() publisher_backlist_records.extend(records_response_json.get('records', [])) - return publisher_backlist_records - def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: bool=False, is_login_limited: bool=True): + def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: bool=False, requires_login: bool=True): item_no = '1' media_type = 'application/pdf' flags = { From 0c914a79da1eb90f2635f4a6093351e92c796242 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Wed, 8 Jan 2025 11:05:21 -0500 Subject: [PATCH 15/17] Change source priority to match Airtable project values --- api/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/utils.py b/api/utils.py index 2c09ceea7d..d55ab2587c 100644 --- a/api/utils.py +++ b/api/utils.py @@ -41,7 +41,7 @@ class APIUtils(): 'muse': 4, 'met': 5, 'isac': 6, - 'UofM': 7, + 'UofMichigan Backlist': 7, 'UofSC': 8, 'hathitrust': 9, 'oclc': 10, From fed93cd796aa0356ebd41f520976d8f4d0425462 Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Wed, 8 Jan 2025 14:12:28 -0500 Subject: [PATCH 16/17] Update manifest flags for fulfill process based on login requirements --- services/sources/publisher_backlist_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 553c568072..06115ccede 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -185,7 +185,7 @@ def get_records( publisher_backlist_record.applyMapping() self.add_has_part_mapping(s3_url, publisher_backlist_record.record) - self.store_pdf_manifest(publisher_backlist_record.record) + self.store_pdf_manifest(publisher_backlist_record.record, requires_login=record_permissions['requires_login']) mapped_records.append(publisher_backlist_record) except Exception: @@ -253,7 +253,7 @@ def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: boo record.has_part.append('|'.join([item_no, s3_url, record.source, media_type, json.dumps(flags)])) - def store_pdf_manifest(self, record: Record): + def store_pdf_manifest(self, record: Record, requires_login: bool=True): for link in record.has_part: item_no, url, source, media_type, _ = link.split('|') @@ -270,7 +270,7 @@ def store_pdf_manifest(self, record: Record): 'download': False, 'reader': True, 'embed': False, - **({'fulfill_limited_access': False} if 'in_copyright' in record.rights else {}) + **({'fulfill_limited_access': False} if requires_login else {}) } record.has_part.insert(0, '|'.join([item_no, manifest_url, source, 'application/webpub+json', json.dumps(manifest_flags)])) From 63e8dcdf5c6d7786a9b5c9c63b8cc595bcd4d22d Mon Sep 17 00:00:00 2001 From: Lyndsey Jane Moulds <2042238+Apophenia@users.noreply.github.com> Date: Wed, 8 Jan 2025 15:10:07 -0500 Subject: [PATCH 17/17] Require parameters for limited access and downloadability --- services/sources/publisher_backlist_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/sources/publisher_backlist_service.py b/services/sources/publisher_backlist_service.py index 06115ccede..83cf7afc63 100644 --- a/services/sources/publisher_backlist_service.py +++ b/services/sources/publisher_backlist_service.py @@ -240,7 +240,7 @@ def get_publisher_backlist_records(self, publisher_backlist_records.extend(records_response_json.get('records', [])) return publisher_backlist_records - def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: bool=False, requires_login: bool=True): + def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: bool, requires_login: bool): item_no = '1' media_type = 'application/pdf' flags = { @@ -253,7 +253,7 @@ def add_has_part_mapping(self, s3_url: str, record: Record, is_downloadable: boo record.has_part.append('|'.join([item_no, s3_url, record.source, media_type, json.dumps(flags)])) - def store_pdf_manifest(self, record: Record, requires_login: bool=True): + def store_pdf_manifest(self, record: Record, requires_login: bool): for link in record.has_part: item_no, url, source, media_type, _ = link.split('|')