Skip to content

Commit

Permalink
Added edition deletion from ES
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitri committed Dec 18, 2024
1 parent 6f7ee0b commit 54811be
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 12 deletions.
1 change: 1 addition & 0 deletions scripts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
from .countCABooks import main as countCA
from .nyplLoginFlags import main as nyplFlags
from .deleteUMPManifestLinks import main as deleteUMPManifests
from .deleteProblemWorks import main as deleteWorks
26 changes: 14 additions & 12 deletions services/sources/publisher_backlist_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from managers import S3Manager, WebpubManifest
from .source_service import SourceService
from managers import DBManager, ElasticsearchManager
from elasticsearch_dsl import Search
from elasticsearch_dsl import Search, Q

logger = create_log(__name__)

Expand Down Expand Up @@ -50,7 +50,7 @@ def delete_records(
def delete_manifest(self, record_metadata_dict):
self.db_manager.createSession()
try:
record = self.session.query(Record).filter(Record.source_id == record_metadata_dict['DRB Record_ID']).first()
record = self.db_manager.session.query(Record).filter(Record.source_id == record_metadata_dict['DRB Record_ID']).first()
if record:
key_name = self.get_metadata_file_name(record, record_metadata_dict)
self.s3_manager.s3Client.delete_object(Bucket= self.s3_bucket, Key= key_name)
Expand All @@ -68,7 +68,7 @@ def delete_work(self, record_metadata_dict):
record_uuid_str = str(record.uuid)
edition = self.db_manager.session.query(Edition).filter(Edition.dcdw_uuids.contains([record_uuid_str])).first()
work = self.db_manager.session.query(Work).filter(Work.id == edition.work_id).first()
if self.checkAllEditionsRelatedToRecord(record_uuid_str, work) == True:
if self.has_only_one_edition(work):
work_uuid_str = str(work.uuid)
es_work_resp = Search(index=os.environ['ELASTICSEARCH_INDEX']).query('match', uuid=work_uuid_str)
self.db_manager.session.query(Work).filter(Work.id == edition.work_id).delete()
Expand All @@ -81,20 +81,22 @@ def delete_work(self, record_metadata_dict):
finally:
self.db_manager.session.close()

def checkAllEditionsRelatedToRecord(self, record_uuid_str, work):
for edition in self.db_manager.session.query(Edition).filter(Edition.work_id == work.id).all():
if record_uuid_str not in edition.dcdw_uuids:
return False
def has_only_one_edition(self, work):
if len(work.editions) > 1:
return False
return True

def delete_pub_backlist_edition_only(self, record_uuid_str, work):
for edition in self.db_manager.session.query(Edition) \
edition = self.db_manager.session.query(Edition) \
.filter(Edition.work_id == work.id) \
.filter(Edition.dcdw_uuids.contains([record_uuid_str])) \
.all():
self.db_manager.session.delete(edition)
# es_edition_resp = Search(index=os.environ['ELASTICSEARCH_INDEX']).query('match', uuid=uuid)
# es_edition_esp.delete()
.first()
self.db_manager.session.delete(edition)
es_work_resp = Search(index=os.environ['ELASTICSEARCH_INDEX']).query('match', uuid=str(work.uuid))
for work_hit in es_work_resp:
for edition_hit in work_hit:
edition_es_response = Search(index=os.environ['ELASTICSEARCH_INDEX']).query('nested', path='editions', query=Q('match', **{'editions.edition_id': edition_hit['edition_id']}))
edition_es_response.delete()

def get_metadata_file_name(self, record, record_metadata_dict):
key_format = f"{self.prefix}{record.source}"
Expand Down

0 comments on commit 54811be

Please sign in to comment.