From 08824daf468089f0b19dcce1bd6352dd70cd5de4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 13 Mar 2024 17:25:58 -0400 Subject: [PATCH 1/4] first attempt at https://github.com/pepkit/looper/issues/469 --- looper/looper.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/looper/looper.py b/looper/looper.py index 53004c919..a120eebbe 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -242,35 +242,37 @@ def __call__(self, args, preview_flag=True): :param bool preview_flag: whether to halt before actually removing files """ - _LOGGER.info("Removing results:") - - for sample in select_samples(prj=self.prj, args=args): - _LOGGER.info(self.counter.show(sample.sample_name)) - sample_output_folder = sample_folder(self.prj, sample) - if preview_flag: - # Preview: Don't actually delete, just show files. - _LOGGER.info(str(sample_output_folder)) - else: - _remove_or_dry_run(sample_output_folder, args.dry_run) - - _LOGGER.info("Removing summary:") use_pipestat = ( self.prj.pipestat_configured_project - if getattr( - args, "project", None - ) # TODO this argument hasn't been added to the pydantic models. + if getattr(args, "project", None) else self.prj.pipestat_configured ) + if use_pipestat: + _LOGGER.info("Removing summary:") destroy_summary( self.prj, getattr(args, "dry_run", None), getattr(args, "project", None), ) - else: - _LOGGER.warning( - "Pipestat must be configured to destroy any created summaries." - ) + + _LOGGER.info("Removing results:") + + for sample in select_samples(prj=self.prj, args=args): + _LOGGER.info(self.counter.show(sample.sample_name)) + sample_output_folder = sample_folder(self.prj, sample) + if preview_flag: + # Preview: Don't actually delete, just show files. + _LOGGER.info(str(sample_output_folder)) + else: + if use_pipestat: + psms = self.prj.get_pipestat_managers( + sample_name=sample.sample_name + ) + for pipeline_name, psm in psms.items(): + psm.remove(record_identifier=sample.sample_name) + else: + _remove_or_dry_run(sample_output_folder, args.dry_run) if not preview_flag: _LOGGER.info("Destroy complete.") From fd05c04e75e53992a63ef4d1e0fef63020aefede Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:14:52 -0400 Subject: [PATCH 2/4] correct retrieving reports directory, use pipestat rm_record for filtered samples when destroying https://github.com/pepkit/looper/issues/469 --- looper/looper.py | 22 ++++++++-------------- requirements/requirements-all.txt | 2 +- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/looper/looper.py b/looper/looper.py index a120eebbe..defe11597 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -46,7 +46,6 @@ sample_folder, ) from pipestat.reports import get_file_for_table -from pipestat.reports import get_file_for_project _PKGNAME = "looper" _LOGGER = logging.getLogger(_PKGNAME) @@ -270,7 +269,9 @@ def __call__(self, args, preview_flag=True): sample_name=sample.sample_name ) for pipeline_name, psm in psms.items(): - psm.remove(record_identifier=sample.sample_name) + psm.backend.remove_record( + record_identifier=sample.sample_name, rm_record=True + ) else: _remove_or_dry_run(sample_output_folder, args.dry_run) @@ -694,10 +695,8 @@ def destroy_summary(prj, dry_run=False, project_level=False): for name, psm in psms.items(): _remove_or_dry_run( [ - get_file_for_project( - psm, - pipeline_name=psm.pipeline_name, - directory="reports", + get_file_for_table( + psm, pipeline_name=psm.pipeline_name, directory="reports" ), get_file_for_table( psm, @@ -709,9 +708,6 @@ def destroy_summary(prj, dry_run=False, project_level=False): pipeline_name=psm.pipeline_name, appendix="objs_summary.yaml", ), - get_file_for_table( - psm, pipeline_name=psm.pipeline_name, appendix="reports" - ), ], dry_run, ) @@ -726,10 +722,8 @@ def destroy_summary(prj, dry_run=False, project_level=False): for name, psm in psms.items(): _remove_or_dry_run( [ - get_file_for_project( - psm, - pipeline_name=psm.pipeline_name, - directory="reports", + get_file_for_table( + psm, pipeline_name=psm.pipeline_name, directory="reports" ), get_file_for_table( psm, @@ -742,7 +736,7 @@ def destroy_summary(prj, dry_run=False, project_level=False): appendix="objs_summary.yaml", ), get_file_for_table( - psm, pipeline_name=psm.pipeline_name, appendix="reports" + psm, pipeline_name="", directory="aggregate_results.yaml" ), ], dry_run, diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 5d7d5eb23..108303465 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -6,7 +6,7 @@ logmuse>=0.2.0 pandas>=2.0.2 pephubclient>=0.4.0 peppy>=0.40.0 -pipestat>=0.8.2a1 +pipestat>=0.8.3a1 pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.5.2 From 2952f1a63cc4aff2df8ce19d6a23228568474784 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:46:47 -0400 Subject: [PATCH 3/4] correct destroy bug, gather aggregate_results https://github.com/pepkit/looper/issues/469 --- looper/looper.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/looper/looper.py b/looper/looper.py index defe11597..b044ef1d9 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -251,8 +251,8 @@ def __call__(self, args, preview_flag=True): _LOGGER.info("Removing summary:") destroy_summary( self.prj, - getattr(args, "dry_run", None), - getattr(args, "project", None), + dry_run=preview_flag, + project_level=getattr(args, "project", None), ) _LOGGER.info("Removing results:") @@ -708,6 +708,9 @@ def destroy_summary(prj, dry_run=False, project_level=False): pipeline_name=psm.pipeline_name, appendix="objs_summary.yaml", ), + os.path.join( + os.path.dirname(psm.config_path), "aggregate_results.yaml" + ), ], dry_run, ) @@ -735,8 +738,8 @@ def destroy_summary(prj, dry_run=False, project_level=False): pipeline_name=psm.pipeline_name, appendix="objs_summary.yaml", ), - get_file_for_table( - psm, pipeline_name="", directory="aggregate_results.yaml" + os.path.join( + os.path.dirname(psm.config_path), "aggregate_results.yaml" ), ], dry_run, From bd175533df8e5ccdd45e5ca7bf12ae6ecabcc614 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:07:07 -0400 Subject: [PATCH 4/4] add checks to destroy test https://github.com/pepkit/looper/issues/469 --- tests/test_comprehensive.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_comprehensive.py b/tests/test_comprehensive.py index a1560289c..a40a0f41d 100644 --- a/tests/test_comprehensive.py +++ b/tests/test_comprehensive.py @@ -1,3 +1,5 @@ +import os.path + import pytest from peppy.const import * from yaml import dump @@ -10,6 +12,7 @@ from tests.smoketests.test_run import is_connected from tempfile import TemporaryDirectory from pipestat import PipestatManager +from pipestat.exceptions import RecordNotFoundError from yaml import dump, safe_load @@ -137,3 +140,9 @@ def test_comprehensive_looper_pipestat(prep_temp_pep_pipestat): result = main(test_args=x) except Exception: raise pytest.fail("DID RAISE {0}".format(Exception)) + + sd = os.path.dirname(path_to_looper_config) + tsv_list = [os.path.join(sd, f) for f in os.listdir(sd) if f.endswith(".tsv")] + assert len(tsv_list) == 0 + with pytest.raises(RecordNotFoundError): + retrieved_result = psm.retrieve_one(record_identifier="frog_2")