Skip to content

Commit

Permalink
Patched cleanup_archer_fastqs, added test generation script
Browse files Browse the repository at this point in the history
  • Loading branch information
g-pyxl committed May 3, 2024
1 parent a0fe235 commit 86f777a
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 3 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,24 @@ The Docker image is run daily as a CRON job (the python script doesn't work in c
In the config file there is a testing variable.
When set to `True` an alternative folder location is used on the archer server, to avoid processing real runs during testing.

```generate_test_data.py``` can be used to generate a structured series of blank tar.gz and fastq.gz files within the test directories on the Archer server to facilitate simple end-to-end testing.

Usage:

1) Ensure testing = True is set in ```archer_archive_config.py```

2) ```python3 generate_test_data.py```

Creates testing files and directories within the Archer test directories defined in ```archer_archive_config.py```

3) ```python3 archer_archive_script.py```

Runs the archiving script in test mode.

4) ```python3 generate_test_data.py --teardown```

Deletes the testing files and directories from the Archer test directories.

## Logging
Script logfiles are written to mokaguys/logfiles/script_logfiles/YYYYMMDD_TTTTTTarchivelog.txt

Expand Down
6 changes: 3 additions & 3 deletions archer_archive_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,8 @@ def cleanup_archer_fastqs(self,project_adx):
return True
else:
# Rapid 7 alert set up
self.logger("ERROR: failed to correctly delete the FASTQ files for project %s" % (project_adx),"Archer archive")
return False
self.logger("WARNING: No FASTQs were located for project %s" % (project_adx),"Archer archive")
return True # Fastqs possibly deleted already, provide warning, continue to genomics server cleanup

def list_archer_fastq_for_deletion(self,project_adx):
"""
Expand Down Expand Up @@ -473,4 +473,4 @@ def go(self):

if __name__ == "__main__":
archer = ArcherArchive()
archer.go()
archer.go()
154 changes: 154 additions & 0 deletions generate_test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
'''
Generates test data on the Archer server for use with the archiving script.
When run with the --teardown parameter, removes the previously generated test data instead.
'''

import os
import subprocess
import argparse
import archer_archive_config as config

def execute_subprocess_command(command):
    '''Runs a command string in a bash shell and returns what it printed.

    Args:
        command: The shell command line, given as a single string.

    Returns:
        A (stdout, stderr) tuple of text. The exit status is not reported.
    '''
    # universal_newlines=True makes both captured streams come back as str, not bytes.
    completed = subprocess.run(
        command,
        shell=True,
        executable="/bin/bash",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    return completed.stdout, completed.stderr

def create_test_project_folder(project_id):
    '''Creates folder on the Archer server for a given project_id.

    The folder is created as <config.path_to_analysis_test_folder>/<project_id>
    over ssh, using the password stored in the file at
    config.path_to_archerdx_pw.

    Args:
        project_id: Name of the project folder to create.

    Returns:
        None. The output of the ssh command is discarded.
    '''
    # mkdir -p: an already existing folder is not an error, so setup can be re-run.
    remote_cmd = "mkdir -p %s/%s" % (config.path_to_analysis_test_folder, project_id)
    # "$archer_pw" is quoted so a password containing whitespace or glob
    # characters reaches sshpass as a single argument.
    cmd = "archer_pw=$(<%s); sshpass -p \"$archer_pw\" ssh [email protected] '%s'" % (
        config.path_to_archerdx_pw,
        remote_cmd,
    )
    execute_subprocess_command(cmd)

def create_test_fastq_files(adx_project_name, panel_name, sample_ids):
    '''Creates dummy FASTQ files for testing.

    For every sample an empty R1 and R2 file is touched in
    config.path_to_picked_up_test_files on the Archer server, named
    <adx_project_name>_<sample_id>_<panel_name>_S<sample_id>_R{1,2}_001.fastq.gz

    Args:
        adx_project_name: Project name used as the filename prefix.
        panel_name: Panel name embedded in the filename.
        sample_ids: Iterable of sample identifiers; one ssh call is made per sample.

    Returns:
        None. The output of the ssh commands is discarded.
    '''
    # NOTE(review): these names differ from the link targets built in
    # create_test_project_files (<adx>_<NN>_<panel>_<sample>_S<N>_...), so those
    # symlinks do not appear to resolve to the files created here. Confirm
    # whether that is intended.
    for sample_id in sample_ids:
        # Part of the path shared by the R1 and R2 file of this sample.
        fastq_stem = "%s/%s_%s_%s_S%s" % (
            config.path_to_picked_up_test_files,
            adx_project_name,
            sample_id,
            panel_name,
            sample_id,
        )
        remote_cmd = "touch %s_R1_001.fastq.gz; touch %s_R2_001.fastq.gz" % (
            fastq_stem,
            fastq_stem,
        )
        # "$archer_pw" is quoted so a password containing whitespace or glob
        # characters reaches sshpass as a single argument.
        cmd = "archer_pw=$(<%s); sshpass -p \"$archer_pw\" ssh [email protected] '%s'" % (
            config.path_to_archerdx_pw,
            remote_cmd,
        )
        execute_subprocess_command(cmd)

def create_test_project_files(project_id, adx_project_name, panel_name, sample_ids):
    '''Creates project files and symlinks for testing.

    In one ssh call this touches <project_id>.tar.gz inside the project folder
    (<config.path_to_analysis_test_folder>/<project_id>) and, for each sample,
    symlinks an R1 and an R2 FASTQ from config.path_to_picked_up_test_files
    into that folder. Samples are numbered from 1 in list order, giving names
    <adx_project_name>_<NN>_<panel_name>_<sample_id>_S<N>_R{1,2}_001.fastq.gz

    Args:
        project_id: Project folder name, also used as the tar.gz basename.
        adx_project_name: Project name used as the FASTQ filename prefix.
        panel_name: Panel name embedded in the FASTQ filenames.
        sample_ids: Sequence of sample identifiers. Every sample is linked;
            for two samples the command is the same as the previous
            hard-coded two-sample version.

    Returns:
        None. The output of the ssh command is discarded.
    '''
    # NOTE(review): create_test_fastq_files names its files
    # <adx>_<sample>_<panel>_S<sample>_..., which differs from the link targets
    # built here, so these symlinks appear to be dangling. Confirm intended.
    project_dir = "%s/%s" % (config.path_to_analysis_test_folder, project_id)
    remote_steps = ["touch %s/%s.tar.gz" % (project_dir, project_id)]
    for sample_number, sample_id in enumerate(sample_ids, start=1):
        for read in ("R1", "R2"):
            fastq_name = "%s_%02d_%s_%s_S%d_%s_001.fastq.gz" % (
                adx_project_name,
                sample_number,
                panel_name,
                sample_id,
                sample_number,
                read,
            )
            remote_steps.append("ln -s %s/%s %s/%s" % (
                config.path_to_picked_up_test_files,
                fastq_name,
                project_dir,
                fastq_name,
            ))
    # "$archer_pw" is quoted so a password containing whitespace or glob
    # characters reaches sshpass as a single argument.
    cmd = "archer_pw=$(<%s); sshpass -p \"$archer_pw\" ssh [email protected] '%s'" % (
        config.path_to_archerdx_pw,
        "; ".join(remote_steps),
    )
    execute_subprocess_command(cmd)

def teardown_test_data(project_ids, adx_project_name, panel_names):
    '''Removes test data from the Archer server.

    Deletes each <config.path_to_analysis_test_folder>/<project_id> folder
    recursively, then deletes the FASTQ files in
    config.path_to_picked_up_test_files matching
    <adx_project_name>_*_<panel_name>_*.fastq.gz for each panel.

    Args:
        project_ids: Iterable of project folder names to remove.
        adx_project_name: Project name prefix of the FASTQ files to remove.
        panel_names: Iterable of panel names whose FASTQ files are removed.

    Returns:
        None. The output of the ssh commands is discarded.
    '''
    # Loop through script defined projects and remove
    for project_id in project_ids:
        remote_cmd = "rm -rf %s/%s" % (config.path_to_analysis_test_folder, project_id)
        # "$archer_pw" is quoted so a password containing whitespace or glob
        # characters reaches sshpass as a single argument.
        cmd = "archer_pw=$(<%s); sshpass -p \"$archer_pw\" ssh [email protected] '%s'" % (
            config.path_to_archerdx_pw,
            remote_cmd,
        )
        execute_subprocess_command(cmd)

    # Loop through script defined pan files and remove
    for panel_name in panel_names:
        # The glob stays inside the single-quoted remote command, so it is
        # expanded on the Archer server rather than locally.
        remote_cmd = "rm -f %s/%s_*_%s_*.fastq.gz" % (
            config.path_to_picked_up_test_files,
            adx_project_name,
            panel_name,
        )
        cmd = "archer_pw=$(<%s); sshpass -p \"$archer_pw\" ssh [email protected] '%s'" % (
            config.path_to_archerdx_pw,
            remote_cmd,
        )
        execute_subprocess_command(cmd)

def create_test_data(project_ids, adx_project_name, panel_names, sample_ids):
    '''Sets up the full test data set, one project at a time.

    Project IDs and panel names are paired by position; pairing stops at the
    shorter of the two lists. For each pair the project folder, the dummy
    FASTQ files and the project files/symlinks are created, in that order.
    '''
    project_panel_pairs = zip(project_ids, panel_names)
    for current_project, current_panel in project_panel_pairs:
        create_test_project_folder(current_project)
        create_test_fastq_files(adx_project_name, current_panel, sample_ids)
        create_test_project_files(
            current_project,
            adx_project_name,
            current_panel,
            sample_ids,
        )

if __name__ == "__main__":
    # Fixed set of dummy projects used for testing; project IDs and panel
    # names are paired by position when the data is created.
    test_project_ids = ["9996", "9997", "9998"]
    test_adx_project_name = "ADXtest2"
    test_panel_names = ["Pan4001", "Pan4002", "Pan4003"]
    test_sample_ids = ["sample1", "sample2"]

    parser = argparse.ArgumentParser(description="Create or teardown test data for Archer archiving.")
    parser.add_argument("--teardown", action="store_true", help="Teardown the test data")
    args = parser.parse_args()

    joined_project_ids = ', '.join(test_project_ids)

    # Default action is to create the data; --teardown removes it instead.
    if not args.teardown:
        create_test_data(test_project_ids, test_adx_project_name, test_panel_names, test_sample_ids)
        print("Test data created for projects {} ({}) on the Archer server.".format(joined_project_ids, test_adx_project_name))
    else:
        teardown_test_data(test_project_ids, test_adx_project_name, test_panel_names)
        print("Test data teardown completed for projects {} ({}) on the Archer server.".format(joined_project_ids, test_adx_project_name))

0 comments on commit 86f777a

Please sign in to comment.