From b3811c67e4981542bc965a5e3b46ba59ae7aecc1 Mon Sep 17 00:00:00 2001 From: nukappa Date: Tue, 17 Dec 2024 17:37:29 +0100 Subject: [PATCH 1/6] basic parser for migrate utility. checks for project_id, sample_id combo --- spacemake/cmdline.py | 61 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/spacemake/cmdline.py b/spacemake/cmdline.py index bd4e3c0..1897b38 100644 --- a/spacemake/cmdline.py +++ b/spacemake/cmdline.py @@ -774,6 +774,37 @@ def setup_run_parser(parent_parser_subparsers): return parser_run +def setup_migrate_parser(parent_parser_subparsers): + """setup_migrate_parser + + :param parent_parser_subparsers + """ + parser_migrate = parent_parser_subparsers.add_parser( + "migrate", + help="migrate spacemake" + ) + + parser_migrate.add_argument( + "--project-id", + default="", + help="The project-id of the sample to perform the migration.", + type=str, + required=True, + dest="project_id", + ) + parser_migrate.add_argument( + "--sample-id", + default="", + help="The sample-id of the sample to perform the migration.", + type=str, + required=True, + dest="sample_id", + ) + + parser_migrate.set_defaults(func=spacemake_migrate) + + return parser_migrate + ##################################################### # actual command-line functions, used as call-backs # ##################################################### @@ -1145,6 +1176,25 @@ def list_projects_cmdline(args): # print the table logger.info(df.loc[:, variables].__str__()) +@message_aggregation(logger_name) +def spacemake_migrate(args): + """spacemake_migrate. + + :param args: + """ + from spacemake.project_df import get_global_ProjectDF + + pdf = get_global_ProjectDF() + + # Check that the project-id and sample-id combination provided exists + pdf.assert_sample(args['project_id'], args['sample_id']) + + # TODO: convert BAM to CRAM, appropriately change timestamp + + # TODO: delete BAMs + + # TODO: delete other unnecessary files + def make_main_parser(): ################# @@ -1159,7 +1209,7 @@ def make_main_parser(): parser_main = argparse.ArgumentParser( allow_abbrev=False, - description="spacemake: bioinformatic pipeline for processing and analysis of spatial-transcriptomics data", + description="Spacemake: processing and analysis of large-scale spatial transcriptomics data", ) parser_main.add_argument("--version", action="store_true") @@ -1172,6 +1222,7 @@ def make_main_parser(): parser_projects = None parser_config = None parser_init = None + parser_migrate = None parser_spatial = None ################## @@ -1194,7 +1245,7 @@ def make_main_parser(): # SPACEMAKE PROJECT/SAMPLE # ############################ from spacemake.cmdline import setup_project_parser - + parser_projects = setup_project_parser(parser_main_subparsers) ################# @@ -1202,6 +1253,11 @@ def make_main_parser(): ################# parser_run = setup_run_parser(parser_main_subparsers) + ##################### + # SPACEMAKE MIGRATE # + ##################### + parser_migrate = setup_migrate_parser(parser_main_subparsers) + ##################### # SPACEMAKE SPATIAL # ##################### @@ -1214,6 +1270,7 @@ def make_main_parser(): "config": parser_config, "projects": parser_projects, "run": parser_run, + "migrate": parser_migrate, "main": parser_main, "spatial": parser_spatial, } From a8835ea5b68862b3ac2210498b77a4a320f193c4 Mon Sep 17 00:00:00 2001 From: nukappa Date: Wed, 18 Dec 2024 14:21:05 +0100 Subject: [PATCH 2/6] added CRAM conversion, several TODOs pending --- spacemake/cmdline.py | 70 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 8 deletions(-) diff --git a/spacemake/cmdline.py b/spacemake/cmdline.py index 1897b38..b39b6e9 100644 --- a/spacemake/cmdline.py +++ b/spacemake/cmdline.py @@ -787,7 +787,7 @@ def setup_migrate_parser(parent_parser_subparsers): parser_migrate.add_argument( "--project-id", default="", - help="The project-id of the sample to perform the migration.", + help="The project-id of the sample to perform the migration", type=str, required=True, dest="project_id", @@ -795,11 +795,19 @@ def setup_migrate_parser(parent_parser_subparsers): parser_migrate.add_argument( "--sample-id", default="", - help="The sample-id of the sample to perform the migration.", + help="The sample-id of the sample to perform the migration", type=str, required=True, dest="sample_id", ) + parser_migrate.add_argument( + "--threads", + default="1", + help="Number of threads to use", + type=str, + required=False, + dest="threads", + ) parser_migrate.set_defaults(func=spacemake_migrate) @@ -1176,6 +1184,7 @@ def list_projects_cmdline(args): # print the table logger.info(df.loc[:, variables].__str__()) + @message_aggregation(logger_name) def spacemake_migrate(args): """spacemake_migrate. @@ -1183,17 +1192,62 @@ def spacemake_migrate(args): :param args: """ from spacemake.project_df import get_global_ProjectDF + import subprocess + import time + import yaml + + project_id = args['project_id'] + sample_id = args['sample_id'] + threads = args['threads'] pdf = get_global_ProjectDF() - - # Check that the project-id and sample-id combination provided exists - pdf.assert_sample(args['project_id'], args['sample_id']) - # TODO: convert BAM to CRAM, appropriately change timestamp + # Make sure that the project-id and sample-id combination provided exists + pdf.assert_sample(project_id, sample_id) + project_folder = os.path.join('projects', project_id, 'processed_data', sample_id, 'illumina', 'complete_data') + + # Extract vars from the config.yaml for later use + with open("config.yaml") as yamlfile: + cf = yaml.safe_load(yamlfile.read()) + sample_species = pdf.get_sample_info(project_id, sample_id)['species'] + genome_sequence = cf['species'][sample_species]['genome']['sequence'] + + # Start migrartion + print('Beginning migration ...', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + if not os.path.exists(os.path.join(project_id, 'stats.csv')): + print(f"Stats file for sample with (project_id, sample_id)=({project_id}, {sample_id}) " + "not found on disk. Will generate it now.") + # Execute code written elsewhere to generate the file + else: + print("Stats file found on disk") + + if not os.path.exists(os.path.join(project_folder, 'final.cram')): + print(f"CRAM files for sample with (project_id, sample_id)=({project_id}, {sample_id}) " + "not found on disk. Will generate them now.") + # Execute code to convert to CRAM) + # TODO: reference BAM from internals OR write a func to find it + # TODO: proper naming for CRAM + # TODO: transfer timestamp + subprocess.run( + [ + "samtools", "view", + "-T", genome_sequence, + "-C", + "--threads", str(threads), + "-o", os.path.join(project_folder, "final.cram"), + os.path.join(project_folder, "final.polyA_adapter_trimmed.bam") + ] + ) + else: + print(f"CRAM files for sample with (project_id, sample_id)=({project_id}, {sample_id}) " + "already on disk. Skipping conversion step.") - # TODO: delete BAMs + print("Removing unnecessary files ...", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + # to delete: + # - BAM files (if CRAM are present) + # - unaligned.bam - # TODO: delete other unnecessary files + print("Migration complete ...", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) def make_main_parser(): From c338f3a47153472c3cfa179a7a6a4a8453973c02 Mon Sep 17 00:00:00 2001 From: nukappa Date: Thu, 19 Dec 2024 09:19:46 +0100 Subject: [PATCH 3/6] moved functions to a migration dedicated file --- spacemake/cmdline.py | 32 +++++++----------------- spacemake/migrate.py | 59 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 23 deletions(-) create mode 100644 spacemake/migrate.py diff --git a/spacemake/cmdline.py b/spacemake/cmdline.py index b39b6e9..c995b0f 100644 --- a/spacemake/cmdline.py +++ b/spacemake/cmdline.py @@ -1191,8 +1191,8 @@ def spacemake_migrate(args): :param args: """ + from spacemake.migrate import convert_bam_to_cram from spacemake.project_df import get_global_ProjectDF - import subprocess import time import yaml @@ -1212,40 +1212,26 @@ def spacemake_migrate(args): sample_species = pdf.get_sample_info(project_id, sample_id)['species'] genome_sequence = cf['species'][sample_species]['genome']['sequence'] - # Start migrartion + # Begin migration print('Beginning migration ...', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + if not os.path.exists(os.path.join(project_id, 'stats.csv')): - print(f"Stats file for sample with (project_id, sample_id)=({project_id}, {sample_id}) " + print(f"Stats file for sample with (project-id, sample-id)=({project_id}, {sample_id}) " "not found on disk. Will generate it now.") # Execute code written elsewhere to generate the file else: print("Stats file found on disk") if not os.path.exists(os.path.join(project_folder, 'final.cram')): - print(f"CRAM files for sample with (project_id, sample_id)=({project_id}, {sample_id}) " + print(f"CRAM files for sample with (project-id, sample-id)=({project_id}, {sample_id}) " "not found on disk. Will generate them now.") - # Execute code to convert to CRAM) - # TODO: reference BAM from internals OR write a func to find it - # TODO: proper naming for CRAM - # TODO: transfer timestamp - subprocess.run( - [ - "samtools", "view", - "-T", genome_sequence, - "-C", - "--threads", str(threads), - "-o", os.path.join(project_folder, "final.cram"), - os.path.join(project_folder, "final.polyA_adapter_trimmed.bam") - ] - ) + # Execute code to convert to CRAM + convert_bam_to_cram(genome_sequence, project_folder, threads) else: - print(f"CRAM files for sample with (project_id, sample_id)=({project_id}, {sample_id}) " - "already on disk. Skipping conversion step.") + print(f"CRAM files for sample with (project-id, sample-id)=({project_id}, {sample_id}) " + "already on disk.") print("Removing unnecessary files ...", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) - # to delete: - # - BAM files (if CRAM are present) - # - unaligned.bam print("Migration complete ...", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) diff --git a/spacemake/migrate.py b/spacemake/migrate.py new file mode 100644 index 0000000..54b028a --- /dev/null +++ b/spacemake/migrate.py @@ -0,0 +1,59 @@ +import os +import subprocess +import time + +def find_bam_files(folder): + """ + Finds all .bam files in the given folder and checks if any of them is a symlink. + + Returns a list of tuples of type (str, bool), e.g. ('bam_file', False) + """ + if not os.path.isdir(folder): + raise ValueError(f"The provided path {folder} is not a valid directory.") + + # Find files and check for symlinks + bam_files = [f for f in os.listdir(folder) if f.endswith('.bam')] + are_symlinks = [os.path.islink(bam_file) for bam_file in bam_files] + + return list(zip(bam_files, are_symlinks)) + +def convert_bam_to_cram(ref_sequence, project_folder, threads=4): + bam_files = find_bam_files(project_folder) + + for idx in range(len(bam_files)): + bam_filename, bam_file_is_symlink = bam_files[idx] + bam_filename_prefix = bam_filename.rsplit('.', 1)[0] + cram_filename = bam_filename_prefix + ".cram" + + # TODO: change ref sequence for genome, rRNA, phiX, custom? + + if bam_file_is_symlink: + # TODO: deal with this + continue + else: + print('Converting', bam_filename, 'to', cram_filename, + '...', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + subprocess.run( + [ + "samtools", "view", + "-T", ref_sequence, + "-C", + "--threads", str(threads), + "-o", os.path.join(project_folder, cram_filename), + os.path.join(project_folder, bam_filename) + ] + ) + + # TODO: transfer timestamp + + return + +def remove_files(): + # - BAM files (if CRAMs are present) + + # - unaligned.bam + + # remove tiles + + return + From 71dbe6a2333a0db662ca618a5e4393b9f541daa2 Mon Sep 17 00:00:00 2001 From: nukappa Date: Thu, 19 Dec 2024 11:24:50 +0100 Subject: [PATCH 4/6] added transfer timestamp --- spacemake/migrate.py | 49 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/spacemake/migrate.py b/spacemake/migrate.py index 54b028a..1f56930 100644 --- a/spacemake/migrate.py +++ b/spacemake/migrate.py @@ -2,6 +2,7 @@ import subprocess import time + def find_bam_files(folder): """ Finds all .bam files in the given folder and checks if any of them is a symlink. @@ -13,23 +14,54 @@ def find_bam_files(folder): # Find files and check for symlinks bam_files = [f for f in os.listdir(folder) if f.endswith('.bam')] - are_symlinks = [os.path.islink(bam_file) for bam_file in bam_files] + bam_file_paths = [os.path.join(folder, f) for f in bam_files] + are_symlinks = [os.path.islink(bam_file_path) for bam_file_path in bam_file_paths] return list(zip(bam_files, are_symlinks)) + +def sync_timestamps(original_file, new_file): + """ + Sync the timestamps (access and modification time) of new_file with those of original_file. + + Args: + original_file (str): Path to the file whose timestamps will be copied. + new_file (str): Path to the file that will have its timestamps updated. + """ + try: + # Get the access time and modification time from original_file + source_times = os.stat(original_file) + + # Set the same access and modification time for new_file + os.utime(new_file, (source_times.st_atime, source_times.st_mtime)) + + print(f"File timestamps of {new_file} set to match {original_file}.") + except FileNotFoundError: + print(f"Error: One or both of the files '{original_file}' or '{new_file}' do not exist.") + except Exception as e: + print(f"An error occurred: {e}") + + def convert_bam_to_cram(ref_sequence, project_folder, threads=4): bam_files = find_bam_files(project_folder) for idx in range(len(bam_files)): bam_filename, bam_file_is_symlink = bam_files[idx] bam_filename_prefix = bam_filename.rsplit('.', 1)[0] - cram_filename = bam_filename_prefix + ".cram" + cram_filename = bam_filename_prefix + '.cram' + + if os.path.exists(os.path.join(project_folder, cram_filename)): + print('CRAM file', cram_filename, 'already exists. Skipping conversion.') + continue # TODO: change ref sequence for genome, rRNA, phiX, custom? if bam_file_is_symlink: - # TODO: deal with this - continue + # TODO: fix timestamp for symlink + true_bam_filename = os.readlink(os.path.join(project_folder, bam_filename)) + true_bam_filename_prefix = true_bam_filename.rsplit('.', 1)[0] + os.symlink(true_bam_filename_prefix + '.cram', + os.path.join(project_folder, bam_filename_prefix + '.cram')) else: print('Converting', bam_filename, 'to', cram_filename, '...', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) @@ -44,16 +76,15 @@ def convert_bam_to_cram(ref_sequence, project_folder, threads=4): ] ) - # TODO: transfer timestamp + sync_timestamps(os.path.join(project_folder, bam_filename), + os.path.join(project_folder, cram_filename)) - return def remove_files(): - # - BAM files (if CRAMs are present) + # - BAM files (only if CRAMs are present) + bam_files = find_bam_files(project_folder) # - unaligned.bam # remove tiles - return - From e2eb4a20cae95314801e8c1ae18d52908649d1a2 Mon Sep 17 00:00:00 2001 From: nukappa Date: Thu, 19 Dec 2024 14:25:56 +0100 Subject: [PATCH 5/6] moved sync_timestamps to util and fixed symlink bug --- spacemake/cmdline.py | 1 + spacemake/migrate.py | 29 ++++------------------------- spacemake/util.py | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/spacemake/cmdline.py b/spacemake/cmdline.py index c995b0f..4b81184 100644 --- a/spacemake/cmdline.py +++ b/spacemake/cmdline.py @@ -1223,6 +1223,7 @@ def spacemake_migrate(args): print("Stats file found on disk") if not os.path.exists(os.path.join(project_folder, 'final.cram')): + # TODO: fix this with a proper check. print(f"CRAM files for sample with (project-id, sample-id)=({project_id}, {sample_id}) " "not found on disk. Will generate them now.") # Execute code to convert to CRAM diff --git a/spacemake/migrate.py b/spacemake/migrate.py index 1f56930..84d3c93 100644 --- a/spacemake/migrate.py +++ b/spacemake/migrate.py @@ -2,6 +2,8 @@ import subprocess import time +from spacemake.util import sync_timestamps + def find_bam_files(folder): """ @@ -20,28 +22,6 @@ def find_bam_files(folder): return list(zip(bam_files, are_symlinks)) -def sync_timestamps(original_file, new_file): - """ - Sync the timestamps (access and modification time) of new_file with those of original_file. - - Args: - original_file (str): Path to the file whose timestamps will be copied. - new_file (str): Path to the file that will have its timestamps updated. - """ - try: - # Get the access time and modification time from original_file - source_times = os.stat(original_file) - - # Set the same access and modification time for new_file - os.utime(new_file, (source_times.st_atime, source_times.st_mtime)) - - print(f"File timestamps of {new_file} set to match {original_file}.") - except FileNotFoundError: - print(f"Error: One or both of the files '{original_file}' or '{new_file}' do not exist.") - except Exception as e: - print(f"An error occurred: {e}") - - def convert_bam_to_cram(ref_sequence, project_folder, threads=4): bam_files = find_bam_files(project_folder) @@ -54,14 +34,13 @@ def convert_bam_to_cram(ref_sequence, project_folder, threads=4): print('CRAM file', cram_filename, 'already exists. Skipping conversion.') continue - # TODO: change ref sequence for genome, rRNA, phiX, custom? + # TODO: change ref sequence for genome, rRNA, phiX, custom? Get it from map_strategy if bam_file_is_symlink: - # TODO: fix timestamp for symlink true_bam_filename = os.readlink(os.path.join(project_folder, bam_filename)) true_bam_filename_prefix = true_bam_filename.rsplit('.', 1)[0] os.symlink(true_bam_filename_prefix + '.cram', - os.path.join(project_folder, bam_filename_prefix + '.cram')) + os.path.join(project_folder, cram_filename)) else: print('Converting', bam_filename, 'to', cram_filename, '...', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) diff --git a/spacemake/util.py b/spacemake/util.py index 7fdedf4..3bab109 100644 --- a/spacemake/util.py +++ b/spacemake/util.py @@ -578,3 +578,29 @@ def load_config_with_fallbacks(args, try_yaml="config.yaml"): import argparse return argparse.Namespace(**args_kw) + + +def sync_timestamps(original_file, new_file): + """ + Sync the timestamps (access and modification time) of new_file with those of original_file. + + Args: + original_file (str): Path to the file whose timestamps will be copied. + new_file (str): Path to the file that will have its timestamps updated. + """ + try: + # Get the access time and modification time from original_file + if os.path.islink(original_file): + source_times = os.lstat(original_file) + else: + source_times = os.stat(original_file) + + # Set the same access and modification time for new_file + os.utime(new_file, (source_times.st_atime, source_times.st_mtime), + follow_symlinks=not os.path.islink(original_file)) + + print(f"File timestamps of {new_file} set to match {original_file}.") + except FileNotFoundError: + print(f"Error: One or both of the files '{original_file}' or '{new_file}' do not exist.") + except Exception as e: + print(f"An error occurred: {e}") From fddf04f65b64995f994fa0a815681b39f4417578 Mon Sep 17 00:00:00 2001 From: nukappa Date: Thu, 19 Dec 2024 17:37:41 +0100 Subject: [PATCH 6/6] refactored and added reference_type detection --- spacemake/cmdline.py | 10 ++------ spacemake/migrate.py | 59 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/spacemake/cmdline.py b/spacemake/cmdline.py index 4b81184..88e9ce9 100644 --- a/spacemake/cmdline.py +++ b/spacemake/cmdline.py @@ -1205,13 +1205,7 @@ def spacemake_migrate(args): # Make sure that the project-id and sample-id combination provided exists pdf.assert_sample(project_id, sample_id) project_folder = os.path.join('projects', project_id, 'processed_data', sample_id, 'illumina', 'complete_data') - - # Extract vars from the config.yaml for later use - with open("config.yaml") as yamlfile: - cf = yaml.safe_load(yamlfile.read()) - sample_species = pdf.get_sample_info(project_id, sample_id)['species'] - genome_sequence = cf['species'][sample_species]['genome']['sequence'] - + # Begin migration print('Beginning migration ...', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) @@ -1227,7 +1221,7 @@ def spacemake_migrate(args): print(f"CRAM files for sample with (project-id, sample-id)=({project_id}, {sample_id}) " "not found on disk. Will generate them now.") # Execute code to convert to CRAM - convert_bam_to_cram(genome_sequence, project_folder, threads) + convert_bam_to_cram(project_id, sample_id, threads) else: print(f"CRAM files for sample with (project-id, sample-id)=({project_id}, {sample_id}) " "already on disk.") diff --git a/spacemake/migrate.py b/spacemake/migrate.py index 84d3c93..3f0ced4 100644 --- a/spacemake/migrate.py +++ b/spacemake/migrate.py @@ -1,7 +1,9 @@ import os import subprocess import time +import yaml +from spacemake.project_df import get_global_ProjectDF from spacemake.util import sync_timestamps @@ -19,47 +21,78 @@ def find_bam_files(folder): bam_file_paths = [os.path.join(folder, f) for f in bam_files] are_symlinks = [os.path.islink(bam_file_path) for bam_file_path in bam_file_paths] - return list(zip(bam_files, are_symlinks)) + return list(zip(bam_file_paths, are_symlinks)) -def convert_bam_to_cram(ref_sequence, project_folder, threads=4): - bam_files = find_bam_files(project_folder) +def get_map_strategy_sequences(project_id, sample_id): + """ + Returns a dictionary of reference_types and their location, e.g. {rRNA : /path/to/disk/sequence.fa} + """ + pdf = get_global_ProjectDF() + + map_strategy = pdf.get_sample_info(project_id, sample_id)['map_strategy'] + sequence_type = [mapping.split(':')[1] for mapping in map_strategy.split('->')] + with open("config.yaml") as yamlfile: + cf = yaml.safe_load(yamlfile.read()) + sample_species = pdf.get_sample_info(project_id, sample_id)['species'] + + reference_type = {st : cf['species'][sample_species][st]['sequence'] for st in sequence_type} + + return reference_type + + +def convert_bam_to_cram(project_id, sample_id, threads=4): + """ + Converts all BAM files to CRAM and updates the timestamps to those of the + original files. Symbolic links are treated as such. + """ + species_sequences = get_map_strategy_sequences(project_id, sample_id) + + project_folder = os.path.join('projects', project_id, 'processed_data', + sample_id, 'illumina', 'complete_data') + bam_files = find_bam_files(project_folder) + for idx in range(len(bam_files)): bam_filename, bam_file_is_symlink = bam_files[idx] bam_filename_prefix = bam_filename.rsplit('.', 1)[0] cram_filename = bam_filename_prefix + '.cram' - if os.path.exists(os.path.join(project_folder, cram_filename)): + if os.path.exists(cram_filename): print('CRAM file', cram_filename, 'already exists. Skipping conversion.') continue - # TODO: change ref sequence for genome, rRNA, phiX, custom? Get it from map_strategy + if 'unaligned' in bam_filename: + continue if bam_file_is_symlink: - true_bam_filename = os.readlink(os.path.join(project_folder, bam_filename)) + true_bam_filename = os.readlink(bam_filename) true_bam_filename_prefix = true_bam_filename.rsplit('.', 1)[0] - os.symlink(true_bam_filename_prefix + '.cram', - os.path.join(project_folder, cram_filename)) + os.symlink(true_bam_filename_prefix + '.cram', cram_filename) else: print('Converting', bam_filename, 'to', cram_filename, '...', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + + for ref_type in species_sequences: + if ref_type in bam_filename: + ref_sequence = species_sequences[ref_type] + break + subprocess.run( [ "samtools", "view", "-T", ref_sequence, "-C", "--threads", str(threads), - "-o", os.path.join(project_folder, cram_filename), - os.path.join(project_folder, bam_filename) + "-o", cram_filename, + bam_filename ] ) - sync_timestamps(os.path.join(project_folder, bam_filename), - os.path.join(project_folder, cram_filename)) + sync_timestamps(bam_filename, cram_filename) -def remove_files(): +def remove_files(project_folder): # - BAM files (only if CRAMs are present) bam_files = find_bam_files(project_folder)