-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCoverageJobAssistant.py
122 lines (95 loc) · 5.23 KB
/
CoverageJobAssistant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/bin/env python
#######################################################################
# Copyright (C) 2023 Christian Bluemel
#
# This file is part of Spice.
#
# CoverageJobAssistant is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CoverageJobAssistant is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Spice. If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################
import os
from typing import Dict, Any, List
from pathlib import Path
from Classes.ReduxArgParse.ReduxArgParse import ReduxArgParse
# Template for the SLURM job-array script that main() writes to disk.
# Placeholders: {0} = value of --partition, {1} = upper bound of the --array
# range (the range starts at 1), {2} = path of the text file read by awk.
# Each array task uses awk to select the line whose number equals its
# SLURM_ARRAY_TASK_ID from that file and then executes that line as a command.
# stdout and stderr of the tasks are discarded (/dev/null).
JOB_ARRAY: str = """#!/bin/bash
#SBATCH --partition={0}
#SBATCH --cpus-per-task=8
#SBATCH --job-name="coverage"
#SBATCH --output=/dev/null
#SBATCH --error=/dev/null
#SBATCH --array=1-{1}
alignment_job=$(awk FNR==$SLURM_ARRAY_TASK_ID "{2}")
$alignment_job
"""
# Template for a single coverage command line.
# Placeholders: {0} = executable, {1} = argument of -G, {2} = argument of -o
# without its ".gtf" suffix, {3} = positional input file.
# NOTE(review): "-p 8" presumably mirrors "--cpus-per-task=8" in JOB_ARRAY — confirm.
RAW_SCRIPT: str = "{0} -eB -G {1} -o {2}.gtf {3} -p 8"
class CoverageJobAssistant:
    """Assemble the command line of one coverage job.

    All file paths are derived from a base directory plus an alignment ID and
    an annotation ID. The finished command is kept in ``command`` and is what
    ``str()`` returns for an instance.
    """

    def __init__(self, aligner_path: str, out_path: str, alignment_id: str, annotation_id: str):
        """Derive the job's file paths and format the command.

        :param aligner_path: Executable placed at the start of the command.
        :param out_path: Base directory all derived paths are joined onto.
        :param alignment_id: Name stem of the ``.bam`` file; also used for the
            ``coverage_<alignment_id>`` directory and the output file stem.
        :param annotation_id: Name stem of the ``.gtf`` file passed to ``-G``.
        """
        bam_name: str = alignment_id + ".bam"
        gtf_name: str = annotation_id + ".gtf"
        coverage_dir_name: str = "coverage_" + alignment_id

        self.bam_file_path: str = os.path.join(out_path, bam_name)
        self.gtf_file_path: str = os.path.join(out_path, gtf_name)
        # Directory that receives the job's output; this class does not create it.
        self.path_prefix: str = os.path.join(out_path, coverage_dir_name)
        # Output path without suffix; RAW_SCRIPT appends ".gtf" itself.
        self.out_file_path: str = os.path.join(self.path_prefix, alignment_id)

        template_values = (
            aligner_path,
            self.gtf_file_path,
            self.out_file_path,
            self.bam_file_path,
        )
        self.command = RAW_SCRIPT.format(*template_values)

    def __str__(self):
        """Return the fully formatted command line."""
        return self.command
def _read_id_list(list_path: str) -> List[str]:
    """Return the non-blank lines of the text file at *list_path*, in file order."""
    with open(list_path, "r") as f:
        # A trailing newline or a blank line must not turn into an empty ID.
        return [entry for entry in f.read().split("\n") if entry.strip()]


def main():
    """Generate the coverage job list and the SLURM job-array script.

    Reads ``experiment_list.txt`` from the ``--input_data`` directory and, for
    every experiment listed there, pairs the entries of that experiment's
    ``annotation_list.txt`` and ``alignment_list.txt`` by position. One
    coverage command is built per pair and its output directory is created.
    All commands are written to ``coverage_job_list.txt`` and the job-array
    script to ``coverage_job_array.job``, both inside ``--out_path``.

    :raises ValueError: If an experiment lists fewer alignments than annotations.
    """
    argument_parser: ReduxArgParse = ReduxArgParse(
        ["--input_data", "--aligner_path", "--partitions", "--out_path"],
        [str, str, str, str],
        ["store", "store", "store", "store"],
        [1, 1, "*", 1],
        # One help text per argument. The original list was missing a comma,
        # which glued a stale sentence onto the --out_path help text.
        ["Path to directory containing collected data.",
         "Path to the aligner to be used.",
         "Set of partitions to be used for the jobs.",
         "Directory to which the jobs will be saved."])
    argument_parser.generate_parser()
    argument_parser.execute()
    argument_dict: Dict[str, Any] = argument_parser.get_args()

    # These arguments are declared with nargs=1, so they are unwrapped via [0] here.
    for single_value_key in ("input_data", "aligner_path", "out_path"):
        argument_dict[single_value_key] = argument_dict[single_value_key][0]

    input_directory: str = argument_dict["input_data"]
    experiment_id_list: List[str] = _read_id_list(os.path.join(input_directory, "experiment_list.txt"))

    # Make sure coverage_list.txt exists without modifying an already existing file.
    coverage_list_path: str = os.path.join(input_directory, "coverage_list.txt")
    if not Path(coverage_list_path).exists():
        Path(coverage_list_path).touch()

    job_list: List[str] = list()
    for exp_id in experiment_id_list:
        experiment_directory: str = os.path.join(input_directory, exp_id)
        annotation_list: List[str] = _read_id_list(os.path.join(experiment_directory, "annotation_list.txt"))
        alignment_list: List[str] = _read_id_list(os.path.join(experiment_directory, "alignment_list.txt"))

        # Annotations and alignments are matched by position; a missing
        # alignment would otherwise surface as a bare IndexError.
        if len(alignment_list) < len(annotation_list):
            raise ValueError(
                f"Experiment '{exp_id}' lists {len(annotation_list)} annotations "
                f"but only {len(alignment_list)} alignments."
            )

        for annotation_id, alignment_id in zip(annotation_list, alignment_list):
            coverage_job: CoverageJobAssistant = CoverageJobAssistant(argument_dict["aligner_path"],
                                                                      experiment_directory,
                                                                      alignment_id,
                                                                      annotation_id)
            Path(coverage_job.path_prefix).mkdir(parents=True, exist_ok=True)
            job_list.append(str(coverage_job))

    # One command per line; the job-array script selects a line by its task ID.
    alignment_job_list_path: str = os.path.join(argument_dict['out_path'], "coverage_job_list.txt")
    with open(alignment_job_list_path, "w") as f:
        f.write("\n".join(job_list))

    alignment_job_array_path: str = os.path.join(argument_dict['out_path'], "coverage_job_array.job")
    alignment_job_array: str = JOB_ARRAY.format(",".join(argument_dict["partitions"]),
                                                str(len(job_list)),
                                                alignment_job_list_path)
    with open(alignment_job_array_path, "w") as f:
        f.write(alignment_job_array)
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()