-
Notifications
You must be signed in to change notification settings - Fork 1
/
find_VAF_peaks.py
79 lines (69 loc) · 1.96 KB
/
find_VAF_peaks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 3 13:09:24 2018
@author: lpsmith
"""
from __future__ import division
from os import walk
from os import path
from os import mkdir
from shutil import copytree
VAF_2v4dir = "VAF_2v4_hist/"
onlysomepatients = False
somepatients = ["997"]
VAFfile = open (VAF_2v4dir + "2v4_peaks.tsv", "w")
VAFfile.write("Patient")
VAFfile.write("\tSample")
VAFfile.write("\tVAF")
VAFfile.write("\tProbability")
VAFfile.write("\tPeak or Valley")
VAFfile.write("\tnVAFs")
VAFfile.write("\n")
infiles = []
for (_, _, f) in walk(VAF_2v4dir):
infiles += f
for file in infiles:
if "VAF_hist.tsv" not in file:
continue
(patient, sample) = file.split("_")[0:2]
if onlysomepatients and patient not in somepatients:
continue
hist = []
nVAFs = 0
for line in open(VAF_2v4dir + file, "r"):
if "VAF" in line:
nVAFs = line.split()[3]
continue
(VAF, prob) = line.split()
VAF = float(VAF)
prob = float(prob)
hist.append((VAF, prob))
hist.sort()
peakindex = 0
peakprob = 0
allpeaks = []
slope = "pos"
for index in range(0,len(hist)):
(VAF, prob) = hist[index]
if prob > peakprob:
if slope=="neg":
allpeaks.append((hist[peakindex][0], hist[peakindex][1], "Valley"))
peakindex = index
peakprob = prob
slope = "pos"
if peakindex + 20 < index:
if slope=="pos":
allpeaks.append((hist[peakindex][0], hist[peakindex][1], "Peak"))
slope = "neg"
peakindex += 1
peakprob = hist[peakindex][1]
for peak in allpeaks:
VAFfile.write(patient)
VAFfile.write("\t" + sample)
VAFfile.write("\t" + str(peak[0]))
VAFfile.write("\t" + str(peak[1]))
VAFfile.write("\t" + str(peak[2]))
VAFfile.write("\t" + nVAFs)
VAFfile.write("\n")
VAFfile.close()