Skip to content

Commit

Permalink
Merge pull request #228 from abyzovlab/ms_dev
Browse files Browse the repository at this point in the history
Ms dev
  • Loading branch information
suvakov authored Jul 18, 2024
2 parents eccec8d + cf1babe commit b2af48d
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 19 deletions.
2 changes: 1 addition & 1 deletion cnvpytor/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def pileup(self, chr_name, pos, ref, alt, tmp_file=".cnvpytor"):
_logger.warning("Can not find chromosome '%s' in file '%s'." % (chr_name, self.filename))
return
_logger.debug("Pileup chromosome %s from filename %s" % (chr_name, self.filename))
tmp_file += "_" + str(random.randint(0, 1e10)) + "_" + chr_name
tmp_file += "_" + str(random.randint(0, 10000000000)) + "_" + chr_name
f = open(tmp_file, "w")
for i in pos:
print(chr_name, i, file=f)
Expand Down
4 changes: 3 additions & 1 deletion cnvpytor/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,8 @@ def chromosomes_bin_sizes_with_signal(self, signal, flags=0, name=''):
res = re.findall(search_string, key)
if len(res) > 0:
chrs_bss.append(res[0])
if signal == "SNP likelihood":
chrs_bss = list(map(lambda x:(x[0].replace("half_",""), x[1]), chrs_bss))
return chrs_bss

def signal_exists(self, chr_name, bin_size, signal, flags=0, name=''):
Expand Down Expand Up @@ -610,7 +612,7 @@ def ls(self):
print()
chr_len = list(np.array(self.get_signal(None, None, "chromosome lengths")).astype("str"))
chr_len = dict(zip(chr_len[::2], chr_len[1::2]))
print("Chromosome lengths: " + str(chr_len))
print("Chromosome lengths: " + ", ".join([str(cc)+": "+str(ll) for cc, ll in chr_len.items()]))

@staticmethod
def save_root_trees(root_filename):
Expand Down
37 changes: 20 additions & 17 deletions cnvpytor/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,28 +489,30 @@ def save_data(chr, rd_p, rd_u):
self.io.add_meta_attribute("RD from VCF", vcf_file)
return count

@staticmethod
def pileup_chromosome(x):
c, l, snpc, bam_path, reference_filename, pos, ref, alt = x
_logger.info("Pileup chromosome %s with length %d" % (c, l))
bamf = Bam(bam_path, reference_filename=reference_filename)
return bamf.pileup(c, pos[snpc], ref[snpc], alt[snpc])

def _pileup_bam(self, bamfile, chroms, pos, ref, alt, nref, nalt, reference_filename):
_logger.info("Calculating pileup from bam file '%s'." % bamfile)
bamf = Bam(bamfile, reference_filename=reference_filename)

def pileup_chromosome(x):
c, l, snpc = x
_logger.info("Pileup chromosome %s with length %d" % (c, l))
return bamf.pileup(c, pos[snpc], ref[snpc], alt[snpc])

chrname, chrlen = bamf.get_chr_len()
chr_len = [(c, l, self.io.snp_chromosome_name(c)) for (c, l) in zip(chrname, chrlen) if
self.io.snp_chromosome_name(c) in chroms]
chr_len = [(c, l, self.io.snp_chromosome_name(c), bamfile, reference_filename, pos, ref, alt) for (c, l) in
zip(chrname, chrlen) if self.io.snp_chromosome_name(c) in chroms]

if self.max_cores == 1:
for cl in chr_len:
r = pileup_chromosome(cl)
r = self.pileup_chromosome(cl)
nref[cl[2]] = [x + y for x, y in zip(nref[cl[2]], r[0])]
nalt[cl[2]] = [x + y for x, y in zip(nalt[cl[2]], r[1])]

else:
from .pool import parmap
res = parmap(pileup_chromosome, chr_len, cores=self.max_cores)
res = parmap(self.pileup_chromosome, chr_len, cores=self.max_cores)
for cl, r in zip(chr_len, res):
nref[cl[2]] = [x + y for x, y in zip(nref[cl[2]], r[0])]
nalt[cl[2]] = [x + y for x, y in zip(nalt[cl[2]], r[1])]
Expand Down Expand Up @@ -1137,7 +1139,7 @@ def calculate_histograms_from_snp_counts(self, bin_sizes, chroms=[], use_mask=Tr
np.seterr(divide='ignore', invalid='ignore')
scale = bin_size / 150
rdcg = scale * rdcg / rdc
rdcg[rdc < min_count] = np.NaN
rdcg[rdc < min_count] = np.nan

self.io.create_signal(c, bin_size, "RD", rdcg)
self.io.create_signal(c, bin_size, "RD unique", rdcg)
Expand Down Expand Up @@ -1791,10 +1793,10 @@ def call(self, bin_sizes, chroms=[], print_calls=False, use_gc_corr=True, use_ma
if i in nan_indices_set:
i += 1
else:
if tb==bs:
rbs=i
if tb==b:
rb=i
if tb == bs:
rbs = i
if tb == b:
rb = i
i += 1
tb += 1
start = bin_size * rbs + 1
Expand All @@ -1804,11 +1806,12 @@ def call(self, bin_sizes, chroms=[], print_calls=False, use_gc_corr=True, use_ma
pN = -1
dG = -1
if gc:
pN = (size - np.sum(gc[start // 100:end // 100]) - np.sum(at[start // 100:end // 100])) / size
pN = (size - np.sum(gc[start // 100:end // 100]) - np.sum(
at[start // 100:end // 100])) / size
dG = np.min(distN[start // 100:end // 100])

if filter_nan:
pN = np.sum(np.isnan(rd_org[rbs:rb]))/(rb-rbs)
pN = np.sum(np.isnan(rd_org[rbs:rb])) / (rb - rbs)

if print_calls:
print("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e\t%.4f\t%.4f\t%d" % (
Expand Down Expand Up @@ -1875,7 +1878,7 @@ def call_mosaic(self, bin_sizes, chroms=[], use_gc_corr=True, use_mask=False, om
bins = len(rd)
valid = np.isfinite(rd)
level = rd[valid]
if len(level)<=3:
if len(level) <= 3:
continue
error = np.sqrt(level) ** 2 + std ** 2
loc_fl = np.min(list(zip(np.abs(np.diff(level))[:-1], np.abs(np.diff(level))[1:])), axis=1)
Expand Down

0 comments on commit b2af48d

Please sign in to comment.