Merge pull request #228 from abyzovlab/ms_dev

Ms dev
abyzovlab · Jul 18, 2024 · b2af48d · b2af48d
2 parents eccec8d + cf1babe
commit b2af48d
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 19 deletions.
diff --git a/cnvpytor/bam.py b/cnvpytor/bam.py
@@ -151,7 +151,7 @@ def pileup(self, chr_name, pos, ref, alt, tmp_file=".cnvpytor"):
             _logger.warning("Can not find chromosome '%s' in file '%s'." % (chr_name, self.filename))
             return
         _logger.debug("Pileup chromosome %s from filename %s" % (chr_name, self.filename))
-        tmp_file += "_" + str(random.randint(0, 1e10)) + "_" + chr_name
+        tmp_file += "_" + str(random.randint(0, 10000000000)) + "_" + chr_name
         f = open(tmp_file, "w")
         for i in pos:
             print(chr_name, i, file=f)

diff --git a/cnvpytor/io.py b/cnvpytor/io.py
@@ -351,6 +351,8 @@ def chromosomes_bin_sizes_with_signal(self, signal, flags=0, name=''):
             res = re.findall(search_string, key)
             if len(res) > 0:
                 chrs_bss.append(res[0])
+        if signal == "SNP likelihood":
+            chrs_bss = list(map(lambda x:(x[0].replace("half_",""), x[1]), chrs_bss))
         return chrs_bss
 
     def signal_exists(self, chr_name, bin_size, signal, flags=0, name=''):
@@ -610,7 +612,7 @@ def ls(self):
         print()
         chr_len = list(np.array(self.get_signal(None, None, "chromosome lengths")).astype("str"))
         chr_len = dict(zip(chr_len[::2], chr_len[1::2]))
-        print("Chromosome lengths: " + str(chr_len))
+        print("Chromosome lengths: " + ", ".join([str(cc)+": "+str(ll) for cc, ll in chr_len.items()]))
 
     @staticmethod
     def save_root_trees(root_filename):

diff --git a/cnvpytor/root.py b/cnvpytor/root.py
@@ -489,28 +489,30 @@ def save_data(chr, rd_p, rd_u):
             self.io.add_meta_attribute("RD from VCF", vcf_file)
             return count
 
+    @staticmethod
+    def pileup_chromosome(x):
+        c, l, snpc, bam_path, reference_filename, pos, ref, alt = x
+        _logger.info("Pileup chromosome %s with length %d" % (c, l))
+        bamf = Bam(bam_path, reference_filename=reference_filename)
+        return bamf.pileup(c, pos[snpc], ref[snpc], alt[snpc])
+
     def _pileup_bam(self, bamfile, chroms, pos, ref, alt, nref, nalt, reference_filename):
         _logger.info("Calculating pileup from bam file '%s'." % bamfile)
         bamf = Bam(bamfile, reference_filename=reference_filename)
 
-        def pileup_chromosome(x):
-            c, l, snpc = x
-            _logger.info("Pileup chromosome %s with length %d" % (c, l))
-            return bamf.pileup(c, pos[snpc], ref[snpc], alt[snpc])
-
         chrname, chrlen = bamf.get_chr_len()
-        chr_len = [(c, l, self.io.snp_chromosome_name(c)) for (c, l) in zip(chrname, chrlen) if
-                   self.io.snp_chromosome_name(c) in chroms]
+        chr_len = [(c, l, self.io.snp_chromosome_name(c), bamfile, reference_filename, pos, ref, alt) for (c, l) in
+                   zip(chrname, chrlen) if self.io.snp_chromosome_name(c) in chroms]
 
         if self.max_cores == 1:
             for cl in chr_len:
-                r = pileup_chromosome(cl)
+                r = self.pileup_chromosome(cl)
                 nref[cl[2]] = [x + y for x, y in zip(nref[cl[2]], r[0])]
                 nalt[cl[2]] = [x + y for x, y in zip(nalt[cl[2]], r[1])]
 
         else:
             from .pool import parmap
-            res = parmap(pileup_chromosome, chr_len, cores=self.max_cores)
+            res = parmap(self.pileup_chromosome, chr_len, cores=self.max_cores)
             for cl, r in zip(chr_len, res):
                 nref[cl[2]] = [x + y for x, y in zip(nref[cl[2]], r[0])]
                 nalt[cl[2]] = [x + y for x, y in zip(nalt[cl[2]], r[1])]
@@ -1137,7 +1139,7 @@ def calculate_histograms_from_snp_counts(self, bin_sizes, chroms=[], use_mask=Tr
                     np.seterr(divide='ignore', invalid='ignore')
                     scale = bin_size / 150
                     rdcg = scale * rdcg / rdc
-                    rdcg[rdc < min_count] = np.NaN
+                    rdcg[rdc < min_count] = np.nan
 
                     self.io.create_signal(c, bin_size, "RD", rdcg)
                     self.io.create_signal(c, bin_size, "RD unique", rdcg)
@@ -1791,10 +1793,10 @@ def call(self, bin_sizes, chroms=[], print_calls=False, use_gc_corr=True, use_ma
                                     if i in nan_indices_set:
                                         i += 1
                                     else:
-                                        if tb==bs:
-                                            rbs=i
-                                        if tb==b:
-                                            rb=i
+                                        if tb == bs:
+                                            rbs = i
+                                        if tb == b:
+                                            rb = i
                                         i += 1
                                         tb += 1
                                 start = bin_size * rbs + 1
@@ -1804,11 +1806,12 @@ def call(self, bin_sizes, chroms=[], print_calls=False, use_gc_corr=True, use_ma
                             pN = -1
                             dG = -1
                             if gc:
-                                pN = (size - np.sum(gc[start // 100:end // 100]) - np.sum(at[start // 100:end // 100])) / size
+                                pN = (size - np.sum(gc[start // 100:end // 100]) - np.sum(
+                                    at[start // 100:end // 100])) / size
                                 dG = np.min(distN[start // 100:end // 100])
 
                             if filter_nan:
-                                pN = np.sum(np.isnan(rd_org[rbs:rb]))/(rb-rbs)
+                                pN = np.sum(np.isnan(rd_org[rbs:rb])) / (rb - rbs)
 
                             if print_calls:
                                 print("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e\t%.4f\t%.4f\t%d" % (
@@ -1875,7 +1878,7 @@ def call_mosaic(self, bin_sizes, chroms=[], use_gc_corr=True, use_mask=False, om
                         bins = len(rd)
                         valid = np.isfinite(rd)
                         level = rd[valid]
-                        if len(level)<=3:
+                        if len(level) <= 3:
                             continue
                         error = np.sqrt(level) ** 2 + std ** 2
                         loc_fl = np.min(list(zip(np.abs(np.diff(level))[:-1], np.abs(np.diff(level))[1:])), axis=1)