From 5af64671601c79ce8494c4a18928a490bfb47280 Mon Sep 17 00:00:00 2001
From: aidaanva <aida.andrades@gmail.com>
Date: Mon, 27 Mar 2023 11:22:10 +0200
Subject: [PATCH] Fixed bug calculation percent duplicates+clonality

---
 endorS.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/endorS.py b/endorS.py
index c2bc8f2..4d61ea6 100755
--- a/endorS.py
+++ b/endorS.py
@@ -37,7 +37,7 @@
                     help= 'output of samtools flagstat in a txt file, whereby duplicate removal has been performed on the input reads')
 #parser.add_argument('samtoolsfiles', metavar='<samplefile>.stats', type=str, nargs='+',
 #                    help='output of samtools flagstat in a txt file (at least one required). If two files are supplied, the mapped reads of the second file is divided by the total reads in the first, since it assumes that the <samplefile.stats> are related to the same sample. Useful after BAM filtering')
-parser.add_argument('-v','--version', action='version', version='%(prog)s 1.2')
+parser.add_argument('-v','--version', action='version', version='%(prog)s 1.3')
 parser.add_argument('--output', '-o', nargs='?', help='specify a file format for an output file. Options: <json> for a MultiQC json output. Default: none')
 parser.add_argument('--name', '-n', nargs='?', help='specify name for the output file. Default: extracted from the first samtools flagstat file provided')
 parser.add_argument('--verbose', '-e', help='increase output verbosity', action='store_true')
@@ -56,6 +56,10 @@
     print("ERROR: only --qualityfiltered samtools flagstat file provided. No stats can be calculated")
     sys.exit(2)
 
+if ((args.raw is None) and (args.qualityfiltered is None)):
+    print("ERROR: only --deduplicated samtools flagstat file provided. No stats can be calculated")
+    sys.exit(2)
+
 try:
     with open(args.raw, 'r') as raw:
         contentsRaw = raw.read()
@@ -99,10 +103,16 @@
 try:
     with open(args.deduplicated, 'r') as deDup:
         contentsdeDup= deDup.read()
-    #Extract number of reads pre dedup
-    totalPreDedup = float((re.findall(r'^([0-9]+) \+ [0-9]+ in total',contentsdeDup))[0])
-    #Extract number of mapped reads pre-quality filtering:
+    
+    #Extract number of mapped reads post-dedup:
     mappedDedup = float((re.findall(r'([0-9]+) \+ [0-9]+ mapped ',contentsdeDup))[0])
+
+    #Extract number of reads pre dedup: can only be extracted from either raw or filtered
+    if args.qualityfiltered is not None:
+        totalPreDedup = mappedPost
+    elif args.raw is not None:
+        totalPreDedup = mappedRaw
+    
     #Check if --raw provided and calculate Percent on target postDedup
     if args.raw is not None:
         if totalReads == 0.0: