Commit

Merge remote-tracking branch 'origin/master' into fix/223
leoisl committed Apr 3, 2023
2 parents 153e3ac + e8e681b commit 6a46608
Showing 3 changed files with 24 additions and 9 deletions.
17 changes: 14 additions & 3 deletions Snakefile
@@ -31,6 +31,7 @@ def get_batches():
     with open(config["batches"]) as fin:
         return sorted([x.strip() for x in fin])
 
+
 def get_filename_for_all_queries():
     return "___".join(get_all_query_filenames())

@@ -332,7 +333,9 @@ rule decompress_cobs:
         cobs_index_tmp=f"{decompression_dir}/{{batch}}.cobs_classic.tmp",
     threads:
         # The same number of threads as COBS, to ensure that COBS is executed immediately after decompression
-        lambda wildcards, input: get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming),
+        lambda wildcards, inp: get_number_of_COBS_threads(
+            wildcards, inp, predefined_cobs_threads, streaming
+        )
     shell:
         """
         ./scripts/benchmark.py --log logs/benchmarks/decompress_cobs/{wildcards.batch}.txt \\
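For context, get_number_of_COBS_threads is referenced by all three rules in this diff but defined elsewhere in the Snakefile. A minimal sketch of what such a helper might look like; the signature matches the lambdas above, but the thread-count logic here is an assumption, not the repository's actual implementation:

# Hypothetical sketch only -- the real definition is not part of this diff.
def get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming):
    # honour an explicit thread count from the config, if one was given
    if predefined_cobs_threads is not None:
        return predefined_cobs_threads
    # assumption: streaming mode needs an extra thread to feed the
    # decompressed index into COBS; otherwise one thread per batch suffices
    return 2 if streaming else 1

Giving decompress_cobs the same thread count means Snakemake reserves identical resources for both steps, so, as the comment above notes, the COBS job can start as soon as its batch is decompressed.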
@@ -355,7 +358,11 @@ rule run_cobs:
         max_ram_mb=lambda wildcards, input: get_uncompressed_batch_size_in_MB(
             wildcards, input, ignore_RAM, streaming
         ),
-    threads: lambda wildcards, input: get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming),
+    threads:
+        # ...
+        lambda wildcards, input: get_number_of_COBS_threads(
+            wildcards, input, predefined_cobs_threads, streaming
+        )
     params:
         kmer_thres=config["cobs_kmer_thres"],
         load_complete="--load-complete" if load_complete else "",
@@ -392,7 +399,11 @@ rule decompress_and_run_cobs:
         max_ram_mb=lambda wildcards, input: get_uncompressed_batch_size_in_MB(
             wildcards, input, ignore_RAM, streaming
         ),
-    threads: lambda wildcards, input: get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming),
+    threads:
+        # ...
+        lambda wildcards, input: get_number_of_COBS_threads(
+            wildcards, input, predefined_cobs_threads, streaming
+        )
     params:
         kmer_thres=config["cobs_kmer_thres"],
         decompression_dir=decompression_dir,
12 changes: 8 additions & 4 deletions scripts/benchmark.py
@@ -49,12 +49,16 @@ def main():
     main_process = subprocess.Popen(f'{benchmark_command} {args.command}', shell=True)
     if is_benchmarking_pipeline:
         RAM_tmp_log_file = Path(f"{log_file}.RAM.tmp")
-        RAM_benchmarking_process = subprocess.Popen([sys.executable, "scripts/get_RAM_usage.py", str(RAM_tmp_log_file),
-                                                     str(main_process.pid)])
+        RAM_benchmarking_process = subprocess.Popen(
+            [sys.executable, "scripts/get_RAM_usage.py",
+             str(RAM_tmp_log_file),
+             str(main_process.pid)])
     return_code = main_process.wait()
     if return_code:
-        raise subprocess.CalledProcessError(return_code, main_process.args,
-                                            output=main_process.stdout, stderr=main_process.stderr)
+        raise subprocess.CalledProcessError(return_code,
+                                            main_process.args,
+                                            output=main_process.stdout,
+                                            stderr=main_process.stderr)
 
     end_time = datetime.datetime.now()
     elapsed_seconds = (end_time - start_time).total_seconds()
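The hunk above only reformats the call that launches scripts/get_RAM_usage.py; the monitor itself is not shown in this commit. A plausible sketch of such a poller, assuming psutil is available; the argument order follows the Popen call above, everything else is an assumption:

#!/usr/bin/env python3
# Hypothetical sketch of a RAM poller like scripts/get_RAM_usage.py;
# the real script is not part of this diff.
import sys
import time

import psutil


def main():
    ram_log_path = sys.argv[1]   # e.g. the "{log_file}.RAM.tmp" path passed above
    pid = int(sys.argv[2])       # PID of the benchmarked process
    peak_mb = 0.0
    try:
        process = psutil.Process(pid)
        while process.is_running():
            # track the peak resident set size of the monitored process, in MB
            peak_mb = max(peak_mb, process.memory_info().rss / 2**20)
            with open(ram_log_path, "w") as fout:
                print(f"{peak_mb:.1f}", file=fout)
            time.sleep(1)
    except psutil.NoSuchProcess:
        pass  # the benchmarked process has already exited


if __name__ == "__main__":
    main()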
4 changes: 2 additions & 2 deletions scripts/filter_queries.py
@@ -177,9 +177,9 @@ def process_cobs_file(self, cobs_fn):
         for i, (qname, batch, matches) in enumerate(cobs_iterator(cobs_fn)):
             print(f"Processing batch {batch} query #{i} ({qname})", file=sys.stderr)
             try:
-                _ = self._query_dict[qname]
+                _ = self._query_dict[qname]
             except KeyError:
-                self._query_dict[qname] = SingleQuery(qname, self._keep_matches)
+                self._query_dict[qname] = SingleQuery(qname, self._keep_matches)
             self._query_dict[qname].add_matches(batch, matches)
 
     def print_tsv_summary(self):
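The change in this last hunk appears to be whitespace-only: the removed and added lines are identical once the diff markers are stripped. As an aside, the try/except KeyError idiom it touches could also be written with dict.setdefault; a sketch, not part of the commit:

# Equivalent to the try/except above, at the cost of constructing a
# throw-away SingleQuery even when qname is already present:
self._query_dict.setdefault(qname, SingleQuery(qname, self._keep_matches))
self._query_dict[qname].add_matches(batch, matches)

The try/except version avoids that extra construction, which matters if SingleQuery is expensive to build.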
Expand Down
