Commit

Merge remote-tracking branch 'origin/master' into fix/223
leoisl committed Apr 3, 2023
2 parents 153e3ac + e8e681b commit 6a46608
Showing 3 changed files with 24 additions and 9 deletions.
17 changes: 14 additions & 3 deletions Snakefile
@@ -31,6 +31,7 @@ def get_batches():
     with open(config["batches"]) as fin:
         return sorted([x.strip() for x in fin])
 
+
 def get_filename_for_all_queries():
     return "___".join(get_all_query_filenames())

@@ -332,7 +333,9 @@ rule decompress_cobs:
         cobs_index_tmp=f"{decompression_dir}/{{batch}}.cobs_classic.tmp",
     threads:
         # The same number of threads as COBS, to ensure that COBS is executed immediately after decompression
-        lambda wildcards, input: get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming),
+        lambda wildcards, inp: get_number_of_COBS_threads(
+            wildcards, inp, predefined_cobs_threads, streaming
+        )
     shell:
         """
         ./scripts/benchmark.py --log logs/benchmarks/decompress_cobs/{wildcards.batch}.txt \\
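For context, get_number_of_COBS_threads is referenced by all three rules in this diff but defined elsewhere in the Snakefile. A minimal sketch of what such a helper might look like; the signature matches the lambdas above, but the thread-count logic here is an assumption, not the repository's actual implementation:

# Hypothetical sketch only -- the real definition is not part of this diff.
def get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming):
    # honour an explicit thread count from the config, if one was given
    if predefined_cobs_threads is not None:
        return predefined_cobs_threads
    # assumption: streaming mode needs an extra thread to feed the
    # decompressed index into COBS; otherwise one thread per batch suffices
    return 2 if streaming else 1

Giving decompress_cobs the same thread count means Snakemake reserves identical resources for both steps, so, as the comment above notes, the COBS job can start as soon as its batch is decompressed.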
@@ -355,7 +358,11 @@ rule run_cobs:
         max_ram_mb=lambda wildcards, input: get_uncompressed_batch_size_in_MB(
             wildcards, input, ignore_RAM, streaming
         ),
-    threads: lambda wildcards, input: get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming),
+    threads:
+        # ...
+        lambda wildcards, input: get_number_of_COBS_threads(
+            wildcards, input, predefined_cobs_threads, streaming
+        )
     params:
         kmer_thres=config["cobs_kmer_thres"],
         load_complete="--load-complete" if load_complete else "",
@@ -392,7 +399,11 @@ rule decompress_and_run_cobs:
         max_ram_mb=lambda wildcards, input: get_uncompressed_batch_size_in_MB(
             wildcards, input, ignore_RAM, streaming
         ),
-    threads: lambda wildcards, input: get_number_of_COBS_threads(wildcards, input, predefined_cobs_threads, streaming),
+    threads:
+        # ...
+        lambda wildcards, input: get_number_of_COBS_threads(
+            wildcards, input, predefined_cobs_threads, streaming
+        )
     params:
         kmer_thres=config["cobs_kmer_thres"],
         decompression_dir=decompression_dir,
12 changes: 8 additions & 4 deletions scripts/benchmark.py
@@ -49,12 +49,16 @@ def main():
     main_process = subprocess.Popen(f'{benchmark_command} {args.command}', shell=True)
     if is_benchmarking_pipeline:
         RAM_tmp_log_file = Path(f"{log_file}.RAM.tmp")
-        RAM_benchmarking_process = subprocess.Popen([sys.executable, "scripts/get_RAM_usage.py", str(RAM_tmp_log_file),
-                                                     str(main_process.pid)])
+        RAM_benchmarking_process = subprocess.Popen(
+            [sys.executable, "scripts/get_RAM_usage.py",
+             str(RAM_tmp_log_file),
+             str(main_process.pid)])
     return_code = main_process.wait()
     if return_code:
-        raise subprocess.CalledProcessError(return_code, main_process.args,
-                                            output=main_process.stdout, stderr=main_process.stderr)
+        raise subprocess.CalledProcessError(return_code,
+                                            main_process.args,
+                                            output=main_process.stdout,
+                                            stderr=main_process.stderr)
 
     end_time = datetime.datetime.now()
     elapsed_seconds = (end_time - start_time).total_seconds()
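The hunk above only reformats the call that launches scripts/get_RAM_usage.py; the monitor itself is not shown in this commit. A plausible sketch of such a poller, assuming psutil is available; the argument order follows the Popen call above, everything else is an assumption:

#!/usr/bin/env python3
# Hypothetical sketch of a RAM poller like scripts/get_RAM_usage.py;
# the real script is not part of this diff.
import sys
import time

import psutil


def main():
    ram_log_path = sys.argv[1]   # e.g. the "{log_file}.RAM.tmp" path passed above
    pid = int(sys.argv[2])       # PID of the benchmarked process
    peak_mb = 0.0
    try:
        process = psutil.Process(pid)
        while process.is_running():
            # track the peak resident set size of the monitored process, in MB
            peak_mb = max(peak_mb, process.memory_info().rss / 2**20)
            with open(ram_log_path, "w") as fout:
                print(f"{peak_mb:.1f}", file=fout)
            time.sleep(1)
    except psutil.NoSuchProcess:
        pass  # the benchmarked process has already exited


if __name__ == "__main__":
    main()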
4 changes: 2 additions & 2 deletions scripts/filter_queries.py
@@ -177,9 +177,9 @@ def process_cobs_file(self, cobs_fn):
         for i, (qname, batch, matches) in enumerate(cobs_iterator(cobs_fn)):
             print(f"Processing batch {batch} query #{i} ({qname})", file=sys.stderr)
             try:
-                _ = self._query_dict[qname]
+                _ = self._query_dict[qname]
             except KeyError:
-                self._query_dict[qname] = SingleQuery(qname, self._keep_matches)
+                self._query_dict[qname] = SingleQuery(qname, self._keep_matches)
             self._query_dict[qname].add_matches(batch, matches)
 
     def print_tsv_summary(self):
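The change in this last hunk appears to be whitespace-only: the removed and added lines are identical once the diff markers are stripped. As an aside, the try/except KeyError idiom it touches could also be written with dict.setdefault; a sketch, not part of the commit:

# Equivalent to the try/except above, at the cost of constructing a
# throw-away SingleQuery even when qname is already present:
self._query_dict.setdefault(qname, SingleQuery(qname, self._keep_matches))
self._query_dict[qname].add_matches(batch, matches)

The try/except version avoids that extra construction, which matters if SingleQuery is expensive to build.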
Expand Down
