Skip to content

Commit

Permalink
Added LMI log at bin level to ml.py
Browse files Browse the repository at this point in the history
The log that was stored in MetalociObject.lmi_info can be now
exported to tsv with the -l --log option. This will work even if the
LMI for the regions have already been computed but the -l option
was not enabled.
  • Loading branch information
Leo Zuber committed Jul 14, 2023
1 parent a3ac963 commit e84d8af
Showing 1 changed file with 58 additions and 4 deletions.
62 changes: 58 additions & 4 deletions metaloci/tools/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from argparse import HelpFormatter
from datetime import timedelta
from time import time
import pathlib

import pandas as pd

Expand Down Expand Up @@ -78,6 +79,14 @@ def populate_args(parser):
help="P-value significance threshold (default: %(default)4.2f).",
)

optional_arg.add_argument(
"-l",
"--log",
dest="log",
action="store_true",
help="Flag to unpickle LMI log.",
)

optional_arg.add_argument(
"-m",
"--mp",
Expand All @@ -87,9 +96,14 @@ def populate_args(parser):
)

optional_arg.add_argument(
"-t", "--threads", dest="threads", type=int, action="store", help="Number of threads to use in multiprocessing."
"-t", "--threads", dest="threads", default=int(mp.cpu_count() / 3), type=int, action="store", help="Number"
" of threads to use in multiprocessing. Recommended value is one third of your total cpu count, although"
" increasing this number may improve performance in machines with few cores. (default: %(default)s)"
)

optional_arg.add_argument("-h", "--help", action="help", help="Show this help message and exit.")


def get_lmi(region_iter, opts, signal_data, progress=None, i=None, silent: bool = True):

INFLUENCE = 1.5
Expand All @@ -100,6 +114,7 @@ def get_lmi(region_iter, opts, signal_data, progress=None, i=None, silent: bool
regions = opts.region_file
n_permutations = opts.perms
signipval = opts.signipval
moran_log = opts.log
force = opts.force

if not work_dir.endswith("/"):
Expand Down Expand Up @@ -152,23 +167,40 @@ def get_lmi(region_iter, opts, signal_data, progress=None, i=None, silent: bool
# This checks if every signal you want to process is already computed. If the user works with a few signals
# but decides to add some more later, she can use the same working directory and KK, and the LMI will be
# computed again with the new signals. If everything is already computed, does nothing.
if mlobject.lmi_info is not None:
# TODO compute only the missing signals instead of all the signals again.
if mlobject.lmi_info is not None and force is False:

if [k for k, _ in mlobject.lmi_info.items()] == signal_types:
if [signal for signal, _ in mlobject.lmi_info.items()] == signal_types:

if silent == False:

print("\tLMI already computed for this region. \n\tSkipping to next region...")

if progress is not None: progress["done"] = True

if moran_log:

for signal, df in mlobject.lmi_info.items():

moran_log_path = f"{work_dir}{mlobject.chrom}/moran_log/{signal}"
pathlib.Path(moran_log_path).mkdir(parents=True, exist_ok=True)

df.to_csv(f"{moran_log_path}/{mlobject.region}_{mlobject.poi}_{signal}.txt", sep="\t", index=False)

if silent == False:

print(f"\tLog saved to: {moran_log_path}/{mlobject.region}_{mlobject.poi}_{signal}.txt")

return

# Get average distance between consecutive points to define influence,
# which should be ~2 particles of radius.
mean_distance = mlobject.kk_distances.diagonal(1).mean()
buffer = mean_distance * INFLUENCE
mlobject.lmi_geometry = lmi.construct_voronoi(mlobject, buffer)

if mlobject.lmi_geometry is None:

mlobject.lmi_geometry = lmi.construct_voronoi(mlobject, buffer)

if silent == False:

Expand All @@ -192,6 +224,19 @@ def get_lmi(region_iter, opts, signal_data, progress=None, i=None, silent: bool

mlobject.save(hamlo_namendle)

if moran_log:

for signal, df in mlobject.lmi_info.items():

moran_log_path = f"{work_dir}{mlobject.chrom}/moran_log/{signal}"
pathlib.Path(moran_log_path).mkdir(parents=True, exist_ok=True)

df.to_csv(f"{moran_log_path}/{mlobject.region}_{mlobject.poi}_{signal}.txt", sep="\t", index=False)

if silent == False:

print(f"\tLog saved to: {moran_log_path}/{mlobject.region}_{mlobject.poi}_{signal}.txt")

if progress is not None:

progress['value'] += 1
Expand All @@ -210,6 +255,11 @@ def run(opts):
regions = opts.region_file
multiprocess = opts.multiprocess
cores = opts.threads
moran_log = opts.log

if opts.threads is None:

cores = int(mp.cpu_count() / 3)

if not work_dir.endswith("/"):

Expand Down Expand Up @@ -247,6 +297,10 @@ def run(opts):
if progress["done"] == True:

print("\tSome regions had already been computed and have been skipped.", end="")

if moran_log:

print(f"\n\tLog saved to: {work_dir}chrN/moran_log", end="")

pool.close()
pool.join()
Expand Down

0 comments on commit e84d8af

Please sign in to comment.