Skip to content

Commit

Permalink
Added progress to layout.py and ml.py.
Browse files Browse the repository at this point in the history
Created shared variable to keep track of time and iterations,
in order to be able to show progress, time spent and estimated
time remaining.
Added an error handling where figure.py whould crash when trying
to calculate a regression when all values where identical (more likely
to happen at the beggining of the chromosome).
Added a check to recalculate the LMI when the user adds another signal
succeding the compute of KK and LMI with less signals. This way, the
user does not need to create a new working directory and compute
everything again.
  • Loading branch information
Leo Zuber committed Jul 13, 2023
1 parent b670e49 commit 9611d57
Show file tree
Hide file tree
Showing 8 changed files with 834 additions and 75 deletions.
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion metaloci/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.24
0.2.3
42 changes: 25 additions & 17 deletions metaloci/plot/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,29 +334,37 @@ def get_lmi_scatterplot(
x = zscore(lmi_geometry.signal)
y = zscore(y_lag)

_, _, r_value_scat, p_value_scat, _ = linregress(x, y)
scatter, ax = plt.subplots(figsize=(5, 5)) # , subplot_kw={'aspect':'equal'})
try:

alpha_sp = [1.0 if val < signipval else 0.1 for val in lmi_geometry.LMI_pvalue]
colors_sp = [colors_lmi[val] for val in lmi_geometry.moran_quadrant]
_, _, r_value_scat, p_value_scat, _ = linregress(x, y)
scatter, ax = plt.subplots(figsize=(5, 5)) # , subplot_kw={'aspect':'equal'})

plt.scatter(x=x, y=y, s=100, ec="white", fc=colors_sp, alpha=alpha_sp)
alpha_sp = [1.0 if val < signipval else 0.1 for val in lmi_geometry.LMI_pvalue]
colors_sp = [colors_lmi[val] for val in lmi_geometry.moran_quadrant]

sns.scatterplot(
x=[x[mlobject.poi]], y=[y[mlobject.poi]], s=150, ec="lime", fc="none", zorder=len(lmi_geometry)
)
sns.regplot(x=x, y=y, scatter=False, color="k")
sns.despine(top=True, right=True, left=False, bottom=False, offset=10, trim=False)
plt.scatter(x=x, y=y, s=100, ec="white", fc=colors_sp, alpha=alpha_sp)

sns.scatterplot(
x=[x[mlobject.poi]], y=[y[mlobject.poi]], s=150, ec="lime", fc="none", zorder=len(lmi_geometry)
)
sns.regplot(x=x, y=y, scatter=False, color="k")
sns.despine(top=True, right=True, left=False, bottom=False, offset=10, trim=False)

plt.title(f"Moran Local Scatterplot\nr: {r_value_scat:4.2f} p-value: {p_value_scat:.1e}")
plt.axvline(x=0, color="k", linestyle=":")
plt.axhline(y=0, color="k", linestyle=":")

ax.set_xlabel("Z-score(Signal)")
ax.set_ylabel("Z-score(Signal Spatial Lag)")

plt.title(f"Moran Local Scatterplot\nr: {r_value_scat:4.2f} p-value: {p_value_scat:.1e}")
plt.axvline(x=0, color="k", linestyle=":")
plt.axhline(y=0, color="k", linestyle=":")
r_value_scat = float(r_value_scat)
p_value_scat = float(r_value_scat)

except ValueError:

ax.set_xlabel("Z-score(Signal)")
ax.set_ylabel("Z-score(Signal Spatial Lag)")
print("\t\tCannot compute lineal regression as all values are identical.")

r_value_scat = float(r_value_scat)
p_value_scat = float(r_value_scat)
return None, None, None

return scatter, r_value_scat, p_value_scat

Expand Down
41 changes: 32 additions & 9 deletions metaloci/tools/figure.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,15 @@ def run(opts):

except FileNotFoundError:

print("\n\t.mlo file not found for this region. \n\tSkipping to next region.")
print("\n\t.mlo file not found for this region. \n\tSkipping to the next one.")

continue

if mlobject.lmi_info == None:

print("\n\tLMI not calculated for this region. \n\tSkipping to the next one...")
continue

buffer = mlobject.kk_distances.diagonal(1).mean() * INFLUENCE

bins = []
Expand Down Expand Up @@ -246,14 +251,7 @@ def run(opts):
plt.close()
print("\t\tGaudi Type plot -> done.")

print("\t\tLMI Scatter plot", end="\r")
lmi_plt, r_value, p_value = plot.get_lmi_scatterplot(
mlobject, merged_lmi_geometry, buffer * BFACT, signipval, colors
)
lmi_plt.savefig(f"{plot_filename}_lmi.pdf", **plot_opt)
lmi_plt.savefig(f"{plot_filename}_lmi.png", **plot_opt)
plt.close()
print("\t\tLMI Scatter plot -> done.")


print("\t\tSignal plot", end="\r")
bed_data, selmetaloci = plot.signal_bed(
Expand All @@ -271,6 +269,31 @@ def run(opts):
sig_plt.savefig(f"{plot_filename}_signal.png", **plot_opt)
plt.close()
print("\t\tSignal plot -> done.")

print("\t\tLMI Scatter plot", end="\r")
lmi_plt, r_value, p_value = plot.get_lmi_scatterplot(
mlobject, merged_lmi_geometry, buffer * BFACT, signipval, colors
)

if lmi_plt is not None:

lmi_plt.savefig(f"{plot_filename}_lmi.pdf", **plot_opt)
lmi_plt.savefig(f"{plot_filename}_lmi.png", **plot_opt)
plt.close()

print("\t\tLMI Scatter plot -> done.")

else:

if signal == signals[-1]:

print("\t\tSkipping to next region...")
continue

else:

print("\t\tSkipping to next signal...")
continue

print(f"\t\tFinal composite figure for region '{region}' and signal '{signal}'", end="\r")
img1 = Image.open(f"{plot_filename}_lmi.png")
Expand Down
54 changes: 44 additions & 10 deletions metaloci/tools/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from collections import defaultdict
from datetime import timedelta
from time import time
import subprocess as sp

import cooler
import hicstraw
Expand Down Expand Up @@ -116,9 +117,10 @@ def populate_args(parser):
help="Set a persistence length for the Kamada-Kawai layout.",
)

def get_region_layout(row, opts, silent: bool = True):
def get_region_layout(row, opts, progress=None, silent: bool = True):

work_dir = opts.work_dir
regions = opts.regions
hic_path = opts.hic_file
resolution = opts.reso
cutoffs = opts.cutoff
Expand All @@ -130,6 +132,14 @@ def get_region_layout(row, opts, silent: bool = True):

cutoffs = [0.2]

if os.path.isfile(regions):

df_regions = pd.read_table(regions)

else:

df_regions = pd.DataFrame({"coords": [regions], "symbol": ["symbol"], "id": ["id"]})

cutoffs.sort(key=float, reverse=True)

bad_regions = defaultdict(list) # does not belong here, this is to prevent an error TODO
Expand Down Expand Up @@ -161,6 +171,8 @@ def get_region_layout(row, opts, silent: bool = True):

print(f"\n------> Region {region_coords} already done (no data).")

if progress is not None: progress["done"] = True

return

else:
Expand All @@ -169,13 +181,15 @@ def get_region_layout(row, opts, silent: bool = True):

print(f"\n------> Region {region_coords} already done.")

if progress is not None: progress["done"] = True

if force:

if silent == False:

print(
"\tForce option (-f) selected, recalculating "
"the Kamada-Kawai layout (files will be overwritten)\n"
"the Kamada-Kawai layout (files will be overwritten)"
)

mlobject = mlo.MetalociObject(
Expand Down Expand Up @@ -213,9 +227,9 @@ def get_region_layout(row, opts, silent: bool = True):

return

for cutoff in cutoffs:
time_per_region = time()

time_per_kk = time()
for cutoff in cutoffs:

mlobject.kk_cutoff = cutoff

Expand Down Expand Up @@ -270,14 +284,14 @@ def get_region_layout(row, opts, silent: bool = True):

if silent == False:

print(f"\tdone in {timedelta(seconds=round(time() - time_per_kk))}.\n")
print(f"\tdone in {timedelta(seconds=round(time() - time_per_region))}.\n")

if len(cutoffs) == 1:

if silent == False:
print(
f"\tKamada-Kawai layout of region {mlobject.region} saved"
f" at {int(cutoff * 100)} % cutoff to file: {mlobject.save_path}"
f"\tKamada-Kawai layout of region {mlobject.region} "
f"at {int(cutoff * 100)} % cutoff saved to file: {mlobject.save_path}"
)

# Save mlobject.
Expand Down Expand Up @@ -318,6 +332,18 @@ def get_region_layout(row, opts, silent: bool = True):
# # if word not in file then write the word to the file
# handler.write(f"{region}\t{reason[0]}\n")
# handler.flush()
# print(int(timedelta(seconds=round(time() - time_per_region))))

if progress is not None:

progress['value'] += 1

time_spent = time() - progress['timer']
time_remaining = int(time_spent / progress['value'] * (len(df_regions) - progress['value']))

print(f"\033[A{' '*int(sp.Popen(['tput','cols'], stdout=sp.PIPE).communicate()[0].strip())}\033[A")
print(f"\t[{progress['value']}/{len(df_regions)}] | Time spent: {timedelta(seconds=round(time_spent))} | "
f"ETR: {timedelta(seconds=round(time_remaining))}", end='\r')


def run(opts):
Expand Down Expand Up @@ -359,13 +385,21 @@ def run(opts):

if multiprocess:

print(f"{len(df_regions)} regions will be computed.\n")
print(f"\n------> {len(df_regions)} regions will be computed.\n")

try:

manager = mp.Manager()
progress = manager.dict(value=0, timer = start_timer, done = False)

with mp.Pool(processes=cores) as pool:

pool.starmap(get_region_layout, [(row, opts) for _, row in df_regions.iterrows()])
pool.starmap(get_region_layout, [(row, opts, progress) for _, row in df_regions.iterrows()])

if progress["done"] == True:

print("\tSome regions had already been computed and have been skipped.", end="")

pool.close()
pool.join()

Expand Down Expand Up @@ -393,5 +427,5 @@ def run(opts):
# if not any(f"{row.region}\t{row.reason}" in line for line in handler)
# ]

print(f"\nTotal time spent: {timedelta(seconds=round(time() - start_timer))}.")
print(f"\n\nTotal time spent: {timedelta(seconds=round(time() - start_timer))}.")
print("\nall done.")
Loading

0 comments on commit 9611d57

Please sign in to comment.