diff --git a/environment.yml b/environment.yml index ceabba5b..fc37f956 100644 --- a/environment.yml +++ b/environment.yml @@ -16,6 +16,7 @@ dependencies: - openmmtools - pandas >= 1.1 - perses + - plotly - pydantic - pymbar - python >= 3.6 diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index 6b7ea962..350ffb47 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -5,35 +5,44 @@ import multiprocessing import os from typing import List, Optional +import networkx as nx +import numpy as np from ..fah_utils import list_results from ..schema import ( AnalysisConfig, CompoundSeries, CompoundSeriesAnalysis, + CompoundMicrostate, FahConfig, GenAnalysis, PhaseAnalysis, + PointEstimate, ProjectPair, Transformation, TransformationAnalysis, WorkPair, FragalysisConfig, + RunStatus ) -from .diffnet import combine_free_energies +from .constants import KT_KCALMOL +from .diffnet import combine_free_energies, pIC50_to_DG from .exceptions import AnalysisError, DataValidationError from .extract_work import extract_work_pair -from .free_energy import compute_relative_free_energy +from .free_energy import compute_relative_free_energy, InsufficientDataError from .plots import generate_plots from .report import generate_report, gens_are_consistent from .structures import generate_representative_snapshots from .website import generate_website +EXP_DDG_IJ_ERR = 0.2 # TODO check this is correct + + def analyze_phase(server: FahConfig, run: int, project: int, config: AnalysisConfig): paths = list_results(config=server, run=run, project=project) - + if not paths: raise AnalysisError(f"No data found for project {project}, RUN {run}") @@ -62,14 +71,24 @@ def get_gen_analysis(gen: int, works: List[WorkPair]) -> GenAnalysis: # TODO: round raw work output? return GenAnalysis(gen=gen, works=filtered_works, free_energy=free_energy) + # Analyze gens, omitting incomplete gens + gens = list() + for gen, works in works_by_gen.items(): + try: + gens.append( get_gen_analysis(gen, works) ) + except InsufficientDataError as e: + # It's OK if we don't have sufficient data here + pass + return PhaseAnalysis( free_energy=free_energy, - gens=[get_gen_analysis(gen, works) for gen, works in works_by_gen.items()], + gens=gens, ) def analyze_transformation( transformation: Transformation, + compounds: CompoundSeries, projects: ProjectPair, server: FahConfig, config: AnalysisConfig, @@ -86,11 +105,37 @@ def analyze_transformation( complex_phase.free_energy.delta_f - solvent_phase.free_energy.delta_f ) + # get associated DDGs between compounds, if experimentally known + exp_ddg = calc_exp_ddg(transformation=transformation, compounds=compounds) + absolute_error = ( + abs(binding_free_energy - exp_ddg) if (exp_ddg.point is not None) else None + ) + # Check for consistency across GENS, if requested consistent_bool = None if filter_gen_consistency: consistent_bool = gens_are_consistent( - complex_phase=complex_phase, solvent_phase=solvent_phase, nsigma=3 + complex_phase=complex_phase, solvent_phase=solvent_phase, nsigma=1 + ) + + return TransformationAnalysis( + transformation=transformation, + reliable_transformation=consistent_bool, + binding_free_energy=binding_free_energy, + complex_phase=complex_phase, + solvent_phase=solvent_phase, + exp_ddg=exp_ddg, + absolute_error=absolute_error, + ) + + else: + + return TransformationAnalysis( + transformation=transformation, + binding_free_energy=binding_free_energy, + complex_phase=complex_phase, + solvent_phase=solvent_phase, + exp_ddg=exp_ddg, ) return TransformationAnalysis( @@ -101,9 +146,59 @@ def analyze_transformation( solvent_phase=solvent_phase, ) + +def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeries): + """ + Compute experimental free energy difference between two compounds, if available. + + NOTE: This method makes the approximation that each microstate has the same affinity as the parent compound. + + TODO: Instead, solve DiffNet without experimental data and use derived DDGs between compounds (not transformations). + + Parameters + ---------- + transformation : TransformationAnalysis + The transformation of interest + compounds : CompoundSeries + Data for the compound series. + + Returns + ------- + ddg : PointEstimate + Point estimate of free energy difference for this transformation, + or PointEstimate(None, None) if not available. + + """ + compounds_by_microstate = { + microstate.microstate_id: compound + for compound in compounds + for microstate in compound.microstates + } + + initial_experimental_data = compounds_by_microstate[ + transformation.initial_microstate.microstate_id + ].metadata.experimental_data + final_experimental_data = compounds_by_microstate[ + transformation.final_microstate.microstate_id + ].metadata.experimental_data + + if ("pIC50" in initial_experimental_data) and ("pIC50" in final_experimental_data): + initial_dg = PointEstimate( + point=pIC50_to_DG(initial_experimental_data["pIC50"]), stderr=EXP_DDG_IJ_ERR + ) + final_dg = PointEstimate( + point=pIC50_to_DG(final_experimental_data["pIC50"]), stderr=EXP_DDG_IJ_ERR + ) + error = final_dg - initial_dg + return error + else: + return PointEstimate(point=None, stderr=None) + + def analyze_transformation_or_warn( transformation: Transformation, **kwargs ) -> Optional[TransformationAnalysis]: + try: return analyze_transformation(transformation, **kwargs) except AnalysisError as exc: @@ -111,15 +206,31 @@ def analyze_transformation_or_warn( return None -def analyze_compound_series( +def analyze_compound_series( series: CompoundSeries, config: AnalysisConfig, server: FahConfig, num_procs: Optional[int] = None, ) -> CompoundSeriesAnalysis: + """ + Analyze a compound series to generate JSON. + """ from rich.progress import track + # TODO: Cache results and only update RUNs for which we have received new data + + # Pre-filter based on which transformations have any work data + logging.info(f'Pre-filtering {len(series.transformations)} transformations to identify those with work data...') + available_transformations = [ + transformation for transformation in series.transformations + if len(list_results(config=server, run=transformation.run_id, project=series.metadata.fah_projects.complex_phase)) > 0 + and len(list_results(config=server, run=transformation.run_id, project=series.metadata.fah_projects.solvent_phase)) > 0 + ] + #available_transformations = series.transformations[:50] + + # Process compound series in parallel + logging.info(f'Processing {len(available_transformations)} / {len(series.transformations)} available transformations in parallel...') with multiprocessing.Pool(num_procs) as pool: results_iter = pool.imap_unordered( partial( @@ -127,19 +238,50 @@ def analyze_compound_series( projects=series.metadata.fah_projects, server=server, config=config, + compounds=series.compounds, ), - series.transformations, + available_transformations, ) transformations = [ result for result in track( results_iter, - total=len(series.transformations), + total=len(available_transformations), description="Computing transformation free energies", ) if result is not None ] + # Reprocess transformation experimental errors to only include most favorable transformation + # NOTE: This is a hack, and should be replaced by a more robust method for accounting for racemic mixtures + # Compile list of all microstate transformations for each compound + compound_ddgs = dict() + for transformation in transformations: + compound_id = transformation.transformation.final_microstate.compound_id + if compound_id in compound_ddgs: + compound_ddgs[compound_id].append(transformation.binding_free_energy.point) + else: + compound_ddgs[compound_id] = [transformation.binding_free_energy.point] + # Collapse to a single estimate + from scipy.special import logsumexp + for compound_id, ddgs in compound_ddgs.items(): + compound_ddgs[compound_id] = -logsumexp(-np.array(ddgs)) + np.log(len(ddgs)) + # Regenerate list of transformations + for index, t in enumerate(transformations): + if (t.exp_ddg is None) or (t.exp_ddg.point is None): + continue + compound_id = t.transformation.final_microstate.compound_id + absolute_error_point = abs(t.exp_ddg.point - compound_ddgs[compound_id]) + transformations[index] = TransformationAnalysis( + transformation=t.transformation, + reliable_transformation=t.reliable_transformation, + binding_free_energy=t.binding_free_energy, + complex_phase=t.complex_phase, + solvent_phase=t.solvent_phase, + exp_ddg=t.exp_ddg, + absolute_error=PointEstimate(point=absolute_error_point, stderr=t.absolute_error.stderr), + ) + # Sort transformations by RUN # transformations.sort(key=lambda transformation_analysis : transformation_analysis.transformation.run_id) # Sort transformations by free energy difference @@ -190,10 +332,17 @@ def generate_artifacts( data_dir, f"PROJ{series.metadata.fah_projects.complex_phase}" ) + # Pre-filter based on which transformations have any data + available_transformations = [ + transformation for transformation in series.transformations + if transformation.binding_free_energy is not None + and transformation.binding_free_energy.point is not None + ] + if snapshots: logging.info("Generating representative snapshots") generate_representative_snapshots( - transformations=series.transformations, + transformations=available_transformations, project_dir=complex_project_dir, project_data_dir=complex_data_dir, output_dir=os.path.join(output_dir, "transformations"), diff --git a/fah_xchem/analysis/free_energy.py b/fah_xchem/analysis/free_energy.py index 06b6e7c4..183d4d1c 100644 --- a/fah_xchem/analysis/free_energy.py +++ b/fah_xchem/analysis/free_energy.py @@ -172,7 +172,7 @@ def compute_relative_free_energy( # TODO: Flag problematic RUN/CLONE/GEN trajectories for further analysis and debugging works = _filter_work_values(all_works) - + if len(works) < (min_num_work_values or 1): raise InsufficientDataError( f"Need at least {min_num_work_values} good work values for analysis, " diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index 1001b8e4..c079d34b 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -9,6 +9,7 @@ from matplotlib.font_manager import FontProperties import multiprocessing import numpy as np +import networkx as nx import pandas as pd from pymbar import BAR from typing import Generator, Iterable, List, Optional @@ -19,6 +20,37 @@ TransformationAnalysis, ) from .constants import KT_KCALMOL +from arsenic import plotting + + +def plot_retrospective( + transformations: List[TransformationAnalysis], + output_dir: str, + filename: str = "retrospective", +): + + graph = nx.DiGraph() + + # TODO this loop can be sped up + for analysis in transformations: + transformation = analysis.transformation + + # Only interested if the compounds have an experimental DDG + if analysis.binding_free_energy is None or analysis.exp_ddg.point is None: + continue + + graph.add_edge( + transformation.initial_microstate, + transformation.final_microstate, + exp_DDG=analysis.exp_ddg.point * KT_KCALMOL, + exp_dDDG=analysis.exp_ddg.stderr * KT_KCALMOL, + calc_DDG=analysis.binding_free_energy.point * KT_KCALMOL, + calc_dDDG=analysis.binding_free_energy.stderr * KT_KCALMOL, + ) + + filename_png = filename + ".png" + + plotting.plot_DDGs(graph, filename=os.path.join(output_dir, filename_png)) def plot_work_distributions( @@ -721,6 +753,8 @@ def generate_plots( """ from rich.progress import track + # TODO: Cache results and only update RUNs for which we have received new data + binding_delta_fs = [ transformation.binding_free_energy.point for transformation in series.transformations @@ -763,3 +797,28 @@ def generate_plots( description="Generating plots", ): pass + + # + # Retrospective plots + # + + # NOTE this is handled by Arsenic + # this needs to be plotted last as the figure isn't cleared by default in Arsenic + # TODO generate time stamp + + # All transformations + plot_retrospective(output_dir=output_dir, transformations=series.transformations, filename='retrospective-transformations-all') + + # Reliable subset of transformations + plot_retrospective(output_dir=output_dir, transformations=[transformation for transformation in series.transformations if transformation.reliable_transformation], filename='retrospective-transformations-reliable') + + # Transformations not involving racemates + # TODO: Find a simpler way to filter non-racemates + nmicrostates = { compound.metadata.compound_id : len(compound.microstates) for compound in series.compounds } + def is_racemate(microstate): + return True if (nmicrostates[microstate.compound_id] > 1) else False + plot_retrospective( + output_dir=output_dir, + transformations=[transformation for transformation in series.transformations if (not is_racemate(transformation.transformation.initial_microstate) and not is_racemate(transformation.transformation.final_microstate))], + filename='retrospective-transformations-noracemates' + ) diff --git a/fah_xchem/analysis/report.py b/fah_xchem/analysis/report.py index 95498094..be69d0a1 100755 --- a/fah_xchem/analysis/report.py +++ b/fah_xchem/analysis/report.py @@ -603,6 +603,27 @@ def generate_report( from openeye import oechem +import os +from contextlib import contextmanager +@contextmanager +def working_directory(path): + """ + A context manager which changes the working directory to the given + path, and then changes it back to its previous value on exit. + Usage: + > # Do something in original directory + > with working_directory('/my/new/path'): + > # Do something in new directory + > # Back to old directory + """ + + prev_cwd = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(prev_cwd) + def consolidate_protein_snapshots_into_pdb( oemols: List[oechem.OEMol], @@ -658,29 +679,26 @@ def consolidate_protein_snapshots_into_pdb( # produce multiple PDB files and zip for fragalysis upload if fragalysis_input: - base_pdb_filename = os.path.basename(pdb_filename).split(".")[0] - n_proteins = 1 - n_atoms = proteins[0].topology.n_atoms - n_dim = 3 - for index, protein in enumerate(proteins): - xyz = np.zeros([n_proteins, n_atoms, n_dim], np.float32) - xyz[0, :, :] = protein.xyz[0, :, :] - trajectory = md.Trajectory(xyz, protein.topology) - trajectory.save( - os.path.join(fragalysis_path, f"{base_pdb_filename}_{index}.pdb") - ) - - from zipfile import ZipFile - - with ZipFile(os.path.join(fragalysis_path, "references.zip"), "w") as zipobj: - for _, _, filenames in os.walk(fragalysis_path): - for pdb_file in track( - filenames, description="Zipping protein snapshots for Fragalysis..." - ): - if pdb_file.endswith(".pdb"): - zipobj.write(os.path.join(fragalysis_path, pdb_file)) - - zipobj.close() + with working_directory(fragalysis_path): + base_pdb_filename = os.path.basename(pdb_filename).split(".")[0] + n_proteins = 1 + n_atoms = proteins[0].topology.n_atoms + n_dim = 3 + for index, protein in enumerate(proteins): + xyz = np.zeros([n_proteins, n_atoms, n_dim], np.float32) + xyz[0, :, :] = protein.xyz[0, :, :] + trajectory = md.Trajectory(xyz, protein.topology) + trajectory.save(f"{base_pdb_filename}_{index}.pdb") + + from zipfile import ZipFile, ZIP_BZIP2 + with ZipFile(os.path.join(fragalysis_path, "references.zip"), mode="w", compression=ZIP_BZIP2, compresslevel=9) as zipobj: + from glob import glob + pdb_files = glob('*.pdb') + for pdb_file in track(pdb_files, description="Zipping protein snapshots for Fragalysis..."): + zipobj.write(pdb_file) + + # TODO: Is this necessary? + zipobj.close() # produce one PDB file with multiple frames else: diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index 232ac2b4..cb75cca6 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -180,8 +180,12 @@ def extract_snapshot( fragment = load_fragment(fragment_id) # Align the trajectory to the fragment (in place) - trajectory.image_molecules(inplace=True) - trajectory.superpose(fragment, atom_indices=fragment.top.select("name CA")) + #trajectory.image_molecules(inplace=True) # No need to image molecules anymore now that perses adds zero-energy bonds between protein and ligand! + #trajectory.superpose(fragment, atom_indices=fragment.top.select("name CA")) + + # TODO: fix this hardcode for *MPro*! + trajectory.superpose(fragment, + atom_indices=fragment.top.select("(name CA) and (residue 145 or residue 41 or residue 164 or residue 165 or residue 142 or residue 163)")) # DEBUG : Mpro active site only # Extract the snapshot snapshot = trajectory[frame] @@ -194,7 +198,7 @@ def extract_snapshot( components = dict() for name in ["protein", "old_ligand", "new_ligand"]: components[name] = mdtraj_to_oemol(sliced_snapshot[name]) - + return sliced_snapshot, components @@ -217,8 +221,9 @@ def get_stored_atom_indices(project_dir: str, run: int): } # Get all atom indices from the hybrid system - protein_atom_indices = htf.hybrid_topology.select("protein") - hybrid_ligand_atom_indices = htf.hybrid_topology.select("resn MOL") + # Omit hydrogens + protein_atom_indices = htf.hybrid_topology.select("protein and (mass > 1.1)") + hybrid_ligand_atom_indices = htf.hybrid_topology.select("resn MOL and (mass > 1.1)") # Identify atom index subsets for the old and new ligands from the hybrid system old_ligand_atom_indices = [ @@ -353,6 +358,8 @@ def generate_representative_snapshot( run_id = transformation.transformation.run_id os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True) + # TODO: Cache results and only update RUNs for which we have received new data + if ( max_binding_free_energy is not None and transformation.binding_free_energy.point > max_binding_free_energy @@ -385,14 +392,16 @@ def generate_representative_snapshot( frame = 1 # TODO: Magic numbers + gen_analysis, workpair = gen_work + # Extract representative snapshot try: sliced_snapshots, components = extract_snapshot( project_dir=project_dir, project_data_dir=project_data_dir, run=run_id, - clone=gen_work[1].clone, - gen=gen_work[0].gen, + clone=workpair.clone, + gen=gen_analysis.gen, frame=frame, fragment_id=transformation.transformation.xchem_fragment_id, cache_dir=cache_dir, @@ -420,6 +429,7 @@ def generate_representative_snapshot( ) as ofs: oechem.OEWriteMolecule(ofs, components[name]) except Exception as e: + print(f'\nException occurred extracting snapshot from {project_dir} data {project_data_dir} run {run_id} clone {gen_work[1].clone} gen {gen_work[0].gen}') print(e) diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index 28a188f2..ae69476c 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -5,6 +5,7 @@ import os import re from typing import Any, NamedTuple, Optional +import numpy as np import jinja2 import requests @@ -60,7 +61,7 @@ def postera_url(compound_or_microstate_id: str) -> Optional[str]: import re match = re.match( - "^(?P[A-Z]{3}-[A-Z]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))?$", + "^(?P[A-Z_]{3}-[A-Z_]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))?([_0-9]*)$", compound_or_microstate_id, ) @@ -201,7 +202,7 @@ def generate_website( environment.filters["experimental_data_url"] = experimental_data_url environment.filters["smiles_to_filename"] = get_image_filename - for subdir in ["compounds", "microstates", "transformations", "reliable_transformations"]: + for subdir in ["compounds", "microstates", "transformations", "reliable_transformations", "retrospective_transformations"]: os.makedirs(os.path.join(path, subdir), exist_ok=True) def write_html( @@ -237,12 +238,14 @@ def write_html( num_top_compounds=num_top_compounds, ) - compound_free_energies = [ - (compound.free_energy.point, compound) - for compound in series.compounds - if compound.free_energy - ] - compounds_sorted = [p[1] for p in sorted(compound_free_energies)] + compounds_sorted = sorted( + [ + compound + for compound in series.compounds + if compound.free_energy + ], + key = lambda m : m.free_energy.point + ) _generate_paginated_index( write_html=lambda items, **kwargs: write_html( @@ -272,15 +275,16 @@ def write_html( ], ) - microstate_free_energies = [ - (microstate.free_energy.point, microstate) - for compound in series.compounds - for microstate in compound.microstates - if microstate.free_energy - ] - - microstates_sorted = [p[1] for p in sorted(microstate_free_energies)] - + microstates_sorted = sorted( + [ + microstate + for compound in series.compounds + for microstate in compound.microstates + if microstate.free_energy + ], + key = lambda m : m.free_energy.point + ) + _generate_paginated_index( write_html=lambda items, **kwargs: write_html( microstates=items, total_microstates=len(microstates_sorted), **kwargs @@ -306,3 +310,14 @@ def write_html( items_per_page=items_per_page, description="Generating html for reliable transformations index", ) + + _generate_paginated_index( + write_html=lambda items, **kwargs: write_html(transformations=items, **kwargs), + url_prefix="retrospective_transformations", + items=sorted( + [transformation for transformation in series.transformations if (transformation.absolute_error is not None)], + key = lambda transformation : -transformation.absolute_error.point + ), + items_per_page=items_per_page, + description="Generating html for retrospective transformations index", + ) diff --git a/fah_xchem/analysis/website/templates/base.html b/fah_xchem/analysis/website/templates/base.html index fc17286b..b6eb6cb2 100644 --- a/fah_xchem/analysis/website/templates/base.html +++ b/fah_xchem/analysis/website/templates/base.html @@ -62,7 +62,8 @@ ("compounds/index.html", "compounds", "Compounds"), ("microstates/index.html", "microstates", "Microstates"), ("transformations/index.html", "transformations", "Transformations"), - ("reliable_transformations/index.html", "reliable_transformations", "Reliable transformations") + ("reliable_transformations/index.html", "reliable_transformations", "Reliable transformations"), + ("retrospective_transformations/index.html", "retrospective_transformations", "Retrospective transformations") ] -%} {% set active_page = active_page | default("index") -%} diff --git a/fah_xchem/analysis/website/templates/retrospective_transformations/index.html b/fah_xchem/analysis/website/templates/retrospective_transformations/index.html new file mode 100644 index 00000000..30ae7217 --- /dev/null +++ b/fah_xchem/analysis/website/templates/retrospective_transformations/index.html @@ -0,0 +1,101 @@ +{% extends "base.html" %} +{% import "postera.html" as postera %} +{% set active_page = "retrospective_transformations" %} +{% block content %} +

Retrospective Transformations

+ + retrospective transformations plot - all transformations + + + retrospective transformations plot - reliable transformations + + + retrospective transformations plot - no racemates + +
+Showing {{ start_index }} through {{ end_index }} of {{ series.transformations | length }} + +{% if prev_page %}{% endif %} +{% if next_page %}{% endif %} + +
+ + + + + + + + + + + + {% for transformation in (transformations | selectattr("absolute_error", "ne", None)) %} + {% if transformation.absolute_error.point is not none %} + + + + + + + + + + + + + + + + + {% endif %} + {% endfor %} +
RUN Initial microstate Final microstate ΔΔG / kcal M-1 ΔΔGexp / kcal M-1 |ΔΔG-ΔΔGexp| / kcal M-1 Work distribution Convergence
RUN{{ transformation.transformation.run_id }}{{ transformation.transformation.initial_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.compound_id) }} + + molecule + + + + + + + + + + {{ transformation.transformation.final_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.compound_id) }} + + molecule + + + + + + + + + + + + {{ (transformation.binding_free_energy * KT_KCALMOL) | format_point }} + ± {{ (transformation.binding_free_energy * KT_KCALMOL) | format_stderr }} + + + + {{ (transformation.exp_ddg * KT_KCALMOL) | format_point }} + ± {{ (transformation.exp_ddg * KT_KCALMOL) | format_stderr }} + + + + {{ (transformation.absolute_error * KT_KCALMOL) | format_point }} + ± {{ (transformation.absolute_error * KT_KCALMOL) | format_stderr }} + + + + work distributions + + + + convergence plot + +
+{% endblock %} diff --git a/fah_xchem/app.py b/fah_xchem/app.py index fbf1b952..254afd3d 100644 --- a/fah_xchem/app.py +++ b/fah_xchem/app.py @@ -91,6 +91,7 @@ def run_analysis( transformations=compound_series.transformations[:max_transformations] ) + # TODO: Remove this? if use_only_reference_compound_data: # Strip experimental data frorm all but reference compound logging.warning(f'Stripping experimental data from all but reference compound') @@ -125,7 +126,7 @@ def run_analysis( os.makedirs(output_dir, exist_ok=True) with open(os.path.join(output_dir, "analysis.json"), "w") as output_file: - output_file.write(output.json()) + output_file.write(output.json(indent=3)) def generate_artifacts( diff --git a/fah_xchem/schema.py b/fah_xchem/schema.py index c7ed6563..485a2c35 100644 --- a/fah_xchem/schema.py +++ b/fah_xchem/schema.py @@ -1,5 +1,5 @@ import datetime as dt -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from pydantic import BaseModel, Field @@ -11,8 +11,8 @@ class Config: class PointEstimate(Model): - point: float - stderr: float + point: Union[None, float] + stderr: Union[None, float] def __add__(self, other: "PointEstimate") -> "PointEstimate": from math import sqrt @@ -22,6 +22,9 @@ def __add__(self, other: "PointEstimate") -> "PointEstimate": stderr=sqrt(self.stderr ** 2 + other.stderr ** 2), ) + def __abs__(self) -> "PointEstimate": + return PointEstimate(point=abs(self.point), stderr=self.stderr) + def __neg__(self) -> "PointEstimate": return PointEstimate(point=-self.point, stderr=self.stderr) @@ -34,7 +37,10 @@ def __mul__(self, c: float) -> "PointEstimate": def precision_decimals(self) -> Optional[int]: from math import floor, isfinite, log10 - return -floor(log10(self.stderr)) if isfinite(self.stderr) else None + if self.point is None: + return None + else: + return -floor(log10(self.stderr)) if isfinite(self.stderr) else None class ProjectPair(Model): @@ -168,6 +174,8 @@ class TransformationAnalysis(Model): transformation: Transformation reliable_transformation: bool = Field(None, description="Specify if the transformation is reliable or not") # JSON boolean binding_free_energy: PointEstimate + exp_ddg: PointEstimate # TODO: Make optional, with None as default? + absolute_error: Optional[PointEstimate] = None complex_phase: PhaseAnalysis solvent_phase: PhaseAnalysis @@ -214,3 +222,19 @@ class FragalysisConfig(Model): method: str = None upload_key: str = None new_upload: bool = Field(False) + + +class RunStatus(Model): + run_id: int = Field( + None, + description="The RUN number corresponding to the Folding@Home directory structure", + ) + complex_phase_work_units: int = Field( + 0, + description="The number of completed complex phase work units", + ) + solvent_phase_work_units: int = Field( + 0, + description="The number of completed solvent phase work units", + ) + has_changed: bool = True