From eac01b3a84a7f538860e21bbe5f2ad7a259118c7 Mon Sep 17 00:00:00 2001 From: glass-w Date: Tue, 8 Dec 2020 12:08:50 +0000 Subject: [PATCH 01/30] create basic outline for new tab --- .../templates/retrospective/index.html | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 fah_xchem/analysis/website/templates/retrospective/index.html diff --git a/fah_xchem/analysis/website/templates/retrospective/index.html b/fah_xchem/analysis/website/templates/retrospective/index.html new file mode 100644 index 00000000..a4e2d119 --- /dev/null +++ b/fah_xchem/analysis/website/templates/retrospective/index.html @@ -0,0 +1,76 @@ +{% extends "base.html" %} +{% import "postera.html" as postera %} +{% set active_page = "retrospective" %} +{% block content %} +

Retrospective

+
+Showing {{ start_index }} through {{ end_index }} of {{ series.transformations | length }} + +{% if prev_page %}{% endif %} +{% if next_page %}{% endif %} + +
+ + + + + + + + + + {% for transformation in transformations %} + + + + + + + + + + + + + + + {% endfor %} +
RUN Initial microstate Final microstate ΔΔG / kcal M-1 Work distribution Convergence
RUN{{ transformation.transformation.run_id }}{{ transformation.transformation.initial_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.microstate_id) }} + + molecule + + + + + + + + + + {{ transformation.transformation.final_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.microstate_id) }} + + molecule + + + + + + + + + + + + {{ (transformation.binding_free_energy * KT_KCALMOL) | format_point }} + ± {{ (transformation.binding_free_energy * KT_KCALMOL) | format_stderr }} + + + + work distributions + + + + convergence plot + +
+{% endblock %} From eb150910cace542ea49286a6cbe49037d7159ea1 Mon Sep 17 00:00:00 2001 From: glass-w Date: Mon, 14 Dec 2020 14:23:01 +0000 Subject: [PATCH 02/30] create basic retrospective analysis tab --- fah_xchem/analysis/website/__init__.py | 10 ++- .../analysis/website/templates/base.html | 3 +- .../templates/retrospective/index.html | 76 ------------------- .../retrospective_analysis/index.html | 65 ++++++++++++++++ 4 files changed, 76 insertions(+), 78 deletions(-) delete mode 100644 fah_xchem/analysis/website/templates/retrospective/index.html create mode 100644 fah_xchem/analysis/website/templates/retrospective_analysis/index.html diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index 28a188f2..7fd82873 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -201,7 +201,7 @@ def generate_website( environment.filters["experimental_data_url"] = experimental_data_url environment.filters["smiles_to_filename"] = get_image_filename - for subdir in ["compounds", "microstates", "transformations", "reliable_transformations"]: + for subdir in ["compounds", "microstates", "transformations", "reliable_transformations", "retrospective_analysis"]: os.makedirs(os.path.join(path, subdir), exist_ok=True) def write_html( @@ -254,6 +254,14 @@ def write_html( description="Generating html for compounds index", ) + _generate_paginated_index( + write_html=lambda items, **kwargs: write_html(compounds=items, num_top_compounds=num_top_compounds, **kwargs), + url_prefix="retrospective_analysis", + items=compounds_sorted, + items_per_page=items_per_page, + description="Generating html for retrospective_analysis index", + ) + for compound in track( compounds_sorted[:num_top_compounds], description="Generating html for individual compound views", diff --git a/fah_xchem/analysis/website/templates/base.html b/fah_xchem/analysis/website/templates/base.html index fc17286b..265a8206 100644 --- a/fah_xchem/analysis/website/templates/base.html +++ b/fah_xchem/analysis/website/templates/base.html @@ -62,7 +62,8 @@ ("compounds/index.html", "compounds", "Compounds"), ("microstates/index.html", "microstates", "Microstates"), ("transformations/index.html", "transformations", "Transformations"), - ("reliable_transformations/index.html", "reliable_transformations", "Reliable transformations") + ("reliable_transformations/index.html", "reliable_transformations", "Reliable transformations"), + ("retrospective_analysis/index.html", "retrospective_analysis", "Retrospective analysis") ] -%} {% set active_page = active_page | default("index") -%} diff --git a/fah_xchem/analysis/website/templates/retrospective/index.html b/fah_xchem/analysis/website/templates/retrospective/index.html deleted file mode 100644 index a4e2d119..00000000 --- a/fah_xchem/analysis/website/templates/retrospective/index.html +++ /dev/null @@ -1,76 +0,0 @@ -{% extends "base.html" %} -{% import "postera.html" as postera %} -{% set active_page = "retrospective" %} -{% block content %} -

Retrospective

-
-Showing {{ start_index }} through {{ end_index }} of {{ series.transformations | length }} - -{% if prev_page %}{% endif %} -{% if next_page %}{% endif %} - -
- - - - - - - - - - {% for transformation in transformations %} - - - - - - - - - - - - - - - {% endfor %} -
RUN Initial microstate Final microstate ΔΔG / kcal M-1 Work distribution Convergence
RUN{{ transformation.transformation.run_id }}{{ transformation.transformation.initial_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.microstate_id) }} - - molecule - - - - - - - - - - {{ transformation.transformation.final_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.microstate_id) }} - - molecule - - - - - - - - - - - - {{ (transformation.binding_free_energy * KT_KCALMOL) | format_point }} - ± {{ (transformation.binding_free_energy * KT_KCALMOL) | format_stderr }} - - - - work distributions - - - - convergence plot - -
-{% endblock %} diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html new file mode 100644 index 00000000..77f18707 --- /dev/null +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -0,0 +1,65 @@ +{% extends "base.html" %} +{% import "postera.html" as postera %} +{% set active_page = "retrospective_analysis" %} +{% block content %} +

Retrospective Analysis

+
+Showing {{ start_index }} through {{ end_index }} of {{ series.compounds | length }} + +{% if prev_page %}{% endif %} +{% if next_page %}{% endif %} + +
+ + + + + + + + + {% for compound in compounds %} + {% set url = compound|experimental_data_url -%} + {% if url is not none %} + + + + + + + + + {% endif %} + {% endfor %} +
Rank Compound SMILES ΔG / kcal M-1 pIC50
{{ start_index + loop.index - 1 }} + {% if loop.index <= num_top_compounds %} + {{ compound.metadata.compound_id }} + {% else %} + {{ compound.metadata.compound_id }} + {% endif %} + {{ postera.maybe_link(compound.metadata.compound_id) }} + {{ postera.maybe_experimental_data(compound) }} + + + molecule + + {{ compound.metadata.smiles }} + {% if compound.free_energy %} + + {{ (compound.free_energy * KT_KCALMOL) | format_point }} + ± {{ (compound.free_energy * KT_KCALMOL) | format_stderr }} + + {% else %} + no data + {% endif %} + + {% if compound.free_energy %} + + {{ (-compound.free_energy * KT_PIC50) | format_point }} + ± {{ (compound.free_energy * KT_PIC50) | format_stderr }} + + {% else %} + no data + {% endif %} +
+{% endblock %} From 7be597ef4fe1a475c6186501c30d4b23f54c363f Mon Sep 17 00:00:00 2001 From: glass-w Date: Mon, 14 Dec 2020 16:31:54 +0000 Subject: [PATCH 03/30] change index skeleton for retrospective analysis --- fah_xchem/analysis/website/__init__.py | 16 ++-- .../retrospective_analysis/index.html | 89 +++++++++++-------- 2 files changed, 60 insertions(+), 45 deletions(-) diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index 7fd82873..0b29e96d 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -254,14 +254,6 @@ def write_html( description="Generating html for compounds index", ) - _generate_paginated_index( - write_html=lambda items, **kwargs: write_html(compounds=items, num_top_compounds=num_top_compounds, **kwargs), - url_prefix="retrospective_analysis", - items=compounds_sorted, - items_per_page=items_per_page, - description="Generating html for retrospective_analysis index", - ) - for compound in track( compounds_sorted[:num_top_compounds], description="Generating html for individual compound views", @@ -314,3 +306,11 @@ def write_html( items_per_page=items_per_page, description="Generating html for reliable transformations index", ) + + _generate_paginated_index( + write_html=lambda items, **kwargs: write_html(transformations=items, **kwargs), + url_prefix="retrospective_analysis", + items=series.transformations, + items_per_page=items_per_page, + description="Generating html for retrospective analysis index", + ) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index 77f18707..9fcd6742 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -3,8 +3,12 @@ {% set active_page = "retrospective_analysis" %} {% block content %}

Retrospective Analysis

+

Plot

+ + retrospective plot +
-Showing {{ start_index }} through {{ end_index }} of {{ series.compounds | length }} +Showing {{ start_index }} through {{ end_index }} of {{ series.transformations | length }} {% if prev_page %}{% endif %} {% if next_page %}{% endif %} @@ -12,54 +16,65 @@

Retrospective Analysis

- - - - - + + + + + + - {% for compound in compounds %} - {% set url = compound|experimental_data_url -%} - {% if url is not none %} + {% for transformation in transformations %} - - + + + + + - - + + + - {% endif %} {% endfor %}
Rank Compound SMILES ΔG / kcal M-1 pIC50 RUN Initial microstate Final microstate ΔΔG / kcal M-1 Work distribution Convergence
{{ start_index + loop.index - 1 }} - {% if loop.index <= num_top_compounds %} - {{ compound.metadata.compound_id }} - {% else %} - {{ compound.metadata.compound_id }} - {% endif %} - {{ postera.maybe_link(compound.metadata.compound_id) }} - {{ postera.maybe_experimental_data(compound) }} + RUN{{ transformation.transformation.run_id }}{{ transformation.transformation.initial_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.microstate_id) }} + + molecule + + + + + + + + + {{ transformation.transformation.final_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.microstate_id) }} - - molecule + + molecule {{ compound.metadata.smiles }} - {% if compound.free_energy %} - - {{ (compound.free_energy * KT_KCALMOL) | format_point }} - ± {{ (compound.free_energy * KT_KCALMOL) | format_stderr }} - - {% else %} - no data - {% endif %} + + + + + + + + - {% if compound.free_energy %} - {{ (-compound.free_energy * KT_PIC50) | format_point }} - ± {{ (compound.free_energy * KT_PIC50) | format_stderr }} + {{ (transformation.binding_free_energy * KT_KCALMOL) | format_point }} + ± {{ (transformation.binding_free_energy * KT_KCALMOL) | format_stderr }} - {% else %} - no data - {% endif %} + + + work distributions + + + + convergence plot +
{% endblock %} From 34b305d474dd2956838402107e544ace472d71a0 Mon Sep 17 00:00:00 2001 From: glass-w Date: Tue, 15 Dec 2020 18:05:31 +0000 Subject: [PATCH 04/30] set new column skeleton and use compound ID only --- .../templates/retrospective_analysis/index.html | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index 9fcd6742..400b5d6c 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -20,13 +20,14 @@

Plot

Initial microstate Final microstate ΔΔG / kcal M-1 + ΔΔGexp / kcal M-1 Work distribution Convergence {% for transformation in transformations %} RUN{{ transformation.transformation.run_id }} - {{ transformation.transformation.initial_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.microstate_id) }} + {{ transformation.transformation.initial_microstate.compound_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.compound_id) }} molecule @@ -42,7 +43,7 @@

Plot

- {{ transformation.transformation.final_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.microstate_id) }} + {{ transformation.transformation.final_microstate.compound_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.compound_id) }} molecule @@ -64,6 +65,12 @@

Plot

± {{ (transformation.binding_free_energy * KT_KCALMOL) | format_stderr }} + + + {{ (transformation.binding_free_energy * KT_KCALMOL) | format_point }} + ± {{ (transformation.binding_free_energy * KT_KCALMOL) | format_stderr }} + +
work distributions From 89f50a047f3a8ffd595f5bc895933bc8cbeac029 Mon Sep 17 00:00:00 2001 From: glass-w Date: Wed, 16 Dec 2020 17:30:36 +0000 Subject: [PATCH 05/30] add skeleton function for extracting relatives --- fah_xchem/analysis/diffnet.py | 66 +++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/fah_xchem/analysis/diffnet.py b/fah_xchem/analysis/diffnet.py index f1056fab..e82f6299 100644 --- a/fah_xchem/analysis/diffnet.py +++ b/fah_xchem/analysis/diffnet.py @@ -166,6 +166,72 @@ def build_transformation_graph( return graph +# WIP +def calc_exp_ddg( + compounds: List[Compound], + transformations: List[TransformationAnalysis], +): + + # TODO will probably need to move this function + # NOTE below does not follow dnry + + supergraph = build_transformation_graph(compounds, transformations) + + # Split supergraph into weakly-connected subgraphs + # NOTE: the subgraphs are "views" into the supergraph, meaning + # updates made to the subgraphs are reflected in the supergraph + # (we exploit this below) + connected_subgraphs = [ + supergraph.subgraph(nodes) + for nodes in nx.weakly_connected_components(supergraph) + ] + + # Filter to connected subgraphs containing at least one + # experimental measurement + valid_subgraphs = [ + graph + for graph in connected_subgraphs + if any( + "pIC50" in graph.nodes[node]["compound"].metadata.experimental_data + for node in graph + ) + ] + + if len(valid_subgraphs) < len(connected_subgraphs): + logging.warning( + "Found %d out of %d connected subgraphs without experimental data", + len(connected_subgraphs) - len(valid_subgraphs), + len(connected_subgraphs), + ) + + for graph in valid_subgraphs: + for node_1, node_2, edge in graph.edges(data=True): + # see if both nodes contain experimental pIC50 data + # if they do calculate the free energy difference between them + try: + node_1_pic50 = graph.nodes[node_1]["compound"].metadata.experimental_data["pIC50"] # ref molecule + node_2_pic50 = graph.nodes[node_2]["compound"].metadata.experimental_data["pIC50"] # new molecule + + # TODO need to add microstate check here + print(f"length: {len(graph.nodes[node_2]['compound'].microstates)}") + print(graph.nodes[node_2]['compound'].microstates) + n_microstates = len(graph.nodes[node_2]['compound'].microstates) + + # Get experimental DeltaDeltaG by subtracting from experimental inspiration fragment (ref) + exp_ddg_ij = pIC50_to_DG(node_1_pic50) - pIC50_to_DG(node_2_pic50) + print(pIC50_to_DG(node_1_pic50), pIC50_to_DG(node_2_pic50)) + print(pIC50_to_DG(node_1_pic50) - pIC50_to_DG(node_2_pic50)) + + edge["exp_ddg_ij"] = exp_ddg_ij + except KeyError: + logging.info( + "Failed to get experimental pIC50 value" + ) + edge["exp_ddg_ij"] = None + + + + def combine_free_energies( compounds: List[Compound], transformations: List[TransformationAnalysis], From d740ae07ae9bb1e429d9f8e95baf674c27a6da17 Mon Sep 17 00:00:00 2001 From: glass-w Date: Thu, 17 Dec 2020 11:58:34 +0000 Subject: [PATCH 06/30] move function, update schema, test html, tidy up --- fah_xchem/analysis/__init__.py | 64 +++++++++++++++++- fah_xchem/analysis/diffnet.py | 66 ------------------- .../retrospective_analysis/index.html | 6 +- fah_xchem/schema.py | 1 + 4 files changed, 68 insertions(+), 69 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index ddb8935c..9e4ceeea 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -11,6 +11,7 @@ AnalysisConfig, CompoundSeries, CompoundSeriesAnalysis, + CompoundMicrostate, FahConfig, GenAnalysis, PhaseAnalysis, @@ -19,7 +20,7 @@ TransformationAnalysis, WorkPair, ) -from .diffnet import combine_free_energies +from .diffnet import combine_free_energies, pIC50_to_DG from .exceptions import AnalysisError, DataValidationError from .extract_work import extract_work_pair from .free_energy import compute_relative_free_energy @@ -69,6 +70,7 @@ def get_gen_analysis(gen: int, works: List[WorkPair]) -> GenAnalysis: def analyze_transformation( transformation: Transformation, + compounds: CompoundSeries, projects: ProjectPair, server: FahConfig, config: AnalysisConfig, @@ -85,6 +87,9 @@ def analyze_transformation( complex_phase.free_energy.delta_f - solvent_phase.free_energy.delta_f ) + # get associated DDGs between compounds, if experimentally known + exp_ddg = calc_retrospective(transformation=transformation, compounds=compounds) + # Check for consistency across GENS, if requested if filter_gen_consistency: consistent_bool = gens_are_consistent( @@ -97,6 +102,7 @@ def analyze_transformation( binding_free_energy=binding_free_energy, complex_phase=complex_phase, solvent_phase=solvent_phase, + exp_ddg=exp_ddg, ) else: @@ -106,9 +112,64 @@ def analyze_transformation( binding_free_energy=binding_free_energy, complex_phase=complex_phase, solvent_phase=solvent_phase, + exp_ddg=exp_ddg, ) +def calc_retrospective( + transformation: TransformationAnalysis, compounds: CompoundSeries +): + + # TODO add docs + # TODO change func name? + + import networkx as nx + + graph = nx.DiGraph() + + graph.add_edge( + transformation.initial_microstate, + transformation.final_microstate, + ) + + for compound in compounds: + for microstate in compound.microstates: + node = CompoundMicrostate( + compound_id=compound.metadata.compound_id, + microstate_id=microstate.microstate_id, + ) + if node in graph: + graph.nodes[node]["compound"] = compound + graph.nodes[node]["microstate"] = microstate + + for node_1, node_2, edge in graph.edges(data=True): + # see if both nodes contain experimental pIC50 data + # if they do calculate the free energy difference between them + try: + node_1_pic50 = graph.nodes[node_1]["compound"].metadata.experimental_data[ + "pIC50" + ] # ref molecule + node_2_pic50 = graph.nodes[node_2]["compound"].metadata.experimental_data[ + "pIC50" + ] # new molecule + + # TODO need to add microstate check here + # print(f"length: {len(graph.nodes[node_2]['compound'].microstates)}") + # print(graph.nodes[node_2]['compound'].microstates) + # n_microstates = len(graph.nodes[node_2]['compound'].microstates) + + # Get experimental DeltaDeltaG by subtracting from experimental inspiration fragment (ref) + exp_ddg_ij = pIC50_to_DG(node_1_pic50) - pIC50_to_DG(node_2_pic50) + + # TODO get error + + except KeyError: + logging.info("Failed to get experimental pIC50 value") + exp_ddg_ij = None + + return exp_ddg_ij + + def analyze_transformation_or_warn( transformation: Transformation, **kwargs ) -> Optional[TransformationAnalysis]: @@ -135,6 +196,7 @@ def analyze_compound_series( projects=series.metadata.fah_projects, server=server, config=config, + compounds=series.compounds, ), series.transformations, ) diff --git a/fah_xchem/analysis/diffnet.py b/fah_xchem/analysis/diffnet.py index e82f6299..f1056fab 100644 --- a/fah_xchem/analysis/diffnet.py +++ b/fah_xchem/analysis/diffnet.py @@ -166,72 +166,6 @@ def build_transformation_graph( return graph -# WIP -def calc_exp_ddg( - compounds: List[Compound], - transformations: List[TransformationAnalysis], -): - - # TODO will probably need to move this function - # NOTE below does not follow dnry - - supergraph = build_transformation_graph(compounds, transformations) - - # Split supergraph into weakly-connected subgraphs - # NOTE: the subgraphs are "views" into the supergraph, meaning - # updates made to the subgraphs are reflected in the supergraph - # (we exploit this below) - connected_subgraphs = [ - supergraph.subgraph(nodes) - for nodes in nx.weakly_connected_components(supergraph) - ] - - # Filter to connected subgraphs containing at least one - # experimental measurement - valid_subgraphs = [ - graph - for graph in connected_subgraphs - if any( - "pIC50" in graph.nodes[node]["compound"].metadata.experimental_data - for node in graph - ) - ] - - if len(valid_subgraphs) < len(connected_subgraphs): - logging.warning( - "Found %d out of %d connected subgraphs without experimental data", - len(connected_subgraphs) - len(valid_subgraphs), - len(connected_subgraphs), - ) - - for graph in valid_subgraphs: - for node_1, node_2, edge in graph.edges(data=True): - # see if both nodes contain experimental pIC50 data - # if they do calculate the free energy difference between them - try: - node_1_pic50 = graph.nodes[node_1]["compound"].metadata.experimental_data["pIC50"] # ref molecule - node_2_pic50 = graph.nodes[node_2]["compound"].metadata.experimental_data["pIC50"] # new molecule - - # TODO need to add microstate check here - print(f"length: {len(graph.nodes[node_2]['compound'].microstates)}") - print(graph.nodes[node_2]['compound'].microstates) - n_microstates = len(graph.nodes[node_2]['compound'].microstates) - - # Get experimental DeltaDeltaG by subtracting from experimental inspiration fragment (ref) - exp_ddg_ij = pIC50_to_DG(node_1_pic50) - pIC50_to_DG(node_2_pic50) - print(pIC50_to_DG(node_1_pic50), pIC50_to_DG(node_2_pic50)) - print(pIC50_to_DG(node_1_pic50) - pIC50_to_DG(node_2_pic50)) - - edge["exp_ddg_ij"] = exp_ddg_ij - except KeyError: - logging.info( - "Failed to get experimental pIC50 value" - ) - edge["exp_ddg_ij"] = None - - - - def combine_free_energies( compounds: List[Compound], transformations: List[TransformationAnalysis], diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index 400b5d6c..8c9f7271 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -25,6 +25,7 @@

Plot

Convergence {% for transformation in transformations %} + {% if transformation.exp_ddg %} RUN{{ transformation.transformation.run_id }} {{ transformation.transformation.initial_microstate.compound_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.compound_id) }} @@ -67,8 +68,8 @@

Plot

- {{ (transformation.binding_free_energy * KT_KCALMOL) | format_point }} - ± {{ (transformation.binding_free_energy * KT_KCALMOL) | format_stderr }} + {{ (transformation.exp_ddg * KT_KCALMOL) }} + ± {{ (transformation.exp_ddg * KT_KCALMOL) }} @@ -82,6 +83,7 @@

Plot

+ {% endif %} {% endfor %} {% endblock %} diff --git a/fah_xchem/schema.py b/fah_xchem/schema.py index 66f1d903..2e043791 100644 --- a/fah_xchem/schema.py +++ b/fah_xchem/schema.py @@ -168,6 +168,7 @@ class TransformationAnalysis(Model): transformation: Transformation reliable_transformation: bool = Field(None, description="Specify if the transformation is reliable or not") # JSON boolean binding_free_energy: PointEstimate + exp_ddg: Optional[float] complex_phase: PhaseAnalysis solvent_phase: PhaseAnalysis From ba32faea1c5f0d0487d80eba6f424b77f647a61d Mon Sep 17 00:00:00 2001 From: glass-w Date: Thu, 17 Dec 2020 17:02:16 +0000 Subject: [PATCH 07/30] add simple arsenic plots WIP --- fah_xchem/analysis/plots.py | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index c434494b..df72e1cc 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -19,6 +19,45 @@ TransformationAnalysis, ) from .constants import KT_KCALMOL +from arsenic import plotting + + +def plot_retrospective( + transformations: List[TransformationAnalysis], + output_dir: str, + filename: str = 'retrospective.png', + ): + + import networkx as nx + import os + + graph = nx.DiGraph() + + for analysis in transformations: + transformation = analysis.transformation + + if analysis.binding_free_energy is None: + continue + + print(transformation) + + # Only interested if the compounds have an experimental DDG + if analysis.exp_ddg is not None: + + graph.add_edge( + transformation.initial_microstate, + transformation.final_microstate, + exp_DDG=analysis.exp_ddg, + exp_dDDG=0.0, # TODO get error + calc_DDG=analysis.binding_free_energy.point, + calc_dDDG=analysis.binding_free_energy.stderr, + ) + + plotting.plot_DDGs(graph, filename=os.path.join(output_dir, filename)) + + + + def plot_work_distributions( @@ -683,6 +722,11 @@ def generate_plots( # Summary plots + plot_retrospective( + output_dir=output_dir, + transformations=series.transformations + ) + with save_summary_plot( name="relative_fe_dist", ): From 46d8dbc6bf5084d5763348459ad94a02a6a553a7 Mon Sep 17 00:00:00 2001 From: glass-w Date: Thu, 17 Dec 2020 17:59:06 +0000 Subject: [PATCH 08/30] change func name, add microstate check, tidy plots --- fah_xchem/analysis/__init__.py | 26 ++++++++++++++------------ fah_xchem/analysis/plots.py | 33 ++++++++++++--------------------- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index 9e4ceeea..f9c810f6 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -5,6 +5,8 @@ import multiprocessing import os from typing import List, Optional +import networkx as nx +import numpy as np from ..fah_utils import list_results from ..schema import ( @@ -20,6 +22,7 @@ TransformationAnalysis, WorkPair, ) +from .constants import KT_KCALMOL from .diffnet import combine_free_energies, pIC50_to_DG from .exceptions import AnalysisError, DataValidationError from .extract_work import extract_work_pair @@ -88,7 +91,7 @@ def analyze_transformation( ) # get associated DDGs between compounds, if experimentally known - exp_ddg = calc_retrospective(transformation=transformation, compounds=compounds) + exp_ddg = calc_exp_ddg(transformation=transformation, compounds=compounds) # Check for consistency across GENS, if requested if filter_gen_consistency: @@ -116,17 +119,16 @@ def analyze_transformation( ) -def calc_retrospective( +def calc_exp_ddg( transformation: TransformationAnalysis, compounds: CompoundSeries ): # TODO add docs - # TODO change func name? - - import networkx as nx graph = nx.DiGraph() + # make a simple two node graph + # NOTE there may be a faster way of doing this graph.add_edge( transformation.initial_microstate, transformation.final_microstate, @@ -141,10 +143,10 @@ def calc_retrospective( if node in graph: graph.nodes[node]["compound"] = compound graph.nodes[node]["microstate"] = microstate - + for node_1, node_2, edge in graph.edges(data=True): - # see if both nodes contain experimental pIC50 data - # if they do calculate the free energy difference between them + # if both nodes contain exp pIC50 calculate the free energy difference between them + # NOTE assume star map (node 1 is our reference) try: node_1_pic50 = graph.nodes[node_1]["compound"].metadata.experimental_data[ "pIC50" @@ -153,14 +155,14 @@ def calc_retrospective( "pIC50" ] # new molecule - # TODO need to add microstate check here - # print(f"length: {len(graph.nodes[node_2]['compound'].microstates)}") - # print(graph.nodes[node_2]['compound'].microstates) - # n_microstates = len(graph.nodes[node_2]['compound'].microstates) + n_microstates = len(graph.nodes[node_2]['compound'].microstates) # Get experimental DeltaDeltaG by subtracting from experimental inspiration fragment (ref) exp_ddg_ij = pIC50_to_DG(node_1_pic50) - pIC50_to_DG(node_2_pic50) + if n_microstates > 1: + exp_ddg_ij += (0.6 * np.log(n_microstates)) / KT_KCALMOL # TODO check this is correct + # TODO get error except KeyError: diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index df72e1cc..c08d7730 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -9,6 +9,7 @@ from matplotlib.font_manager import FontProperties import multiprocessing import numpy as np +import networkx as nx import pandas as pd from pymbar import BAR from typing import Generator, Iterable, List, Optional @@ -28,38 +29,28 @@ def plot_retrospective( filename: str = 'retrospective.png', ): - import networkx as nx - import os - graph = nx.DiGraph() + # TODO this loop can be sped up for analysis in transformations: transformation = analysis.transformation - if analysis.binding_free_energy is None: + # Only interested if the compounds have an experimental DDG + if analysis.binding_free_energy and analysis.exp_ddg is None: continue - print(transformation) - - # Only interested if the compounds have an experimental DDG - if analysis.exp_ddg is not None: - - graph.add_edge( - transformation.initial_microstate, - transformation.final_microstate, - exp_DDG=analysis.exp_ddg, - exp_dDDG=0.0, # TODO get error - calc_DDG=analysis.binding_free_energy.point, - calc_dDDG=analysis.binding_free_energy.stderr, - ) + graph.add_edge( + transformation.initial_microstate, + transformation.final_microstate, + exp_DDG=analysis.exp_ddg, + exp_dDDG=0.0, # TODO get error + calc_DDG=analysis.binding_free_energy.point, + calc_dDDG=analysis.binding_free_energy.stderr, + ) plotting.plot_DDGs(graph, filename=os.path.join(output_dir, filename)) - - - - def plot_work_distributions( complex_forward_works: List[float], complex_reverse_works: List[float], From 4ccdc0ff766ed9a57750b9e5d26a75bb092230cf Mon Sep 17 00:00:00 2001 From: glass-w Date: Fri, 18 Dec 2020 13:40:15 +0000 Subject: [PATCH 09/30] wrangle exp values to fewer decimal places --- fah_xchem/analysis/__init__.py | 6 ++++-- fah_xchem/analysis/plots.py | 6 +++--- .../templates/retrospective_analysis/index.html | 6 +++--- fah_xchem/schema.py | 13 ++++++++----- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index f9c810f6..e41a287f 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -17,6 +17,7 @@ FahConfig, GenAnalysis, PhaseAnalysis, + PointEstimate, ProjectPair, Transformation, TransformationAnalysis, @@ -163,13 +164,14 @@ def calc_exp_ddg( if n_microstates > 1: exp_ddg_ij += (0.6 * np.log(n_microstates)) / KT_KCALMOL # TODO check this is correct - # TODO get error + exp_ddg_ij_err = 0.01 except KeyError: logging.info("Failed to get experimental pIC50 value") exp_ddg_ij = None + exp_ddg_ij_err = None - return exp_ddg_ij + return PointEstimate(point=exp_ddg_ij, stderr=exp_ddg_ij_err) def analyze_transformation_or_warn( diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index c08d7730..3efbff8b 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -36,14 +36,14 @@ def plot_retrospective( transformation = analysis.transformation # Only interested if the compounds have an experimental DDG - if analysis.binding_free_energy and analysis.exp_ddg is None: + if analysis.binding_free_energy and analysis.exp_ddg.point is None: continue graph.add_edge( transformation.initial_microstate, transformation.final_microstate, - exp_DDG=analysis.exp_ddg, - exp_dDDG=0.0, # TODO get error + exp_DDG=analysis.exp_ddg.point, + exp_dDDG=analysis.exp_ddg.stderr, calc_DDG=analysis.binding_free_energy.point, calc_dDDG=analysis.binding_free_energy.stderr, ) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index 8c9f7271..128b3df1 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -25,7 +25,7 @@

Plot

Convergence {% for transformation in transformations %} - {% if transformation.exp_ddg %} + {% if transformation.exp_ddg.point is not none %} RUN{{ transformation.transformation.run_id }} {{ transformation.transformation.initial_microstate.compound_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.compound_id) }} @@ -68,8 +68,8 @@

Plot

- {{ (transformation.exp_ddg * KT_KCALMOL) }} - ± {{ (transformation.exp_ddg * KT_KCALMOL) }} + {{ (transformation.exp_ddg * KT_KCALMOL) | format_point }} + diff --git a/fah_xchem/schema.py b/fah_xchem/schema.py index 2e043791..2023d762 100644 --- a/fah_xchem/schema.py +++ b/fah_xchem/schema.py @@ -1,5 +1,5 @@ import datetime as dt -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from pydantic import BaseModel, Field @@ -11,8 +11,8 @@ class Config: class PointEstimate(Model): - point: float - stderr: float + point: Union[None, float] + stderr: Union[None, float] def __add__(self, other: "PointEstimate") -> "PointEstimate": from math import sqrt @@ -34,7 +34,10 @@ def __mul__(self, c: float) -> "PointEstimate": def precision_decimals(self) -> Optional[int]: from math import floor, isfinite, log10 - return -floor(log10(self.stderr)) if isfinite(self.stderr) else None + if self.point is None: + return None + else: + return -floor(log10(self.stderr)) if isfinite(self.stderr) else None class ProjectPair(Model): @@ -168,7 +171,7 @@ class TransformationAnalysis(Model): transformation: Transformation reliable_transformation: bool = Field(None, description="Specify if the transformation is reliable or not") # JSON boolean binding_free_energy: PointEstimate - exp_ddg: Optional[float] + exp_ddg: PointEstimate complex_phase: PhaseAnalysis solvent_phase: PhaseAnalysis From 94eed6fad903de45002be990e9e570c1e2409a59 Mon Sep 17 00:00:00 2001 From: glass-w Date: Fri, 18 Dec 2020 14:54:39 +0000 Subject: [PATCH 10/30] fix plot, minor fixes --- fah_xchem/analysis/__init__.py | 12 +++++++----- fah_xchem/analysis/plots.py | 10 +++++----- .../templates/retrospective_analysis/index.html | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index e41a287f..ad646cc4 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -156,15 +156,17 @@ def calc_exp_ddg( "pIC50" ] # new molecule - n_microstates = len(graph.nodes[node_2]['compound'].microstates) + n_microstates_node_1 = len(graph.nodes[node_1]['compound'].microstates) + n_microstates_node_2 = len(graph.nodes[node_2]['compound'].microstates) # Get experimental DeltaDeltaG by subtracting from experimental inspiration fragment (ref) - exp_ddg_ij = pIC50_to_DG(node_1_pic50) - pIC50_to_DG(node_2_pic50) + + node_1_DG = pIC50_to_DG(node_1_pic50) + (0.6 * np.log(n_microstates_node_1)) / KT_KCALMOL # TODO check this is correct + node_2_DG = pIC50_to_DG(node_2_pic50) + (0.6 * np.log(n_microstates_node_2)) / KT_KCALMOL # TODO check this is correct - if n_microstates > 1: - exp_ddg_ij += (0.6 * np.log(n_microstates)) / KT_KCALMOL # TODO check this is correct + exp_ddg_ij = node_1_DG - node_2_DG - exp_ddg_ij_err = 0.01 + exp_ddg_ij_err = 0.1 # TODO temporary fix except KeyError: logging.info("Failed to get experimental pIC50 value") diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index 3efbff8b..d805cf72 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -36,16 +36,16 @@ def plot_retrospective( transformation = analysis.transformation # Only interested if the compounds have an experimental DDG - if analysis.binding_free_energy and analysis.exp_ddg.point is None: + if analysis.binding_free_energy is None or analysis.exp_ddg.point is None: continue graph.add_edge( transformation.initial_microstate, transformation.final_microstate, - exp_DDG=analysis.exp_ddg.point, - exp_dDDG=analysis.exp_ddg.stderr, - calc_DDG=analysis.binding_free_energy.point, - calc_dDDG=analysis.binding_free_energy.stderr, + exp_DDG=analysis.exp_ddg.point * KT_KCALMOL, + exp_dDDG=analysis.exp_ddg.stderr * KT_KCALMOL, + calc_DDG=analysis.binding_free_energy.point * KT_KCALMOL, + calc_dDDG=analysis.binding_free_energy.stderr * KT_KCALMOL, ) plotting.plot_DDGs(graph, filename=os.path.join(output_dir, filename)) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index 128b3df1..e74e051f 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -69,7 +69,7 @@

Plot

{{ (transformation.exp_ddg * KT_KCALMOL) | format_point }} - + ± {{ (transformation.exp_ddg * KT_KCALMOL) | format_stderr }} From 02a4f154579bb987446d3a3d607ff65a16dd3429 Mon Sep 17 00:00:00 2001 From: glass-w Date: Fri, 18 Dec 2020 15:01:27 +0000 Subject: [PATCH 11/30] fix analysis.json formatting --- fah_xchem/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fah_xchem/app.py b/fah_xchem/app.py index 4c010429..3e1e9e2d 100644 --- a/fah_xchem/app.py +++ b/fah_xchem/app.py @@ -124,7 +124,7 @@ def run_analysis( os.makedirs(output_dir, exist_ok=True) with open(os.path.join(output_dir, "analysis.json"), "w") as output_file: - output_file.write(output.json()) + output_file.write(output.json(indent=3)) def generate_artifacts( From addd3e44ff267b4e3d1c19f4b3c524bda157e965 Mon Sep 17 00:00:00 2001 From: glass-w Date: Fri, 18 Dec 2020 17:18:30 +0000 Subject: [PATCH 12/30] format with black, update tab and plots --- fah_xchem/analysis/__init__.py | 24 +++++++++++-------- fah_xchem/analysis/plots.py | 22 ++++++++++------- .../retrospective_analysis/index.html | 7 +++--- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index ad646cc4..9ca12c0b 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -120,9 +120,7 @@ def analyze_transformation( ) -def calc_exp_ddg( - transformation: TransformationAnalysis, compounds: CompoundSeries -): +def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeries): # TODO add docs @@ -144,7 +142,7 @@ def calc_exp_ddg( if node in graph: graph.nodes[node]["compound"] = compound graph.nodes[node]["microstate"] = microstate - + for node_1, node_2, edge in graph.edges(data=True): # if both nodes contain exp pIC50 calculate the free energy difference between them # NOTE assume star map (node 1 is our reference) @@ -156,17 +154,23 @@ def calc_exp_ddg( "pIC50" ] # new molecule - n_microstates_node_1 = len(graph.nodes[node_1]['compound'].microstates) - n_microstates_node_2 = len(graph.nodes[node_2]['compound'].microstates) + n_microstates_node_1 = len(graph.nodes[node_1]["compound"].microstates) + n_microstates_node_2 = len(graph.nodes[node_2]["compound"].microstates) # Get experimental DeltaDeltaG by subtracting from experimental inspiration fragment (ref) - - node_1_DG = pIC50_to_DG(node_1_pic50) + (0.6 * np.log(n_microstates_node_1)) / KT_KCALMOL # TODO check this is correct - node_2_DG = pIC50_to_DG(node_2_pic50) + (0.6 * np.log(n_microstates_node_2)) / KT_KCALMOL # TODO check this is correct + + node_1_DG = ( + pIC50_to_DG(node_1_pic50) + + (0.6 * np.log(n_microstates_node_1)) / KT_KCALMOL + ) # TODO check this is correct + node_2_DG = ( + pIC50_to_DG(node_2_pic50) + + (0.6 * np.log(n_microstates_node_2)) / KT_KCALMOL + ) # TODO check this is correct exp_ddg_ij = node_1_DG - node_2_DG - exp_ddg_ij_err = 0.1 # TODO temporary fix + exp_ddg_ij_err = 0.1 # TODO temporary fix except KeyError: logging.info("Failed to get experimental pIC50 value") diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index d805cf72..4e0563dc 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -26,8 +26,8 @@ def plot_retrospective( transformations: List[TransformationAnalysis], output_dir: str, - filename: str = 'retrospective.png', - ): + filename: str = "retrospective", +): graph = nx.DiGraph() @@ -48,7 +48,9 @@ def plot_retrospective( calc_dDDG=analysis.binding_free_energy.stderr * KT_KCALMOL, ) - plotting.plot_DDGs(graph, filename=os.path.join(output_dir, filename)) + filename_png = filename + ".png" + + plotting.plot_DDGs(graph, filename=os.path.join(output_dir, filename_png)) def plot_work_distributions( @@ -713,11 +715,6 @@ def generate_plots( # Summary plots - plot_retrospective( - output_dir=output_dir, - transformations=series.transformations - ) - with save_summary_plot( name="relative_fe_dist", ): @@ -729,7 +726,7 @@ def generate_plots( ): plot_cumulative_distribution(binding_delta_fs) plt.title("Cumulative distribution") - + with _save_table_pdf(path=output_dir, name="poor_complex_convergence_fe_table"): plot_poor_convergence_fe_table(series.transformations) @@ -748,3 +745,10 @@ def generate_plots( description="Generating plots", ): pass + + # Retrospective plots + + # NOTE this is handled by Arsenic + # this needs to be plotted last as the figure isn't cleared by default in Arsenic + # TODO generate time stamp + plot_retrospective(output_dir=output_dir, transformations=series.transformations) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index e74e051f..e2041f31 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -2,9 +2,8 @@ {% import "postera.html" as postera %} {% set active_page = "retrospective_analysis" %} {% block content %} -

Retrospective Analysis

-

Plot

- +

Retrospective Analysis

+
retrospective plot
@@ -20,7 +19,7 @@

Plot

Initial microstate Final microstate ΔΔG / kcal M-1 - ΔΔGexp / kcal M-1 + ΔΔGexp / kcal M-1 Convergence From c31f0b81501df8730483b1919cf7ef65b17e856e Mon Sep 17 00:00:00 2001 From: glass-w Date: Fri, 18 Dec 2020 18:39:53 +0000 Subject: [PATCH 13/30] fix formatting --- .../website/templates/retrospective_analysis/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index e2041f31..a09384b0 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -2,7 +2,7 @@ {% import "postera.html" as postera %} {% set active_page = "retrospective_analysis" %} {% block content %} -

Retrospective Analysis

+

Retrospective Analysis

retrospective plot From 45b2961a33f39aea2b2bb53b7b8426c7c59a2f5d Mon Sep 17 00:00:00 2001 From: glass-w Date: Fri, 18 Dec 2020 18:45:47 +0000 Subject: [PATCH 14/30] fix column format --- .../website/templates/retrospective_analysis/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index a09384b0..e131d1af 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -19,7 +19,7 @@

Retrospective Analysis Initial microstate Final microstate ΔΔG / kcal M-1 - ΔΔGexp / kcal M-1 Work distribution Convergence From c4386524801ac19b9a639450f7b194a02af46502 Mon Sep 17 00:00:00 2001 From: glass-w Date: Mon, 11 Jan 2021 14:22:09 +0000 Subject: [PATCH 15/30] use 0.1 error --- fah_xchem/analysis/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index 9ca12c0b..7935d3a7 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -170,7 +170,7 @@ def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeri exp_ddg_ij = node_1_DG - node_2_DG - exp_ddg_ij_err = 0.1 # TODO temporary fix + exp_ddg_ij_err = 0.1 # TODO check this is correct except KeyError: logging.info("Failed to get experimental pIC50 value") From 63d03fdc66b002d043d3efd4bd0cbd9174fa2f9a Mon Sep 17 00:00:00 2001 From: glass-w Date: Tue, 12 Jan 2021 09:54:01 +0000 Subject: [PATCH 16/30] add plotly --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index ceabba5b..fc37f956 100644 --- a/environment.yml +++ b/environment.yml @@ -16,6 +16,7 @@ dependencies: - openmmtools - pandas >= 1.1 - perses + - plotly - pydantic - pymbar - python >= 3.6 From fe76b7df85d33305af4256b858ed25bf6db6423c Mon Sep 17 00:00:00 2001 From: jchodera Date: Wed, 13 Jan 2021 06:10:55 +0000 Subject: [PATCH 17/30] Bugfixes galore! --- fah_xchem/analysis/__init__.py | 49 +++++++++++++++++-- fah_xchem/analysis/structures.py | 7 +-- fah_xchem/analysis/website/__init__.py | 7 ++- .../retrospective_analysis/index.html | 11 ++++- fah_xchem/schema.py | 6 ++- 5 files changed, 70 insertions(+), 10 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index e35dd3f0..1cac8112 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -94,7 +94,8 @@ def analyze_transformation( # get associated DDGs between compounds, if experimentally known exp_ddg = calc_exp_ddg(transformation=transformation, compounds=compounds) - + absolute_error = abs(binding_free_energy - exp_ddg) if (exp_ddg.point is not None) else None + # Check for consistency across GENS, if requested consistent_bool = None if filter_gen_consistency: @@ -109,6 +110,7 @@ def analyze_transformation( complex_phase=complex_phase, solvent_phase=solvent_phase, exp_ddg=exp_ddg, + absolute_error=absolute_error, ) else: @@ -129,10 +131,18 @@ def analyze_transformation( solvent_phase=solvent_phase, ) -def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeries): +def calc_exp_ddg_DEPRECATED(transformation: TransformationAnalysis, compounds: CompoundSeries): + """ + Compute experimental free energy difference between two compounds, if available. - # TODO add docs + Parameters + ---------- + transformation : TransformationAnalysis + The transformation of interest + compounds : CompoundSeries + Data for the compound series. + """ graph = nx.DiGraph() # make a simple two node graph @@ -188,6 +198,39 @@ def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeri return PointEstimate(point=exp_ddg_ij, stderr=exp_ddg_ij_err) +def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeries): + """ + Compute experimental free energy difference between two compounds, if available. + + NOTE: This method makes the approximation that each microstate has the same affinity as the parent compound. + + Parameters + ---------- + transformation : TransformationAnalysis + The transformation of interest + compounds : CompoundSeries + Data for the compound series. + + Returns + ------- + ddg : PointEstimate + Point estimate of free energy difference for this transformation, + or PointEstimate(None, None) if not available. + + """ + compounds_by_microstate = { microstate.microstate_id : compound for compound in compounds for microstate in compound.microstates } + + initial_experimental_data = compounds_by_microstate[transformation.initial_microstate.microstate_id].metadata.experimental_data + final_experimental_data = compounds_by_microstate[transformation.final_microstate.microstate_id].metadata.experimental_data + + if ('pIC50' in initial_experimental_data) and ('pIC50' in final_experimental_data): + exp_ddg_ij_err = 0.2 # TODO check this is correct + initial_dg = PointEstimate(point=pIC50_to_DG(initial_experimental_data['pIC50']), stderr=exp_ddg_ij_err) + final_dg = PointEstimate(point=pIC50_to_DG(final_experimental_data['pIC50']), stderr=exp_ddg_ij_err) + error = (final_dg - initial_dg) + return error + else: + return PointEstimate(point=None, stderr=None) def analyze_transformation_or_warn( transformation: Transformation, **kwargs diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index 1ca6aabb..e38671f4 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -170,7 +170,7 @@ def extract_snapshot( fragment = load_fragment(fragment_id) # Align the trajectory to the fragment (in place) - trajectory.image_molecules(inplace=True) + #trajectory.image_molecules(inplace=True) # No need to image molecules anymore now that perses adds zero-energy bonds between protein and ligand! trajectory.superpose(fragment, atom_indices=fragment.top.select("name CA")) # Extract the snapshot @@ -363,8 +363,8 @@ def generate_representative_snapshot( project_dir=project_dir, project_data_dir=project_data_dir, run=run_id, - clone=gen_work[1].clone, - gen=gen_work[0].gen, + clone=gen_work[1].clone, # TODO: Check index + gen=gen_work[0].gen, # TODO: Check index frame=frame, fragment_id=transformation.transformation.xchem_fragment_id, cache_dir=cache_dir, @@ -393,6 +393,7 @@ def generate_representative_snapshot( ) as ofs: oechem.OEWriteMolecule(ofs, components[name]) except Exception as e: + print(f'\nException occurred extracting snapshot from {project_dir} data {project_data_dir} run {run_id} clone {gen_work[1].clone} gen {gen_work[0].gen}') print(e) def generate_representative_snapshots( diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index 0b29e96d..c14aac56 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -5,6 +5,7 @@ import os import re from typing import Any, NamedTuple, Optional +import numpy as np import jinja2 import requests @@ -193,6 +194,7 @@ def generate_website( template_path = os.path.join(os.path.dirname(__file__), "templates") template_loader = jinja2.FileSystemLoader(searchpath=template_path) environment = jinja2.Environment(loader=template_loader) + environment.filters["np"] = np # numpy environment.filters["format_point"] = format_point environment.filters["format_stderr"] = format_stderr environment.filters["format_compound_id"] = format_compound_id @@ -310,7 +312,10 @@ def write_html( _generate_paginated_index( write_html=lambda items, **kwargs: write_html(transformations=items, **kwargs), url_prefix="retrospective_analysis", - items=series.transformations, + items=sorted( + [transformation for transformation in series.transformations if (transformation.absolute_error is not None)], + key = lambda transformation : -transformation.absolute_error.point + ), items_per_page=items_per_page, description="Generating html for retrospective analysis index", ) diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index e131d1af..c8a7c66f 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -20,11 +20,12 @@

Retrospective Analysis Final microstate ΔΔG / kcal M-1 ΔΔGexp / kcal M-1 + |ΔΔG-ΔΔGexp| / kcal M-1 Work distribution Convergence - {% for transformation in transformations %} - {% if transformation.exp_ddg.point is not none %} + {% for transformation in (transformations | selectattr("absolute_error", "ne", None)) %} + {% if transformation.absolute_error.point is not none %} RUN{{ transformation.transformation.run_id }} {{ transformation.transformation.initial_microstate.compound_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.compound_id) }} @@ -71,6 +72,12 @@

Retrospective Analysis ± {{ (transformation.exp_ddg * KT_KCALMOL) | format_stderr }} + + + {{ (transformation.absolute_error * KT_KCALMOL) | format_point }} + ± {{ (transformation.absolute_error * KT_KCALMOL) | format_stderr }} + + work distributions diff --git a/fah_xchem/schema.py b/fah_xchem/schema.py index aab43a6a..826165c9 100644 --- a/fah_xchem/schema.py +++ b/fah_xchem/schema.py @@ -22,6 +22,9 @@ def __add__(self, other: "PointEstimate") -> "PointEstimate": stderr=sqrt(self.stderr ** 2 + other.stderr ** 2), ) + def __abs__(self) -> "PointEstimate": + return PointEstimate(point=abs(self.point), stderr=self.stderr) + def __neg__(self) -> "PointEstimate": return PointEstimate(point=-self.point, stderr=self.stderr) @@ -171,7 +174,8 @@ class TransformationAnalysis(Model): transformation: Transformation reliable_transformation: bool = Field(None, description="Specify if the transformation is reliable or not") # JSON boolean binding_free_energy: PointEstimate - exp_ddg: PointEstimate + exp_ddg: PointEstimate # TODO: Make optional, with None as deafault? + absolute_error: Optional[PointEstimate] = None complex_phase: PhaseAnalysis solvent_phase: PhaseAnalysis From 2513495500ec7ab97afb9f9cde6edf8e37e733f1 Mon Sep 17 00:00:00 2001 From: jchodera Date: Wed, 13 Jan 2021 06:18:26 +0000 Subject: [PATCH 18/30] Add reliable transformations plot to retrospective tab. --- fah_xchem/analysis/plots.py | 3 ++- .../website/templates/retrospective_analysis/index.html | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index 4e0563dc..6b0bca90 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -751,4 +751,5 @@ def generate_plots( # NOTE this is handled by Arsenic # this needs to be plotted last as the figure isn't cleared by default in Arsenic # TODO generate time stamp - plot_retrospective(output_dir=output_dir, transformations=series.transformations) + plot_retrospective(output_dir=output_dir, transformations=series.transformations, filename='retrospective') + plot_retrospective(output_dir=output_dir, transformations=[transformation for transformation in series.transformations if transformation.reliable_transformation], filename='retrospective-reliable') diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html index c8a7c66f..918e1834 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_analysis/index.html @@ -6,6 +6,9 @@

Retrospective Analysis retrospective plot + + reliable transfromations retrospective plot +
Showing {{ start_index }} through {{ end_index }} of {{ series.transformations | length }} From f2ea7145b8954fd3e85e31eb780c4a4e435252c1 Mon Sep 17 00:00:00 2001 From: glass-w Date: Wed, 13 Jan 2021 16:31:59 +0000 Subject: [PATCH 19/30] format with Black --- fah_xchem/analysis/__init__.py | 47 ++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index 1cac8112..6b62a097 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -94,8 +94,10 @@ def analyze_transformation( # get associated DDGs between compounds, if experimentally known exp_ddg = calc_exp_ddg(transformation=transformation, compounds=compounds) - absolute_error = abs(binding_free_energy - exp_ddg) if (exp_ddg.point is not None) else None - + absolute_error = ( + abs(binding_free_energy - exp_ddg) if (exp_ddg.point is not None) else None + ) + # Check for consistency across GENS, if requested consistent_bool = None if filter_gen_consistency: @@ -131,7 +133,10 @@ def analyze_transformation( solvent_phase=solvent_phase, ) -def calc_exp_ddg_DEPRECATED(transformation: TransformationAnalysis, compounds: CompoundSeries): + +def calc_exp_ddg_DEPRECATED( + transformation: TransformationAnalysis, compounds: CompoundSeries +): """ Compute experimental free energy difference between two compounds, if available. @@ -198,6 +203,7 @@ def calc_exp_ddg_DEPRECATED(transformation: TransformationAnalysis, compounds: C return PointEstimate(point=exp_ddg_ij, stderr=exp_ddg_ij_err) + def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeries): """ Compute experimental free energy difference between two compounds, if available. @@ -214,24 +220,37 @@ def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeri Returns ------- ddg : PointEstimate - Point estimate of free energy difference for this transformation, + Point estimate of free energy difference for this transformation, or PointEstimate(None, None) if not available. """ - compounds_by_microstate = { microstate.microstate_id : compound for compound in compounds for microstate in compound.microstates } - - initial_experimental_data = compounds_by_microstate[transformation.initial_microstate.microstate_id].metadata.experimental_data - final_experimental_data = compounds_by_microstate[transformation.final_microstate.microstate_id].metadata.experimental_data - - if ('pIC50' in initial_experimental_data) and ('pIC50' in final_experimental_data): - exp_ddg_ij_err = 0.2 # TODO check this is correct - initial_dg = PointEstimate(point=pIC50_to_DG(initial_experimental_data['pIC50']), stderr=exp_ddg_ij_err) - final_dg = PointEstimate(point=pIC50_to_DG(final_experimental_data['pIC50']), stderr=exp_ddg_ij_err) - error = (final_dg - initial_dg) + compounds_by_microstate = { + microstate.microstate_id: compound + for compound in compounds + for microstate in compound.microstates + } + + initial_experimental_data = compounds_by_microstate[ + transformation.initial_microstate.microstate_id + ].metadata.experimental_data + final_experimental_data = compounds_by_microstate[ + transformation.final_microstate.microstate_id + ].metadata.experimental_data + + if ("pIC50" in initial_experimental_data) and ("pIC50" in final_experimental_data): + exp_ddg_ij_err = 0.2 # TODO check this is correct + initial_dg = PointEstimate( + point=pIC50_to_DG(initial_experimental_data["pIC50"]), stderr=exp_ddg_ij_err + ) + final_dg = PointEstimate( + point=pIC50_to_DG(final_experimental_data["pIC50"]), stderr=exp_ddg_ij_err + ) + error = final_dg - initial_dg return error else: return PointEstimate(point=None, stderr=None) + def analyze_transformation_or_warn( transformation: Transformation, **kwargs ) -> Optional[TransformationAnalysis]: From b52279d77e341aeda4d0d178799ec870d88c451e Mon Sep 17 00:00:00 2001 From: jchodera Date: Sun, 24 Jan 2021 05:43:24 +0000 Subject: [PATCH 20/30] Handle InsufficientDataError --- fah_xchem/analysis/__init__.py | 13 +++++++++++-- fah_xchem/analysis/free_energy.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index 1cac8112..f05e6ed6 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -28,7 +28,7 @@ from .diffnet import combine_free_energies, pIC50_to_DG from .exceptions import AnalysisError, DataValidationError from .extract_work import extract_work_pair -from .free_energy import compute_relative_free_energy +from .free_energy import compute_relative_free_energy, InsufficientDataError from .plots import generate_plots from .report import generate_report, gens_are_consistent from .structures import generate_representative_snapshots @@ -67,9 +67,18 @@ def get_gen_analysis(gen: int, works: List[WorkPair]) -> GenAnalysis: # TODO: round raw work output? return GenAnalysis(gen=gen, works=filtered_works, free_energy=free_energy) + # Analyze gens, omitting incomplete gens + gens = list() + for gen, works in works_by_gen.items(): + try: + gens.append( get_gen_analysis(gen, works) ) + except InsufficientDataError as e: + # It's OK if we don't have sufficient data here + pass + return PhaseAnalysis( free_energy=free_energy, - gens=[get_gen_analysis(gen, works) for gen, works in works_by_gen.items()], + gens=gens, ) diff --git a/fah_xchem/analysis/free_energy.py b/fah_xchem/analysis/free_energy.py index 06b6e7c4..183d4d1c 100644 --- a/fah_xchem/analysis/free_energy.py +++ b/fah_xchem/analysis/free_energy.py @@ -172,7 +172,7 @@ def compute_relative_free_energy( # TODO: Flag problematic RUN/CLONE/GEN trajectories for further analysis and debugging works = _filter_work_values(all_works) - + if len(works) < (min_num_work_values or 1): raise InsufficientDataError( f"Need at least {min_num_work_values} good work values for analysis, " From 585b87d4a9b363b8b08d1a76420af949ef8e756c Mon Sep 17 00:00:00 2001 From: jchodera Date: Sun, 24 Jan 2021 06:26:25 +0000 Subject: [PATCH 21/30] Accelerate analysis of incomplete transformations by skipping RUNs without work values --- fah_xchem/analysis/__init__.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index dfe83c63..041a5e04 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -38,7 +38,7 @@ def analyze_phase(server: FahConfig, run: int, project: int, config: AnalysisConfig): paths = list_results(config=server, run=run, project=project) - + if not paths: raise AnalysisError(f"No data found for project {project}, RUN {run}") @@ -263,6 +263,7 @@ def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeri def analyze_transformation_or_warn( transformation: Transformation, **kwargs ) -> Optional[TransformationAnalysis]: + try: return analyze_transformation(transformation, **kwargs) except AnalysisError as exc: @@ -270,7 +271,7 @@ def analyze_transformation_or_warn( return None -def analyze_compound_series( +def analyze_compound_series( series: CompoundSeries, config: AnalysisConfig, server: FahConfig, @@ -279,6 +280,12 @@ def analyze_compound_series( from rich.progress import track + # Pre-filter based on which transformations have any data + available_transformations = [ + transformation for transformation in series.transformations + if len(list_results(config=server, run=transformation.run_id, project=series.metadata.fah_projects.complex_phase)) > 0 + and len(list_results(config=server, run=transformation.run_id, project=series.metadata.fah_projects.solvent_phase)) > 0 + ] with multiprocessing.Pool(num_procs) as pool: results_iter = pool.imap_unordered( partial( @@ -288,13 +295,13 @@ def analyze_compound_series( config=config, compounds=series.compounds, ), - series.transformations, + available_transformations, ) transformations = [ result for result in track( results_iter, - total=len(series.transformations), + total=len(available_transformations), description="Computing transformation free energies", ) if result is not None @@ -349,10 +356,17 @@ def generate_artifacts( data_dir, f"PROJ{series.metadata.fah_projects.complex_phase}" ) + # Pre-filter based on which transformations have any data + available_transformations = [ + transformation for transformation in series.transformations + if transformation.binding_free_energy is not None + and transformation.binding_free_energy.point is not None + ] + if snapshots: logging.info("Generating representative snapshots") generate_representative_snapshots( - transformations=series.transformations, + transformations=available_transformations, project_dir=complex_project_dir, project_data_dir=complex_data_dir, output_dir=os.path.join(output_dir, "transformations"), From 2a723920e9345ce788ff7a5a898ba852fbfc6f7d Mon Sep 17 00:00:00 2001 From: jchodera Date: Wed, 27 Jan 2021 04:18:10 +0000 Subject: [PATCH 22/30] Accept _ in PostEra compound IDs --- fah_xchem/analysis/website/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index c14aac56..f7596bd9 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -61,7 +61,7 @@ def postera_url(compound_or_microstate_id: str) -> Optional[str]: import re match = re.match( - "^(?P[A-Z]{3}-[A-Z]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))?$", + "^(?P[A-Z_]{3}-[A-Z_]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))?$", compound_or_microstate_id, ) From d3e3a889ebd3e50e4fd1af2c22d6929cc7261f13 Mon Sep 17 00:00:00 2001 From: jchodera Date: Fri, 29 Jan 2021 18:47:20 +0000 Subject: [PATCH 23/30] Suppress nonpolar hydrogens by default --- fah_xchem/analysis/structures.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index e38671f4..bd930e3f 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -135,6 +135,7 @@ def extract_snapshot( frame: int, fragment_id: str, cache_dir: Optional[str], + suppress_hydrogens: Optional[bool] = True ): """ Extract the specified snapshot, align it to the reference fragment, and write protein and ligands to separate PDB files @@ -154,6 +155,8 @@ def extract_snapshot( Fragment ID (e.g. 'x10789') cache_dir : str or None If specified, cache relevant parts of "htf.npz" file in a local directory of this name + suppress_hydrogens : bool, optional, default=True + If True, suppress non-polar hydrogens Returns ------- @@ -184,7 +187,10 @@ def extract_snapshot( components = dict() for name in ["protein", "old_ligand", "new_ligand"]: components[name] = mdtraj_to_oemol(sliced_snapshot[name]) - + if suppress_hydrogens: + from openeye import oechem + oechem.OESuppressHydrogens(components[name], retainPolar=True) + return sliced_snapshot, components From db1474dfb4a282cfd9020b667145966453df0da8 Mon Sep 17 00:00:00 2001 From: jchodera Date: Sat, 30 Jan 2021 23:19:52 +0000 Subject: [PATCH 24/30] Bugfix for sorting on webpages --- fah_xchem/analysis/structures.py | 3 ++- fah_xchem/analysis/website/__init__.py | 33 ++++++++++++++------------ 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index bd930e3f..55a53fec 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -189,7 +189,8 @@ def extract_snapshot( components[name] = mdtraj_to_oemol(sliced_snapshot[name]) if suppress_hydrogens: from openeye import oechem - oechem.OESuppressHydrogens(components[name], retainPolar=True) + retainPolar = True + oechem.OESuppressHydrogens(components[name], retainPolar) return sliced_snapshot, components diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index f7596bd9..7bff9f83 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -239,12 +239,14 @@ def write_html( num_top_compounds=num_top_compounds, ) - compound_free_energies = [ - (compound.free_energy.point, compound) - for compound in series.compounds - if compound.free_energy - ] - compounds_sorted = [p[1] for p in sorted(compound_free_energies)] + compounds_sorted = sorted( + [ + compound + for compound in series.compounds + if compound.free_energy + ], + key = lambda m : m.free_energy.point + ) _generate_paginated_index( write_html=lambda items, **kwargs: write_html( @@ -274,15 +276,16 @@ def write_html( ], ) - microstate_free_energies = [ - (microstate.free_energy.point, microstate) - for compound in series.compounds - for microstate in compound.microstates - if microstate.free_energy - ] - - microstates_sorted = [p[1] for p in sorted(microstate_free_energies)] - + microstates_sorted = sorted( + [ + microstate + for compound in series.compounds + for microstate in compound.microstates + if microstate.free_energy + ], + key = lambda m : m.free_energy.point + ) + _generate_paginated_index( write_html=lambda items, **kwargs: write_html( microstates=items, total_microstates=len(microstates_sorted), **kwargs From 1a4d290c9d807c0967712da14a0c19abd54c1178 Mon Sep 17 00:00:00 2001 From: jchodera Date: Sat, 30 Jan 2021 23:27:18 +0000 Subject: [PATCH 25/30] Fix regex when multiple microstate suffixes are in use --- fah_xchem/analysis/website/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index 7bff9f83..6209f4d8 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -61,7 +61,7 @@ def postera_url(compound_or_microstate_id: str) -> Optional[str]: import re match = re.match( - "^(?P[A-Z_]{3}-[A-Z_]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))?$", + "^(?P[A-Z_]{3}-[A-Z_]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))[_0-9]*?$", compound_or_microstate_id, ) From cfb2604e2dd73e1a45a01f8124a2b498f6eb51d9 Mon Sep 17 00:00:00 2001 From: jchodera Date: Sun, 31 Jan 2021 07:54:59 +0000 Subject: [PATCH 26/30] Some cleanup --- fah_xchem/analysis/__init__.py | 46 ++++++++++++- fah_xchem/analysis/plots.py | 25 +++++++- fah_xchem/analysis/report.py | 64 ++++++++++++------- fah_xchem/analysis/structures.py | 5 +- fah_xchem/analysis/website/__init__.py | 8 +-- .../analysis/website/templates/base.html | 2 +- .../index.html | 25 ++++---- fah_xchem/app.py | 1 + fah_xchem/schema.py | 17 ++++- 9 files changed, 147 insertions(+), 46 deletions(-) rename fah_xchem/analysis/website/templates/{retrospective_analysis => retrospective_transformations}/index.html (73%) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index 041a5e04..f22ca45d 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -23,6 +23,7 @@ TransformationAnalysis, WorkPair, FragalysisConfig, + RunStatus ) from .constants import KT_KCALMOL from .diffnet import combine_free_energies, pIC50_to_DG @@ -219,6 +220,8 @@ def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeri NOTE: This method makes the approximation that each microstate has the same affinity as the parent compound. + TODO: Instead, solve DiffNet without experimental data and use derived DDGs between compounds (not transformations). + Parameters ---------- transformation : TransformationAnalysis @@ -270,22 +273,31 @@ def analyze_transformation_or_warn( logging.warning("Failed to analyze RUN%d: %s", transformation.run_id, exc) return None - def analyze_compound_series( series: CompoundSeries, config: AnalysisConfig, server: FahConfig, num_procs: Optional[int] = None, ) -> CompoundSeriesAnalysis: + """ + Analyze a compound series to generate JSON. + """ from rich.progress import track - # Pre-filter based on which transformations have any data + # TODO: Cache results and only update RUNs for which we have received new data + + # Pre-filter based on which transformations have any work data + logging.info(f'Pre-filtering {len(series.transformations)} transformations to identify those with work data...') available_transformations = [ transformation for transformation in series.transformations if len(list_results(config=server, run=transformation.run_id, project=series.metadata.fah_projects.complex_phase)) > 0 and len(list_results(config=server, run=transformation.run_id, project=series.metadata.fah_projects.solvent_phase)) > 0 ] + #available_transformations = series.transformations[:50] + + # Process compound series in parallel + logging.info(f'Processing {len(available_transformations)} / {len(series.transformations)} available transformations in parallel...') with multiprocessing.Pool(num_procs) as pool: results_iter = pool.imap_unordered( partial( @@ -307,6 +319,36 @@ def analyze_compound_series( if result is not None ] + # Reprocess transformation experimental errors to only include most favorable transformation + # NOTE: This is a hack, and should be replaced by a more robust method for accounting for racemic mixtures + # Compile list of all microstate transformations for each compound + compound_ddgs = dict() + for transformation in transformations: + compound_id = transformation.transformation.final_microstate.compound_id + if compound_id in compound_ddgs: + compound_ddgs[compound_id].append(transformation.binding_free_energy.point) + else: + compound_ddgs[compound_id] = [transformation.binding_free_energy.point] + # Collapse to a single estimate + from scipy.special import logsumexp + for compound_id, ddgs in compound_ddgs.items(): + compound_ddgs[compound_id] = -logsumexp(-np.array(ddgs)) + np.log(len(ddgs)) + # Regenerate list of transformations + for index, t in enumerate(transformations): + if (t.exp_ddg is None) or (t.exp_ddg.point is None): + continue + compound_id = t.transformation.final_microstate.compound_id + absolute_error_point = abs(t.exp_ddg.point - compound_ddgs[compound_id]) + transformations[index] = TransformationAnalysis( + transformation=t.transformation, + reliable_transformation=t.reliable_transformation, + binding_free_energy=t.binding_free_energy, + complex_phase=t.complex_phase, + solvent_phase=t.solvent_phase, + exp_ddg=t.exp_ddg, + absolute_error=PointEstimate(point=absolute_error_point, stderr=t.absolute_error.stderr), + ) + # Sort transformations by RUN # transformations.sort(key=lambda transformation_analysis : transformation_analysis.transformation.run_id) # Sort transformations by free energy difference diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index 6b0bca90..c73a445a 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -702,6 +702,8 @@ def generate_plots( """ from rich.progress import track + # TODO: Cache results and only update RUNs for which we have received new data + binding_delta_fs = [ transformation.binding_free_energy.point for transformation in series.transformations @@ -746,10 +748,27 @@ def generate_plots( ): pass + # # Retrospective plots - + # + # NOTE this is handled by Arsenic # this needs to be plotted last as the figure isn't cleared by default in Arsenic # TODO generate time stamp - plot_retrospective(output_dir=output_dir, transformations=series.transformations, filename='retrospective') - plot_retrospective(output_dir=output_dir, transformations=[transformation for transformation in series.transformations if transformation.reliable_transformation], filename='retrospective-reliable') + + # All transformations + plot_retrospective(output_dir=output_dir, transformations=series.transformations, filename='retrospective-transformations-all') + + # Reliable subset of transformations + plot_retrospective(output_dir=output_dir, transformations=[transformation for transformation in series.transformations if transformation.reliable_transformation], filename='retrospective-transformations-reliable') + + # Transformations not involving racemates + # TODO: Find a simpler way to filter non-racemates + nmicrostates = { compound.metadata.compound_id : len(compound.microstates) for compound in series.compounds } + def is_racemate(microstate): + return True if (nmicrostates[microstate.compound_id] > 1) else False + plot_retrospective( + output_dir=output_dir, + transformations=[transformation for transformation in series.transformations if (not is_racemate(transformation.transformation.initial_microstate) and not is_racemate(transformation.transformation.final_microstate))], + filename='retrospective-transformations-noracemates' + ) diff --git a/fah_xchem/analysis/report.py b/fah_xchem/analysis/report.py index 95498094..aad1f519 100755 --- a/fah_xchem/analysis/report.py +++ b/fah_xchem/analysis/report.py @@ -603,6 +603,27 @@ def generate_report( from openeye import oechem +import os +from contextlib import contextmanager +@contextmanager +def working_directory(path): + """ + A context manager which changes the working directory to the given + path, and then changes it back to its previous value on exit. + Usage: + > # Do something in original directory + > with working_directory('/my/new/path'): + > # Do something in new directory + > # Back to old directory + """ + + prev_cwd = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(prev_cwd) + def consolidate_protein_snapshots_into_pdb( oemols: List[oechem.OEMol], @@ -658,29 +679,26 @@ def consolidate_protein_snapshots_into_pdb( # produce multiple PDB files and zip for fragalysis upload if fragalysis_input: - base_pdb_filename = os.path.basename(pdb_filename).split(".")[0] - n_proteins = 1 - n_atoms = proteins[0].topology.n_atoms - n_dim = 3 - for index, protein in enumerate(proteins): - xyz = np.zeros([n_proteins, n_atoms, n_dim], np.float32) - xyz[0, :, :] = protein.xyz[0, :, :] - trajectory = md.Trajectory(xyz, protein.topology) - trajectory.save( - os.path.join(fragalysis_path, f"{base_pdb_filename}_{index}.pdb") - ) - - from zipfile import ZipFile - - with ZipFile(os.path.join(fragalysis_path, "references.zip"), "w") as zipobj: - for _, _, filenames in os.walk(fragalysis_path): - for pdb_file in track( - filenames, description="Zipping protein snapshots for Fragalysis..." - ): - if pdb_file.endswith(".pdb"): - zipobj.write(os.path.join(fragalysis_path, pdb_file)) - - zipobj.close() + with working_directory(fragalysis_path): + base_pdb_filename = os.path.basename(pdb_filename).split(".")[0] + n_proteins = 1 + n_atoms = proteins[0].topology.n_atoms + n_dim = 3 + for index, protein in enumerate(proteins): + xyz = np.zeros([n_proteins, n_atoms, n_dim], np.float32) + xyz[0, :, :] = protein.xyz[0, :, :] + trajectory = md.Trajectory(xyz, protein.topology) + trajectory.save(f"{base_pdb_filename}_{index}.pdb") + + from zipfile import ZipFile + with ZipFile(os.path.join(fragalysis_path, "references.zip"), "w") as zipobj: + from glob import glob + pdb_files = glob('*.pdb') + for pdb_file in track(pdb_files, description="Zipping protein snapshots for Fragalysis..."): + zipobj.write(pdb_file) + + # TODO: Is this necessary? + zipobj.close() # produce one PDB file with multiple frames else: diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index 55a53fec..4bdb74ab 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -174,7 +174,8 @@ def extract_snapshot( # Align the trajectory to the fragment (in place) #trajectory.image_molecules(inplace=True) # No need to image molecules anymore now that perses adds zero-energy bonds between protein and ligand! - trajectory.superpose(fragment, atom_indices=fragment.top.select("name CA")) + #trajectory.superpose(fragment, atom_indices=fragment.top.select("name CA")) + trajectory.superpose(fragment, atom_indices=fragment.top.select("(name CA) and (residue 145 or residue 41 or residue 164 or residue 165 or residue 142 or residue 163)")) # DEBUG : Mpro active site only # Extract the snapshot snapshot = trajectory[frame] @@ -338,6 +339,8 @@ def generate_representative_snapshot( None """ + # TODO: Cache results and only update RUNs for which we have received new data + if ( max_binding_free_energy is not None and transformation.binding_free_energy.point > max_binding_free_energy diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index 6209f4d8..f2ecaeeb 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -61,7 +61,7 @@ def postera_url(compound_or_microstate_id: str) -> Optional[str]: import re match = re.match( - "^(?P[A-Z_]{3}-[A-Z_]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))[_0-9]*?$", + "^(?P[A-Z_]{3}-[A-Z_]{3}-[0-9a-f]{8}-[0-9]+)(_(?P[0-9]+))?([_0-9]*)$", compound_or_microstate_id, ) @@ -203,7 +203,7 @@ def generate_website( environment.filters["experimental_data_url"] = experimental_data_url environment.filters["smiles_to_filename"] = get_image_filename - for subdir in ["compounds", "microstates", "transformations", "reliable_transformations", "retrospective_analysis"]: + for subdir in ["compounds", "microstates", "transformations", "reliable_transformations", "retrospective_transformations"]: os.makedirs(os.path.join(path, subdir), exist_ok=True) def write_html( @@ -314,11 +314,11 @@ def write_html( _generate_paginated_index( write_html=lambda items, **kwargs: write_html(transformations=items, **kwargs), - url_prefix="retrospective_analysis", + url_prefix="retrospective_transformations", items=sorted( [transformation for transformation in series.transformations if (transformation.absolute_error is not None)], key = lambda transformation : -transformation.absolute_error.point ), items_per_page=items_per_page, - description="Generating html for retrospective analysis index", + description="Generating html for retrospective transformations index", ) diff --git a/fah_xchem/analysis/website/templates/base.html b/fah_xchem/analysis/website/templates/base.html index 265a8206..b6eb6cb2 100644 --- a/fah_xchem/analysis/website/templates/base.html +++ b/fah_xchem/analysis/website/templates/base.html @@ -63,7 +63,7 @@ ("microstates/index.html", "microstates", "Microstates"), ("transformations/index.html", "transformations", "Transformations"), ("reliable_transformations/index.html", "reliable_transformations", "Reliable transformations"), - ("retrospective_analysis/index.html", "retrospective_analysis", "Retrospective analysis") + ("retrospective_transformations/index.html", "retrospective_transformations", "Retrospective transformations") ] -%} {% set active_page = active_page | default("index") -%} diff --git a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html b/fah_xchem/analysis/website/templates/retrospective_transformations/index.html similarity index 73% rename from fah_xchem/analysis/website/templates/retrospective_analysis/index.html rename to fah_xchem/analysis/website/templates/retrospective_transformations/index.html index 918e1834..30ae7217 100644 --- a/fah_xchem/analysis/website/templates/retrospective_analysis/index.html +++ b/fah_xchem/analysis/website/templates/retrospective_transformations/index.html @@ -1,13 +1,16 @@ {% extends "base.html" %} {% import "postera.html" as postera %} -{% set active_page = "retrospective_analysis" %} +{% set active_page = "retrospective_transformations" %} {% block content %} -

Retrospective Analysis

- - retrospective plot +

Retrospective Transformations

+
+ retrospective transformations plot - all transformations - - reliable transfromations retrospective plot + + retrospective transformations plot - reliable transformations + + + retrospective transformations plot - no racemates
Showing {{ start_index }} through {{ end_index }} of {{ series.transformations | length }} @@ -21,9 +24,9 @@

Retrospective Analysis RUN Initial microstate Final microstate - ΔΔG / kcal M-1 - ΔΔGexp / kcal M-1 - |ΔΔG-ΔΔGexp| / kcal M-1 + ΔΔG / kcal M-1 + ΔΔGexp / kcal M-1 + |ΔΔG-ΔΔGexp| / kcal M-1 Work distribution Convergence @@ -31,7 +34,7 @@

Retrospective Analysis RUN{{ transformation.transformation.run_id }} - {{ transformation.transformation.initial_microstate.compound_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.compound_id) }} + {{ transformation.transformation.initial_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.initial_microstate.compound_id) }} molecule @@ -47,7 +50,7 @@

Retrospective Analysis pdb - {{ transformation.transformation.final_microstate.compound_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.compound_id) }} + {{ transformation.transformation.final_microstate.microstate_id | format_compound_id }}{{ postera.maybe_link(transformation.transformation.final_microstate.compound_id) }} molecule diff --git a/fah_xchem/app.py b/fah_xchem/app.py index fcc79b40..1b7442c5 100644 --- a/fah_xchem/app.py +++ b/fah_xchem/app.py @@ -91,6 +91,7 @@ def run_analysis( transformations=compound_series.transformations[:max_transformations] ) + # TODO: Remove this? if use_only_reference_compound_data: # Strip experimental data frorm all but reference compound logging.warning(f'Stripping experimental data from all but reference compound') diff --git a/fah_xchem/schema.py b/fah_xchem/schema.py index 826165c9..783aea91 100644 --- a/fah_xchem/schema.py +++ b/fah_xchem/schema.py @@ -174,7 +174,7 @@ class TransformationAnalysis(Model): transformation: Transformation reliable_transformation: bool = Field(None, description="Specify if the transformation is reliable or not") # JSON boolean binding_free_energy: PointEstimate - exp_ddg: PointEstimate # TODO: Make optional, with None as deafault? + exp_ddg: PointEstimate # TODO: Make optional, with None as default? absolute_error: Optional[PointEstimate] = None complex_phase: PhaseAnalysis solvent_phase: PhaseAnalysis @@ -222,3 +222,18 @@ class FragalysisConfig(Model): method: str = None upload_key: str = None new_upload: bool = Field(False) + +class RunStatus(Model): + run_id: int = Field( + None, + description="The RUN number corresponding to the Folding@Home directory structure", + ) + complex_phase_work_units: int = Field( + 0, + description="The number of completed complex phase work units", + ) + solvent_phase_work_units: int = Field( + 0, + description="The number of completed solvent phase work units", + ) + has_changed: bool = True From c30b862ba28ad06c6ca06f699434724d6391e5e3 Mon Sep 17 00:00:00 2001 From: jchodera Date: Sat, 6 Feb 2021 18:15:24 +0000 Subject: [PATCH 27/30] Compress ZIP files --- fah_xchem/analysis/report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fah_xchem/analysis/report.py b/fah_xchem/analysis/report.py index aad1f519..be69d0a1 100755 --- a/fah_xchem/analysis/report.py +++ b/fah_xchem/analysis/report.py @@ -690,8 +690,8 @@ def consolidate_protein_snapshots_into_pdb( trajectory = md.Trajectory(xyz, protein.topology) trajectory.save(f"{base_pdb_filename}_{index}.pdb") - from zipfile import ZipFile - with ZipFile(os.path.join(fragalysis_path, "references.zip"), "w") as zipobj: + from zipfile import ZipFile, ZIP_BZIP2 + with ZipFile(os.path.join(fragalysis_path, "references.zip"), mode="w", compression=ZIP_BZIP2, compresslevel=9) as zipobj: from glob import glob pdb_files = glob('*.pdb') for pdb_file in track(pdb_files, description="Zipping protein snapshots for Fragalysis..."): From d44a60c8dde707f08992f1dcc6596c9e47627516 Mon Sep 17 00:00:00 2001 From: jchodera Date: Sat, 6 Feb 2021 23:26:40 +0000 Subject: [PATCH 28/30] Correctly strip hydrogens from PDB files --- fah_xchem/analysis/structures.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index 4bdb74ab..c7b4f8f5 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -135,7 +135,6 @@ def extract_snapshot( frame: int, fragment_id: str, cache_dir: Optional[str], - suppress_hydrogens: Optional[bool] = True ): """ Extract the specified snapshot, align it to the reference fragment, and write protein and ligands to separate PDB files @@ -155,8 +154,6 @@ def extract_snapshot( Fragment ID (e.g. 'x10789') cache_dir : str or None If specified, cache relevant parts of "htf.npz" file in a local directory of this name - suppress_hydrogens : bool, optional, default=True - If True, suppress non-polar hydrogens Returns ------- @@ -188,10 +185,6 @@ def extract_snapshot( components = dict() for name in ["protein", "old_ligand", "new_ligand"]: components[name] = mdtraj_to_oemol(sliced_snapshot[name]) - if suppress_hydrogens: - from openeye import oechem - retainPolar = True - oechem.OESuppressHydrogens(components[name], retainPolar) return sliced_snapshot, components @@ -215,8 +208,9 @@ def get_stored_atom_indices(project_dir: str, run: int): } # Get all atom indices from the hybrid system - protein_atom_indices = htf.hybrid_topology.select("protein") - hybrid_ligand_atom_indices = htf.hybrid_topology.select("resn MOL") + # Omit hydrogens + protein_atom_indices = htf.hybrid_topology.select("protein and (mass > 1.1)") + hybrid_ligand_atom_indices = htf.hybrid_topology.select("resn MOL and (mass > 1.1)") # Identify atom index subsets for the old and new ligands from the hybrid system old_ligand_atom_indices = [ From 18f6ba82a959a2bc21ed8c4298b65602e16830bf Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 14 Jun 2021 18:20:18 -0700 Subject: [PATCH 29/30] Small fixes and notes --- fah_xchem/analysis/structures.py | 11 ++++++++--- fah_xchem/schema.py | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index c7b4f8f5..af297712 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -172,7 +172,10 @@ def extract_snapshot( # Align the trajectory to the fragment (in place) #trajectory.image_molecules(inplace=True) # No need to image molecules anymore now that perses adds zero-energy bonds between protein and ligand! #trajectory.superpose(fragment, atom_indices=fragment.top.select("name CA")) - trajectory.superpose(fragment, atom_indices=fragment.top.select("(name CA) and (residue 145 or residue 41 or residue 164 or residue 165 or residue 142 or residue 163)")) # DEBUG : Mpro active site only + + # TODO: fix this hardcode for *MPro*! + trajectory.superpose(fragment, + atom_indices=fragment.top.select("(name CA) and (residue 145 or residue 41 or residue 164 or residue 165 or residue 142 or residue 163)")) # DEBUG : Mpro active site only # Extract the snapshot snapshot = trajectory[frame] @@ -361,14 +364,16 @@ def generate_representative_snapshot( run_id = transformation.transformation.run_id + gen_analysis, workpair = gen_work + # Extract representative snapshot try: sliced_snapshots, components = extract_snapshot( project_dir=project_dir, project_data_dir=project_data_dir, run=run_id, - clone=gen_work[1].clone, # TODO: Check index - gen=gen_work[0].gen, # TODO: Check index + clone=workpair.clone, + gen=gen_analysis.gen, frame=frame, fragment_id=transformation.transformation.xchem_fragment_id, cache_dir=cache_dir, diff --git a/fah_xchem/schema.py b/fah_xchem/schema.py index 783aea91..485a2c35 100644 --- a/fah_xchem/schema.py +++ b/fah_xchem/schema.py @@ -223,6 +223,7 @@ class FragalysisConfig(Model): upload_key: str = None new_upload: bool = Field(False) + class RunStatus(Model): run_id: int = Field( None, From cea69445de190ed8e8be4063c1887fc16736a636 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 15 Jun 2021 14:52:43 -0700 Subject: [PATCH 30/30] Modifications in light of feedback from @glass-w, @jchodera --- fah_xchem/analysis/__init__.py | 79 ++------------------------ fah_xchem/analysis/website/__init__.py | 1 - 2 files changed, 6 insertions(+), 74 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index f22ca45d..09cade30 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -36,6 +36,9 @@ from .website import generate_website +EXP_DDG_IJ_ERR = 0.2 # TODO check this is correct + + def analyze_phase(server: FahConfig, run: int, project: int, config: AnalysisConfig): paths = list_results(config=server, run=run, project=project) @@ -144,76 +147,6 @@ def analyze_transformation( ) -def calc_exp_ddg_DEPRECATED( - transformation: TransformationAnalysis, compounds: CompoundSeries -): - """ - Compute experimental free energy difference between two compounds, if available. - - Parameters - ---------- - transformation : TransformationAnalysis - The transformation of interest - compounds : CompoundSeries - Data for the compound series. - - """ - graph = nx.DiGraph() - - # make a simple two node graph - # NOTE there may be a faster way of doing this - graph.add_edge( - transformation.initial_microstate, - transformation.final_microstate, - ) - - for compound in compounds: - for microstate in compound.microstates: - node = CompoundMicrostate( - compound_id=compound.metadata.compound_id, - microstate_id=microstate.microstate_id, - ) - if node in graph: - graph.nodes[node]["compound"] = compound - graph.nodes[node]["microstate"] = microstate - - for node_1, node_2, edge in graph.edges(data=True): - # if both nodes contain exp pIC50 calculate the free energy difference between them - # NOTE assume star map (node 1 is our reference) - try: - node_1_pic50 = graph.nodes[node_1]["compound"].metadata.experimental_data[ - "pIC50" - ] # ref molecule - node_2_pic50 = graph.nodes[node_2]["compound"].metadata.experimental_data[ - "pIC50" - ] # new molecule - - n_microstates_node_1 = len(graph.nodes[node_1]["compound"].microstates) - n_microstates_node_2 = len(graph.nodes[node_2]["compound"].microstates) - - # Get experimental DeltaDeltaG by subtracting from experimental inspiration fragment (ref) - - node_1_DG = ( - pIC50_to_DG(node_1_pic50) - + (0.6 * np.log(n_microstates_node_1)) / KT_KCALMOL - ) # TODO check this is correct - node_2_DG = ( - pIC50_to_DG(node_2_pic50) - + (0.6 * np.log(n_microstates_node_2)) / KT_KCALMOL - ) # TODO check this is correct - - exp_ddg_ij = node_1_DG - node_2_DG - - exp_ddg_ij_err = 0.1 # TODO check this is correct - - except KeyError: - logging.info("Failed to get experimental pIC50 value") - exp_ddg_ij = None - exp_ddg_ij_err = None - - return PointEstimate(point=exp_ddg_ij, stderr=exp_ddg_ij_err) - - def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeries): """ Compute experimental free energy difference between two compounds, if available. @@ -250,12 +183,11 @@ def calc_exp_ddg(transformation: TransformationAnalysis, compounds: CompoundSeri ].metadata.experimental_data if ("pIC50" in initial_experimental_data) and ("pIC50" in final_experimental_data): - exp_ddg_ij_err = 0.2 # TODO check this is correct initial_dg = PointEstimate( - point=pIC50_to_DG(initial_experimental_data["pIC50"]), stderr=exp_ddg_ij_err + point=pIC50_to_DG(initial_experimental_data["pIC50"]), stderr=EXP_DDG_IJ_ERR ) final_dg = PointEstimate( - point=pIC50_to_DG(final_experimental_data["pIC50"]), stderr=exp_ddg_ij_err + point=pIC50_to_DG(final_experimental_data["pIC50"]), stderr=EXP_DDG_IJ_ERR ) error = final_dg - initial_dg return error @@ -273,6 +205,7 @@ def analyze_transformation_or_warn( logging.warning("Failed to analyze RUN%d: %s", transformation.run_id, exc) return None + def analyze_compound_series( series: CompoundSeries, config: AnalysisConfig, diff --git a/fah_xchem/analysis/website/__init__.py b/fah_xchem/analysis/website/__init__.py index f2ecaeeb..ae69476c 100644 --- a/fah_xchem/analysis/website/__init__.py +++ b/fah_xchem/analysis/website/__init__.py @@ -194,7 +194,6 @@ def generate_website( template_path = os.path.join(os.path.dirname(__file__), "templates") template_loader = jinja2.FileSystemLoader(searchpath=template_path) environment = jinja2.Environment(loader=template_loader) - environment.filters["np"] = np # numpy environment.filters["format_point"] = format_point environment.filters["format_stderr"] = format_stderr environment.filters["format_compound_id"] = format_compound_id