sasascore module #862

Merged on Oct 16, 2024 (57 commits). The diff below shows changes from 26 commits.

Commits:
990c6bc  added accscoring resdic (mgiulini, Apr 16, 2024)
6af27b4  added first draft of accscoring module (mgiulini, Apr 16, 2024)
7d8f0f0  removed CNS dependency (mgiulini, Apr 16, 2024)
09079e5  accscoring docs (mgiulini, Apr 17, 2024)
c45346a  typing (mgiulini, Apr 17, 2024)
6ae7a3c  added violations (mgiulini, Apr 18, 2024)
edcdb6e  added d probe_radius argument (mgiulini, Apr 19, 2024)
9743331  added probe_radius in accscoring (mgiulini, Apr 19, 2024)
1f6f9f6  added func docs (mgiulini, Apr 25, 2024)
09e8088  added tests for accscoring (mgiulini, Apr 25, 2024)
6e56d99  fixed linting (mgiulini, Apr 25, 2024)
34020ca  renamed to sasascore (mgiulini, Apr 25, 2024)
ea6fa89  renamed tests (mgiulini, Apr 25, 2024)
518a6e0  added example config file (mgiulini, Apr 25, 2024)
ea2a1ec  modules using resdic (mgiulini, Apr 25, 2024)
f136c01  sasascore docs (mgiulini, Apr 25, 2024)
449a32c  codacy spaces (mgiulini, Apr 25, 2024)
019d798  rearrange output as class method (mgiulini, Aug 1, 2024)
420a634  added emscoring to sasascore-test (mgiulini, Aug 1, 2024)
b4aa016  added comment (mgiulini, Aug 1, 2024)
ec67b70  improved tests (mgiulini, Aug 1, 2024)
5552fbb  added integration test for sasascore (mgiulini, Aug 1, 2024)
66204e5  re-added confirm_installation (mgiulini, Aug 1, 2024)
92e3cfb  prettified integration test (mgiulini, Aug 6, 2024)
58fe442  handled non-existing chains (mgiulini, Aug 6, 2024)
6ec65b0  added comments (mgiulini, Aug 6, 2024)
f48a3ca  Update src/haddock/modules/scoring/sasascore/defaults.yaml (mgiulini, Aug 7, 2024)
6ed84dc  Update src/haddock/modules/scoring/sasascore/defaults.yaml (mgiulini, Aug 7, 2024)
b8995ae  Update src/haddock/modules/scoring/sasascore/sasascore.py (mgiulini, Aug 7, 2024)
f8875c5  Update src/haddock/modules/scoring/sasascore/sasascore.py (mgiulini, Aug 7, 2024)
2134f08  Update src/haddock/modules/scoring/sasascore/sasascore.py (mgiulini, Aug 7, 2024)
633d95d  Update src/haddock/modules/scoring/sasascore/sasascore.py (mgiulini, Aug 7, 2024)
6973b61  Update src/haddock/modules/scoring/sasascore/sasascore.py (mgiulini, Aug 7, 2024)
9899820  imported typing (mgiulini, Aug 7, 2024)
0e941f3  added example of resdic in docs (mgiulini, Aug 7, 2024)
0e63d89  Merge branch 'main' into accscoring (mgiulini, Aug 8, 2024)
d25d73d  changed running mode to local and removed interemediate files (mgiulini, Oct 1, 2024)
97c302f  added polished tests (mgiulini, Oct 1, 2024)
bd2e27a  generalized rank_according_to_score (mgiulini, Oct 2, 2024)
d51d212  modified tests (mgiulini, Oct 2, 2024)
e2195b2  removed pandas (mgiulini, Oct 2, 2024)
2ac0107  adjusted example (mgiulini, Oct 2, 2024)
71d42a9  Merge branch 'main' into accscoring (mgiulini, Oct 2, 2024)
0c5c0bd  reverted generalisation of rank_accordint_to_score and overwritte sco… (mgiulini, Oct 4, 2024)
cbee1b4  Merge branch 'accscoring' of https://github.com/haddocking/haddock3 i… (mgiulini, Oct 4, 2024)
91578a5  updated example (mgiulini, Oct 4, 2024)
8ddb333  Merge branch 'main' into accscoring (mgiulini, Oct 4, 2024)
66d5fd1  fixed capri test (mgiulini, Oct 4, 2024)
1d4bfc2  Merge branch 'main' into accscoring (mgiulini, Oct 4, 2024)
ebc74b1  moved REL_ASA and DEFAULT_PROBE_RADIUS to lib (mgiulini, Oct 4, 2024)
6a98b5e  Merge branch 'accscoring' of https://github.com/haddocking/haddock3 i… (mgiulini, Oct 4, 2024)
cf4ce8d  Update src/haddock/modules/scoring/sasascore/__init__.py (mgiulini, Oct 8, 2024)
441d2b0  Merge branch 'main' into accscoring (rvhonorato, Oct 9, 2024)
6e1f109  deleted parse_ncores (mgiulini, Oct 16, 2024)
8d59939  Merge branch 'main' into accscoring (mgiulini, Oct 16, 2024)
e37e7b2  Merge branch 'main' into accscoring (rvhonorato, Oct 16, 2024)
38aeeca  Merge branch 'main' into accscoring (rvhonorato, Oct 16, 2024)
1 change: 1 addition & 0 deletions devtools/build_defaults_rst.py
@@ -37,6 +37,7 @@
"alascan": "Alanine Scanning module",
"ilrmsdmatrix": "Interface Ligand RMSD Matrix calculation module",
"exit": "Exit module",
"sasascore": "Surface Accessibility Scoring module",
}

CATEGORY_TITLE_DICT = {
29 changes: 29 additions & 0 deletions examples/scoring/sasascore-test.cfg
@@ -0,0 +1,29 @@
# ====================================================================
# Sasascore scoring example

# directory in which the scoring will be done
run_dir = "run1-sasascore-test"

# execution mode
ncores = 5
mode = "local"

# ensemble of different complexes to be scored
molecules = "data/T161-rescoring-ens.pdb"

# ====================================================================
# Parameters for each stage are defined below

[topoaa]

# emscoring round to calculate the scores and declash the structures
[emscoring]

[sasascore]
resdic_accessible_A = [1,2,3,4,5,6,7,8,9,10]
resdic_buried_A = [29,30,31,32,33,34,35]
resdic_accessible_B = [1,2,3,4,5,6,7,8,9,10]

[caprieval]

# ====================================================================
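
The resdic_* parameters follow the per-chain naming convention resdic_<buried|accessible>_<chain> = [residue numbers] shared by the other modules listed in modules_using_resdic further below. As a minimal sketch of how such parameters are grouped per chain, mirroring the dictionary comprehensions in the sasascore module's _run() method later in this diff (the params dictionary here is a stand-in, not real module state):

params = {
    "resdic_accessible_A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "resdic_buried_A": [29, 30, 31, 32, 33, 34, 35],
    "resdic_accessible_B": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
}

# group residues by chain identifier (the last character of the key)
buried_resdic = {
    key[-1]: value for key, value in params.items()
    if key.startswith("resdic_buried")
}
acc_resdic = {
    key[-1]: value for key, value in params.items()
    if key.startswith("resdic_accessible")
}

print(buried_resdic)  # {'A': [29, 30, 31, 32, 33, 34, 35]}
print(acc_resdic)     # chains 'A' and 'B', residues 1-10 each
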
90 changes: 90 additions & 0 deletions integration_tests/test_sasascore.py
@@ -0,0 +1,90 @@
import tempfile
from pathlib import Path

import pytest
import shutil
import pandas as pd
import numpy as np

from haddock.modules.scoring.sasascore import DEFAULT_CONFIG as DEFAULT_SASASCORE_CONFIG
from haddock.modules.scoring.sasascore import HaddockModule as SasascoreModule
from haddock.libs.libontology import PDBFile
from . import CNS_EXEC, DATA_DIR, has_cns
from tests import golden_data

@pytest.fixture
def sasascore_module():
"""Return a default sasascore module."""
with tempfile.TemporaryDirectory(dir=".") as tmpdir:
sasascore = SasascoreModule(
order=0, path=tmpdir, initial_params=DEFAULT_SASASCORE_CONFIG
)
# let's assume we know Val 39 should be buried upon complex formation
sasascore.params["resdic_buried_A"] = [39]
# let's assume we know GLU 43 should remain accessible
sasascore.params["resdic_accessible_A"] = [43]
yield sasascore

class MockPreviousIO():
def __init__(self, path):
self.path = path

def retrieve_models(self, individualize: bool = False):
shutil.copy(Path(golden_data, "protprot_complex_1.pdb"), Path(".", "protprot_complex_1.pdb"))
shutil.copy(Path(golden_data, "protprot_complex_2.pdb"), Path(".", "protprot_complex_2.pdb"))
model_list = [
PDBFile(file_name="protprot_complex_1.pdb", path="."),
PDBFile(file_name="protprot_complex_2.pdb", path="."),
]

return model_list

def output(self):
return None


def test_sasascore_default(sasascore_module, mocker):
"""Test the sasascore module."""
sasascore_module.previous_io = MockPreviousIO(path=sasascore_module.path)
sasascore_module.run()

expected_sasascore_csv = Path(sasascore_module.path, "sasascore.tsv")
expected_violations_csv = Path(sasascore_module.path, "violations.tsv")
assert expected_sasascore_csv.exists(), f"{expected_sasascore_csv} does not exist"
assert expected_violations_csv.exists(), f"{expected_violations_csv} does not exist"

# check sasascore.tsv
exp_shape = (2, 4)
df = pd.read_csv(expected_sasascore_csv, sep="\t", comment="#")
assert df.shape == exp_shape, f"{expected_sasascore_csv} has wrong shape ({df.shape} instead of {exp_shape})"
assert df.loc[df["structure"] == "protprot_complex_2.pdb"].iloc[0,:]["score"] == 0
assert df.loc[df["structure"] == "protprot_complex_1.pdb"].iloc[0,:]["score"] == 1

# check violations.tsv
exp_shape = (2, 3)
df = pd.read_csv(expected_violations_csv, sep="\t", comment="#")
assert df.shape == exp_shape, f"{expected_violations_csv} has wrong shape ({df.shape} instead of {exp_shape})"
assert df.loc[df["structure"] == "protprot_complex_1.pdb"].iloc[0,:]["bur_A"] == "39"


def test_sasascore_no_residues(sasascore_module, mocker):
"""Test the sasascore module when a non-existing chain is added."""
sasascore_module.previous_io = MockPreviousIO(path=sasascore_module.path)
# add a non-existing chain
sasascore_module.params["resdic_buried_C"] = [1]
sasascore_module.run()
expected_sasascore_csv = Path(sasascore_module.path, "sasascore.tsv")
exp_shape = (2, 4)
df = pd.read_csv(expected_sasascore_csv, sep="\t", comment="#")
assert df.shape == exp_shape, f"{expected_sasascore_csv} has wrong shape ({df.shape} instead of {exp_shape})"
assert df.loc[df["structure"] == "protprot_complex_2.pdb"].iloc[0,:]["score"] == 0
assert df.loc[df["structure"] == "protprot_complex_1.pdb"].iloc[0,:]["score"] == 1
# now the violations df
expected_violations_csv = Path(sasascore_module.path, "violations.tsv")
exp_shape = (2, 4)
df = pd.read_csv(expected_violations_csv, sep="\t", comment="#")
assert df.shape == exp_shape, f"{expected_violations_csv} has wrong shape ({df.shape} instead of {exp_shape})"
assert df.columns.tolist() == ["structure", "bur_A", "bur_C", "acc_A"]
assert df.loc[df["structure"] == "protprot_complex_1.pdb"].iloc[0,:]["bur_A"] == "39"
assert df.loc[df["structure"] == "protprot_complex_1.pdb"].iloc[0,:]["bur_C"] == "None"
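
These assertions follow from the restraints set in the fixture: in protprot_complex_1.pdb, residue 39 of chain A (declared buried) turns out to be solvent accessible, which puts "39" in the bur_A column and adds one point to the score, while protprot_complex_2.pdb violates neither restraint and scores 0; the restraint on the non-existent chain C simply yields a "None" entry instead of an error. A rough sketch of that bookkeeping, not the module's actual code, just the rule stated in the module docstring:

# violations implied by the assertions above: one list per restrained chain
violations = {
    "protprot_complex_1.pdb": {"bur_A": [39], "acc_A": []},
    "protprot_complex_2.pdb": {"bur_A": [], "acc_A": []},
}
# every violated expectation adds one to the score; lower is better
scores = {
    pdb: sum(len(residues) for residues in per_chain.values())
    for pdb, per_chain in violations.items()
}
assert scores["protprot_complex_1.pdb"] == 1
assert scores["protprot_complex_2.pdb"] == 0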

36 changes: 32 additions & 4 deletions src/haddock/clis/restraints/calc_accessibility.py
@@ -11,6 +11,7 @@
[-c <cutoff>] Relative side-chain accessibility cutoff
[--log_level <log_level>] DEBUG, INFO, WARNING, ERROR, or CRITICAL
[--export_to_actpass] Flag to export accessible residues
[--probe_radius <probe_radius>] Probe radius for the accessibility calculation
"""


@@ -56,6 +57,14 @@ def add_calc_accessibility_arguments(calc_accessibility_subcommand):
help="Export the exposed residues as passive to an actpass file",
required=False,
)

calc_accessibility_subcommand.add_argument(
"--probe_radius",
default=1.4,
type=float,
help="Probe radius for the accessibility calculation",
required=False,
)

return calc_accessibility_subcommand

@@ -214,10 +223,12 @@ def add_calc_accessibility_arguments(calc_accessibility_subcommand):
'PNS': 78.11,
}
}
DEFAULT_PROBE_RADIUS = 1.4


def get_accessibility(
pdb_f: Union[Path, str]
pdb_f: Union[Path, str],
probe_radius: float = DEFAULT_PROBE_RADIUS,
) -> dict[str, dict[int, dict[str, float]]]:
"""Compute per-residue accessibility values.

@@ -237,7 +248,7 @@ def get_accessibility(
naccess_unsupported_aa = ['HEC', 'TIP', 'ACE', 'THP', 'HEB', 'CTN']
logging.info("Calculate accessibility...")
try:
from freesasa import Classifier, calc
from freesasa import Classifier, calc, Parameters
except ImportError as err:
logging.error("calc_accessibility requires the 'freesasa' Python API")
raise ImportError(err)
@@ -265,7 +276,21 @@

struct = Structure(pdb_f, classifier, options={})
struct.setRadiiWithClassifier(classifier)
result = calc(struct)
# if the probe_radius is different from the default value
# we need to redefine the parameters
if probe_radius != DEFAULT_PROBE_RADIUS:
new_parameters = Parameters(
{
'algorithm': 'LeeRichards',
'probe-radius': probe_radius,
'n-points': Parameters.defaultParameters['n-points'],
'n-slices': Parameters.defaultParameters['n-slices'],
'n-threads': Parameters.defaultParameters['n-threads'],
}
)
result = calc(struct, parameters=new_parameters)
else:
result = calc(struct)

# iterate over all atoms to get SASA and residue name
for idx in range(struct.nAtoms()):
@@ -382,6 +407,7 @@ def calc_accessibility(
cutoff: float = 0.4,
log_level: str = "INFO",
export_to_actpass: bool = False,
probe_radius: float = DEFAULT_PROBE_RADIUS,
) -> None:
"""Calculate the accessibility of the side chains and apply a cutoff.

@@ -395,14 +421,16 @@
Logging level.
export_to_actpass : bool
Export the exposed residues as passive to an actpass file.
probe_radius : float
Probe radius for the accessibility calculation.
"""
logging.basicConfig(
level=log_level,
format='%(asctime)s L%(lineno)d %(levelname)s - %(message)s',
datefmt='%d/%m/%Y %H:%M:%S',
)
# Compute per-residue accessibilities
access_dic = get_accessibility(input_pdb_file)
access_dic = get_accessibility(input_pdb_file, probe_radius)
# Filter residues based on accessibility cutoff
result_dict = apply_cutoff(access_dic, cutoff)

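For orientation, a minimal usage sketch of the updated function; this is a sketch under assumptions: the import path is inferred from the file location above, complex.pdb is a hypothetical input, apply_cutoff is assumed to live in the same module (it is called a few lines above), and the inner keys of the returned per-residue dictionaries are left unspecified. The same probe radius is also exposed on the command line through the new --probe_radius option shown in the usage block at the top of this file.

from haddock.clis.restraints.calc_accessibility import (
    apply_cutoff,
    get_accessibility,
)

# chain -> residue number -> accessibility values, computed with a
# non-default probe radius (1.4 is the default kept in DEFAULT_PROBE_RADIUS)
access_dic = get_accessibility("complex.pdb", probe_radius=1.2)

# keep residues whose relative side-chain accessibility exceeds the cutoff
exposed = apply_cutoff(access_dic, 0.4)
print(exposed)
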
2 changes: 1 addition & 1 deletion src/haddock/modules/analysis/__init__.py
@@ -3,7 +3,7 @@
from typing import Iterable


modules_using_resdic = ("caprieval", "rmsdmatrix", "alascan")
modules_using_resdic = ("caprieval", "rmsdmatrix", "alascan", "sasascore")


def confirm_resdic_chainid_length(params: Iterable[str]) -> None:
143 changes: 143 additions & 0 deletions src/haddock/modules/scoring/sasascore/__init__.py
@@ -0,0 +1,143 @@
"""
Surface accessibility scoring module.

This module performs a solvent accessibility analysis based on
some user-defined residues that should be buried or accessible.

Each residue that is expected to be buried but is found accessible (or vice
versa) increases the score by one. The lower the final score, the more
consistent the model is with the user data.
"""
from pathlib import Path

from haddock.core.typing import FilePath
from haddock.modules import get_engine
from haddock.modules import BaseHaddockModule
from haddock.libs.libutil import parse_ncores
from haddock.libs.libparallel import get_index_list
from haddock.modules.scoring.sasascore.sasascore import (
AccScore,
AccScoreJob,
prettify_df,
)
from haddock.modules.analysis import (
get_analysis_exec_mode,
)

RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, "defaults.yaml")


class HaddockModule(BaseHaddockModule):
"""HADDOCK3 module to perform accessibility scoring."""

name = RECIPE_PATH.name

def __init__(self,
order: int,
path: Path,
initial_params: FilePath = DEFAULT_CONFIG) -> None:
super().__init__(order, path, initial_params)

@classmethod
def confirm_installation(cls) -> None:
"""Confirm module is installed."""
return

def _rearrange_output(self, output_name: FilePath, path: FilePath,
ncores: int) -> None:
"""Combine different tsv outputs in a single file."""
output_fname = Path(path, output_name)
self.log(f"rearranging output files into {output_fname}")
key = output_fname.stem.split(".")[0]
# Combine files
with open(output_fname, 'w') as out_file:
for core in range(ncores):
tmp_file = Path(path, f"{key}_" + str(core) + ".tsv")
with open(tmp_file) as infile:
out_file.write(infile.read())
tmp_file.unlink()
self.log(f"Completed reconstruction of {key} files.")
self.log(f"{output_fname} created.")

def _run(self) -> None:
"""Execute module."""
try:
models_to_score = self.previous_io.retrieve_models(
individualize=True
)
except Exception as e:
self.finish_with_error(e)

nmodels = len(models_to_score)
# index_list for the jobs with linear scaling
ncores = parse_ncores(n=self.params['ncores'], njobs=nmodels)
idx_list = get_index_list(nmodels, ncores)

# loading buried and accessible residue dictionaries
buried_resdic = {
key[-1]: value for key, value
in self.params.items()
if key.startswith("resdic_buried")
}
acc_resdic = {
key[-1]: value for key, value
in self.params.items()
if key.startswith("resdic_accessible")
}
# remove the placeholder "_" entries from the bare resdic_ parameters
buried_resdic.pop("_")
acc_resdic.pop("_")
# column names of the violations report, one per restrained chain
buried_violations_chains = [f"bur_{ch}" for ch in buried_resdic.keys()]
acc_violations_chains = [f"acc_{ch}" for ch in acc_resdic.keys()]
violations_chains = buried_violations_chains + acc_violations_chains
# initialize jobs
sasascore_jobs: list[AccScoreJob] = []

for core in range(ncores):
output_name = Path("sasascore_" + str(core) + ".tsv")
viol_output_name = Path("violations_" + str(core) + ".tsv")
model_list = models_to_score[idx_list[core]:idx_list[core + 1]]
accscore_obj = AccScore(
model_list=model_list,
output_name=output_name,
core=core,
path=Path("."),
buried_resdic=buried_resdic,
acc_resdic=acc_resdic,
cutoff=self.params["cutoff"],
viol_output_name=viol_output_name,
probe_radius=self.params["probe_radius"],
)

job = AccScoreJob(
accscore_obj,
)
sasascore_jobs.append(job)

# Run sasascore Jobs
exec_mode = get_analysis_exec_mode(self.params["mode"])
Engine = get_engine(exec_mode, self.params)
engine = Engine(sasascore_jobs)
engine.run()

# rearrange output
output_name = Path("sasascore.tsv")
self._rearrange_output(
output_name,
path=Path("."),
ncores=ncores
)
score_columns = ["structure", "original_name", "md5", "score"]
prettify_df(output_name, columns=score_columns, sortby="score")
self._rearrange_output(
"violations.tsv",
path=Path("."),
ncores=ncores
)
prettify_df("violations.tsv",
columns=["structure"] + violations_chains)

self.output_models = models_to_score
self.export_io_models()
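
A note on the work distribution in _run() above: get_index_list produces chunk boundaries, so idx_list holds ncores + 1 indices and each core scores the slice between two consecutive boundaries. A small illustration with hypothetical boundary values, chosen only to show the slicing pattern:

# pretend we have 5 models to split over 2 cores
models = [f"model_{i}.pdb" for i in range(5)]
ncores = 2
idx_list = [0, 3, 5]  # hypothetical output of get_index_list(5, 2)
chunks = [
    models[idx_list[core]:idx_list[core + 1]] for core in range(ncores)
]
# chunks[0] -> ['model_0.pdb', 'model_1.pdb', 'model_2.pdb']
# chunks[1] -> ['model_3.pdb', 'model_4.pdb']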