Skip to content

Commit

Permalink
Merge pull request biotite-dev#14 from JHKru/master
Browse files Browse the repository at this point in the history
rpy2/r-bio3d integration for testing/R interop; memory efficient implementation for DCC computation
  • Loading branch information
padix-key authored Feb 25, 2024
2 parents 6ca31da + 9072241 commit 914b997
Show file tree
Hide file tree
Showing 26 changed files with 184 additions and 802 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ jobs:
auto-update-conda: true
python-version: '3.10'
- name: Installing dependencies
run: conda install -c conda-forge poetry prody pytest
run: conda install -c conda-forge poetry prody pytest r-bio3d rpy2
- name: Building distribution
run: poetry build -f wheel
- name: Installing distribution
run: pip install ./dist/*.whl
- name: Testing code
run: pytest
run: pytest
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ dmypy.json
# VS Code
*.code-workspace
.vscode/*
profile.json

# Temporarily disable freezing of dependency versions
poetry.lock
Expand Down
2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ dependencies:
- sphinx-gallery =0.9.0
- numpydoc >=0.8
- ammolite >=0.8
- rpy2
- r-bio3d
74 changes: 47 additions & 27 deletions src/springcraft/nma.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B):
tem_factors : int, float, optional
Factors included in temperature weighting
(with :math:`k_B` as preset).
Returns
-------
dcc : ndarray, shape=(n, n), dtype=float
Expand All @@ -280,62 +280,82 @@ def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B):
.. math::
nDCC_{ij} = \frac{DCC_{ij}}{[DCC_{ii} DCC_{jj}]^{1/2}}
When all modes are considerered, the DCC is equal to the covariance matrix
of GNMs or to the trace of all supermatrices (3x3) of the
covariance matrix (3Nx3N) in the case of ANMs.
Consequently, these are returned if standard parameters
for 'mode_subset' and 'memory_efficient' are passed to the function.
"""

from .gnm import GNM
from .anm import ANM

eig_values, eig_vectors = enm.eigen()
n_nodes = len(enm._coord)

if isinstance(enm, ANM):
is_gnm = False
ntriv_modes = 6
num_dim = 3
elif isinstance(enm, GNM):
is_gnm = True
ntriv_modes = 1
num_dim = 1
else:
raise ValueError(
"Instance of GNM/ANM class expected."
)

eig_values, eig_vectors = eigen(enm)


# Choose modes included in computation; raise error, if trivial
# modes are included
all_modes = False
if mode_subset is None:
all_modes = True
mode_subset = np.arange(ntriv_modes, len(eig_values))
elif any(mode_subset <= (ntriv_modes-1)):
raise ValueError(
"Trivial modes are included in the current selection."
" Please check your input."
)

eig_values = eig_values[mode_subset]
eig_vectors = eig_vectors[mode_subset]

if isinstance(enm, GNM):
# Reshape array of eigenvectors (k,n) -> (k,n,1)
modes_reshaped = np.reshape(
eig_vectors, (eig_vectors.shape[0], -1, 1)
)
# -> ANM
## Shortcut if all modes are included in computations
# GNM -> DCC corresponds to inverted Kirchhoff
if is_gnm and all_modes:
dcc = enm.covariance
# ANM -> ...to the trace of the inverted Hessian's 3x3 superelements
elif all_modes:
# 3N x 3N -> N x 3 x N x 3 -> N x N x 3 x 3
cov = enm.covariance
reshaped = cov.reshape(
cov.shape[0]//3, 3, cov.shape[0]//3, 3
).swapaxes(1,2)
# Accept array of any dimension
# -> Sum over diagonals in last two dims
# -> Return any shape (in this case NxN)
dcc = np.einsum("...ii->...", reshaped)
# Slower method for custom mode range
else:
# Reshape array of eigenvectors (k,3n) -> (k,n,3)
eig_values = eig_values[mode_subset]
eig_vectors = eig_vectors[mode_subset]

# Reshape array of eigenvectors
# (k,3n) -> (k,n,3) for ANMs; (k,n) -> (k,n,1) for GNMs
modes_reshaped = np.reshape(
eig_vectors, (eig_vectors.shape[0], -1, 3)
eig_vectors, (len(mode_subset), -1, num_dim)
)
# Create residue modes matrix (3N -> N for ANMs)
modes_mat_n = modes_reshaped[:, :, np.newaxis, :] *\
modes_reshaped[:, np.newaxis, :, :]
modes_mat_n = np.sum(modes_mat_n, axis=-1)

modes_mat_n = modes_mat_n / eig_values[:, np.newaxis, np.newaxis]
dcc = np.sum(modes_mat_n, axis=0)

dcc = np.zeros((n_nodes, n_nodes))
for ev, evec in zip(eig_values, modes_reshaped):
dcc += (evec @ evec.T) / ev

# Compute the normalized DCC
if norm:
dcc_ii = np.diagonal(dcc)
dcc_ii = np.reshape(dcc_ii, (1, len(dcc_ii)))
dcc_ii = np.repeat(dcc_ii, repeats=len(dcc_ii), axis=0)

dcc_ii = np.reshape(dcc_ii, (1, len(dcc)))
dcc = dcc / np.sqrt(dcc_ii * dcc_ii.T)

# Temperature weighting
elif tem is not None:
if tem is not None:
dcc = dcc * tem * tem_factors

return dcc
Expand Down
21 changes: 0 additions & 21 deletions tests/data/1l2y_bio3d_masses.csv

This file was deleted.

21 changes: 0 additions & 21 deletions tests/data/dccm_calpha_bio3d.csv

This file was deleted.

21 changes: 0 additions & 21 deletions tests/data/dccm_calpha_subset_bio3d.csv

This file was deleted.

Loading

0 comments on commit 914b997

Please sign in to comment.