Skip to content

Commit

Permalink
Linting and fix tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
hmcezar committed Jan 12, 2024
1 parent 2bac548 commit 8e34cfc
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 29 deletions.
2 changes: 1 addition & 1 deletion clusttraj/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def configure_runtime(args_in: List[str]) -> ClustOptions:
parser.add_argument(
"--metrics",
action="store_true",
help="compute metrics to evaluate the clustering procedure quality."
help="compute metrics to evaluate the clustering procedure quality.",
)

rmsd_criterion = parser.add_mutually_exclusive_group(required=True)
Expand Down
2 changes: 1 addition & 1 deletion clusttraj/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def main(args: List[str] = None) -> None:
outclust_str += f"\nSilhouette score: {ss:.3f}\n"
outclust_str += f"Calinski Harabsz score: {ch:.3f}\n"
outclust_str += f"Davies-Bouldin score: {db:.3f}\n"
outclust_str += f"Cophenetic correlation coefficient: {cpcc:.3f}\n\n"
outclust_str += f"Cophenetic correlation coefficient: {cpcc:.3f}\n\n"

# save summary
with open(clust_opt.summary_name, "w") as f:
Expand Down
36 changes: 21 additions & 15 deletions clusttraj/metrics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
"""Functions to compute evaluation metrics of the clustering procedure"""

from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.metrics import (
silhouette_score,
calinski_harabasz_score,
davies_bouldin_score,
)
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import cophenet
from typing import Tuple
Expand All @@ -9,11 +13,14 @@


def compute_metrics(
clust_opt: ClustOptions, distmat: np.ndarray, z_matrix: np.ndarray, clusters: np.ndarray
clust_opt: ClustOptions,
distmat: np.ndarray,
z_matrix: np.ndarray,
clusters: np.ndarray,
) -> Tuple[np.float64, np.float64, np.float64, np.float64]:
"""Compute metrics to assess the performance of the clustering procedure.
"""Compute metrics to assess the performance of the clustering procedure.
Args:
Args:
clust_opt (ClustOptions): The clustering options.
z_matrix (np.ndarray): The Z-matrix from hierarchical clustering procedure.
Expand All @@ -22,19 +29,18 @@ def compute_metrics(
ch (np.float64): The Calinski Harabasz score.
db (np.float64): The Davies-Bouldin score.
cpcc (np.float64): The cophenetic correlation coefficient.
"""
"""

# Compute the silhouette score
ss = silhouette_score(squareform(distmat), clusters, metric="precomputed")
# Compute the silhouette score
ss = silhouette_score(squareform(distmat), clusters, metric="precomputed")

# Compute the Calinski Harabasz score
ch = calinski_harabasz_score(squareform(distmat), clusters)
# Compute the Calinski Harabasz score
ch = calinski_harabasz_score(squareform(distmat), clusters)

# Compute the Davies-Bouldin score
db = davies_bouldin_score(squareform(distmat), clusters)
# Compute the Davies-Bouldin score
db = davies_bouldin_score(squareform(distmat), clusters)

# Compute the cophenetic correlation coefficient
cpcc = cophenet(z_matrix)[0]

return ss, ch, db, cpcc
# Compute the cophenetic correlation coefficient
cpcc = cophenet(z_matrix)[0]

return ss, ch, db, cpcc
25 changes: 13 additions & 12 deletions clusttraj/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,20 +102,22 @@ def plot_mds(clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray)
right=False,
labelbottom=False,
labelleft=False,
)
)

# Scatter plot the coordinates with cluster colors
plt.scatter(
coords[:, 0], coords[:, 1], marker="o", c=clusters, cmap=plt.cm.nipy_spectral
)

plt.title('MDS Visualization')
plt.title("MDS Visualization")

# Save the plot
plt.savefig(clust_opt.mds_name, bbox_inches="tight")


def plot_tsne(clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray) -> None:
def plot_tsne(
clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray
) -> None:
"""Plot the t-distributed Stochastic Neighbor Embedding 2D plot of the clustering.
Args:
Expand All @@ -129,14 +131,14 @@ def plot_tsne(clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray

# Initialize the tSNE model
tsne = manifold.TSNE(
n_components=2,
perplexity=30,
learning_rate=200,
random_state=666,
n_jobs=clust_opt.n_workers,
n_components=2,
perplexity=30,
learning_rate=200,
random_state=666,
n_jobs=clust_opt.n_workers,
)

# Perform the t-SNE and get the 2D representation
# Perform the t-SNE and get the 2D representation
coords = tsne.fit_transform(squareform(distmat))

# Define a list of unique colors for each cluster
Expand All @@ -156,15 +158,14 @@ def plot_tsne(clust_opt: ClustOptions, clusters: np.ndarray, distmat: np.ndarray
right=False,
labelbottom=False,
labelleft=False,
)
)

# Create a scatter plot with different colors for each cluster
for i, cluster in enumerate(unique_clusters):
cluster_data = coords[clusters == cluster]
plt.scatter(cluster_data[:, 0], cluster_data[:, 1], color=colors[i])

plt.title('t-SNE Visualization')
plt.title("t-SNE Visualization")

# Save the plot
plt.savefig(clust_opt.mds_name[:-7] + "tsne.pdf", bbox_inches="tight")

2 changes: 2 additions & 0 deletions test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def test_parse_args():
force=True,
final_kabsch=True,
silhouette_score=False,
metrics=False,
)
clust_opt = parse_args(args)

Expand All @@ -107,6 +108,7 @@ def test_parse_args():
force=True,
final_kabsch=True,
silhouette_score=False,
metrics=False,
)
clust_opt = parse_args(args)

Expand Down

0 comments on commit 8e34cfc

Please sign in to comment.