Skip to content

Commit

Permalink
feat: added tests and github action for setup and testing
Browse files Browse the repository at this point in the history
  • Loading branch information
Floris vanderFlier committed Oct 17, 2024
1 parent bf6d883 commit bce7eb5
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 4 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/setup-and-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# CI workflow: installs the project with uv and runs the pytest suite.
# Triggered on every push.
on: "push"
name: setup-and-test

jobs:
# Single job (id `uv-example`, displayed as "python") on the latest Ubuntu runner.
uv-example:
name: python
runs-on: ubuntu-latest

steps:
# Check out the repository so later steps can see the project files.
- uses: actions/checkout@v4

# Provide the `uv` command used by the install and test steps below.
- name: Install uv
uses: astral-sh/setup-uv@v3

# The interpreter version is read from the repository's `.python-version` file.
- name: "Set up Python"
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"

# Sync the environment, including all optional extras and dev dependencies.
- name: Install the project
run: uv sync --all-extras --dev

# Run pytest on the `tests` directory inside the uv-managed environment.
- name: Run tests
# For example, using `pytest`
run: uv run pytest tests
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"biotite>=1.0.1",
"polyleven>=0.8",
"scikit-learn>=1.5.2",
"pytest>=8.3.3",
]

[build-system]
Expand Down
47 changes: 47 additions & 0 deletions src/utilin/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,49 @@
# One-letter residue codes, sorted alphabetically (20 symbols).
AA_ALPHABET = [*"ACDEFGHIKLMNPQRSTVWY"]

# Alternative residue ordering with a trailing "-" symbol (21 symbols).
AA_ALPHABET_GREMLIN = [*"ARNDCQEGHILKMFPSTWYV-"]

# Three-letter residue name -> one-letter code.
THREE_TO_SINGLE_LETTER_CODES = {
    "ALA": "A",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "CYS": "C",
    "GLN": "Q",
    "GLU": "E",
    "GLY": "G",
    "HIS": "H",
    "ILE": "I",
    "LEU": "L",
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYR": "Y",
    "VAL": "V",
}

# One-letter code -> three-letter residue name. The mapping above is
# one-to-one, so inverting it yields the same entries in the same order.
SINGLE_TO_THREE_LETTER_CODES = {
    single: three for three, single in THREE_TO_SINGLE_LETTER_CODES.items()
}

# Unanchored regex with two alternatives:
#   * six characters starting with O, P or Q;
#   * six or ten characters starting with A-N or R-Z.
UNIPROT_ACCESSION_PATTERN = (
    r"[OPQ][0-9][A-Z0-9]{3}[0-9]"
    r"|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
)
13 changes: 10 additions & 3 deletions src/utilin/plotting/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,30 @@ def plot_prediction_scatter(
y_label: str = "Prediction",
title: Optional[str] = None,
axis_label_fontsize: int = 16,
title_fontsize: int = 18,
margin_ratio: float = 0.15,
) -> Tuple[Figure, Axes]:
x_min, x_max = min(y_true), max(y_true)
y_min, y_max = min(y_pred), max(y_pred)
x_margin = margin_ratio * (x_max - x_min)
y_margin = margin_ratio * (y_max - y_min)
fig, ax = plt.subplots()
ax.scatter(y_true, y_pred, c="k", s=50, alpha=0.75)
ax.set_xlabel(x_label, fontsize=axis_label_fontsize)
ax.set_ylabel(y_label, fontsize=axis_label_fontsize)
ax.set_title(title)
ax.set_title(title, fontsize=title_fontsize)
ax.set_xlim(x_min, x_max + x_margin)
ax.set_ylim(y_min, y_max + y_margin)
spearman, _ = spearmanr(y_true, y_pred)
r2 = r2_score(y_true, y_pred)
print(spearman, r2)
metrics_str = f"Spearman: {spearman:.2f}\nR2: {r2:.2f}"
box_format = dict(boxstyle="round", facecolor="white", alpha=0.5)
ax.text(
0.05,
0.95,
metrics_str,
transform=ax.transAxes,
fontsize=14,
fontsize=12,
verticalalignment="top",
bbox=box_format,
)
Expand Down
58 changes: 58 additions & 0 deletions tests/utilin/encode/test_one_hot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import numpy as np
import pytest
import pandas as pd

from utilin.encode.one_hot import encode_sequences_one_hot


def test_encode_sequences_one_hot_default():
    """Two 3-residue sequences encode to a (2, 3, 20) one-hot tensor.

    The encoder is called without an explicit alphabet argument.
    """
    # Position of the single 1 in each expected 20-wide row, per residue.
    hot_positions = np.array(
        [
            [0, 1, 2],  # "A", "C", "D"
            [5, 6, 8],  # "G", "H", "K"
        ]
    )
    # Row i of the identity matrix is the one-hot vector for index i.
    expected = np.eye(20, dtype=int)[hot_positions]

    encoded = encode_sequences_one_hot(["ACD", "GHK"])

    assert np.array_equal(encoded, expected)


def test_encode_sequences_one_hot_gremlin():
    """With ``aa_alphabet="gremlin"`` each residue maps to a 21-wide row."""
    # Position of the single 1 in each expected 21-wide row, per residue.
    hot_positions = np.array(
        [
            [0, 20],  # "A", "-"
            [4, 18],  # "C", "Y"
        ]
    )
    # Row i of the identity matrix is the one-hot vector for index i.
    expected = np.eye(21, dtype=int)[hot_positions]

    encoded = encode_sequences_one_hot(["A-", "CY"], aa_alphabet="gremlin")

    assert np.array_equal(encoded, expected)


def test_encode_empty_sequences():
    """An empty list of sequences must encode to an empty array."""
    encoded = encode_sequences_one_hot([])
    # Compared against a zero-length 1-D array, exactly as np.array([]) builds it.
    assert np.array_equal(encoded, np.array([]))


def test_encode_pandas_sequences_one_hot():
    """A ``pandas.Series`` of sequences is accepted as input."""
    series = pd.Series(["AC"])
    # One sequence of two residues: "A" -> index 0, "C" -> index 1.
    hot_positions = np.array([[0, 1]])
    # Row i of the identity matrix is the one-hot vector for index i.
    expected = np.eye(20, dtype=int)[hot_positions]

    encoded = encode_sequences_one_hot(series)

    assert np.array_equal(encoded, expected)
37 changes: 36 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bce7eb5

Please sign in to comment.