Skip to content

Commit

Permalink
Merge pull request #34 from CBICA/spiros-dev
Browse files Browse the repository at this point in the history
Test case fixes and pre-commit update
  • Loading branch information
AlexanderGetka-cbica authored Jan 14, 2025
2 parents ed3b3a1 + 6195a55 commit 283046f
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 276 deletions.
3 changes: 1 addition & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ spare scores

.. image:: https://codecov.io/gh/CBICA/spare_score/graph/badge.svg?token=7yk7pkydHE
:target: https://codecov.io/gh/CBICA/spare_score

:alt: Codecov
.. image:: https://github.com/CBICA/spare_score/actions/workflows/macos-tests-3.12.yml/badge.svg
:alt: macos tests

.. image:: https://github.com/CBICA/spare_score/actions/workflows/ubuntu-tests-3.12.yml/badge.svg
:alt: ubuntu tests

Expand Down
97 changes: 0 additions & 97 deletions merge_ROI_demo_and_test.py

This file was deleted.

4 changes: 1 addition & 3 deletions spare_scores/cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import argparse

import pkg_resources # type: ignore
from importlib.metadata import version

from .spare import spare_test, spare_train

from importlib.metadata import version
VERSION = version("spare_scores")


Expand Down
2 changes: 1 addition & 1 deletion spare_scores/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def age_sex_match(
df1, df2 = df1[df1[to_match] == grps[0]], df1[df1[to_match] == grps[1]]
no_df2 = True
else:
if to_match is not None:
if to_match != "":
logging.info('Two dataframes provided. "to_match" will be ignored.')
no_df2 = False

Expand Down
4 changes: 2 additions & 2 deletions spare_scores/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import os
import pickle
from typing import Any, Union
from typing import Any, Optional, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -43,7 +43,7 @@ def add_file_extension(filename: str, extension: str) -> str:
return filename


def check_file_exists(filename: str, logger: Any) -> Any:
def check_file_exists(filename: Optional[str], logger: Any) -> Any:
"""
Checks if file exists
Expand Down
25 changes: 0 additions & 25 deletions tests/conftest.py

This file was deleted.

123 changes: 69 additions & 54 deletions tests/unit/test_data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,26 @@

class CheckDataPrep(unittest.TestCase):

def test_check_train(self):
def test_check_train(self) -> None:
# Test case 1: Valid input dataframe and predictors
self.df_fixture = load_df("../fixtures/sample_data.csv")
predictors = ["ROI1", "ROI2", "ROI3"]
to_predict = "Sex"
pos_group = "M"
filtered_df, filtered_predictors, mdl_type = check_train(
self.df_fixture, predictors, to_predict, pos_group=pos_group
)
self.assertTrue(
filtered_df.equals(self.df_fixture)
) # Check if filtered dataframe is the same as the input dataframe
self.assertTrue(
filtered_predictors == predictors
) # Check if filtered predictors are the same as the input predictors
self.assertTrue(
mdl_type == "Classification"
) # Check if the SPARE model type is correct
res = check_train(self.df_fixture, predictors, to_predict, pos_group=pos_group)

if isinstance(res, tuple) and len(res) == 3:
filtered_df, filtered_predictors, mdl_type = res

self.assertTrue(
filtered_df.equals(self.df_fixture)
) # Check if filtered dataframe is the same as the input dataframe
self.assertTrue(
filtered_predictors == predictors
) # Check if filtered predictors are the same as the input predictors
self.assertTrue(
mdl_type == "Classification"
) # Check if the SPARE model type is correct

# Test case 2: Missing required columns
df_missing_columns = pd.DataFrame(
Expand All @@ -42,8 +44,12 @@ def test_check_train(self):
predictors = ["Var1", "Var2"]
to_predict = "ToPredict"
pos_group = "1"
res = check_train(df_missing_columns, predictors, to_predict, pos_group)
self.assertTrue(res == "Variable to predict is not in the input dataframe.")
verbose = 1
res = check_train(
df_missing_columns, predictors, to_predict, verbose, pos_group
)
if isinstance(res, str):
self.assertTrue(res == "Variable to predict is not in the input dataframe.")

# Test case 3: Predictor not in input dataframe
df = pd.DataFrame(
Expand All @@ -57,10 +63,13 @@ def test_check_train(self):
predictors = ["Var1", "Var2"] # Var2 is not in the input dataframe
to_predict = "ToPredict"
pos_group = "1"
res = check_train(df, predictors, to_predict, pos_group)
self.assertTrue(res == "Not all predictors exist in the input dataframe.")
verbose = 1
res = check_train(df, predictors, to_predict, verbose, pos_group)

def test_check_test(self):
if isinstance(res, str):
self.assertTrue(res == "Not all predictors exist in the input dataframe.")

def test_check_test(self) -> None:
# Test case 1: Valid input dataframe and meta_data
df = pd.DataFrame(
{
Expand Down Expand Up @@ -121,9 +130,9 @@ def test_check_test(self):
),
}
res = check_test(df_age_outside_range, meta_data)
self.assertTrue(res[1] == None)
self.assertTrue(res[1] is None)

def test_smart_unique(self):
def test_smart_unique(self) -> None:
# test case 1: testing smart_unique with df2=None, to_predict=None
self.df_fixture = load_df("../fixtures/sample_data.csv")
result = smart_unique(self.df_fixture, None)
Expand All @@ -141,13 +150,17 @@ def test_smart_unique(self):
}
self.df_fixture = pd.DataFrame(data=df)
result = smart_unique(self.df_fixture, None, to_predict="ROI1")
err_msg = "Variable to predict has no variance."
self.assertTrue(result == err_msg)

if isinstance(result, str):
err_msg = "Variable to predict has no variance."
self.assertTrue(result == err_msg)

# test case 3: testing smart_unique with variance and no duplicate ID's. df2=None
self.df_fixture = load_df("../fixtures/sample_data.csv")
result = smart_unique(self.df_fixture, None, "ROI1")
self.assertTrue(result.equals(self.df_fixture))

if isinstance(result, pd.DataFrame):
self.assertTrue(result.equals(self.df_fixture))

# test case 4: testing smart_unique with variance and duplicate ID's. df2=None
self.df_fixture = pd.DataFrame(data=df)
Expand All @@ -162,31 +175,33 @@ def test_smart_unique(self):
}
self.df_fixture = self.df_fixture._append(new_row, ignore_index=True)
result = smart_unique(self.df_fixture, None, "ROI1")
correct_df = {
"Id": [1.0, 2.0, 3.0, 4.0, 5.0, float("nan")],
"ScanID": [
"Scan001",
"Scan002",
"Scan003",
"Scan004",
"Scan005",
"Scan006",
],
"Age": [35, 40, 45, 31, 45, 45],
"Sex": ["M", "F", "F", "M", "F", "F"],
"ROI1": [0.64, 0.64, 0.64, 0.64, 0.64, 0.84],
"ROI2": [0.73, 0.91, 0.64, 0.76, 0.78, 0.73],
"ID": [
float("nan"),
float("nan"),
float("nan"),
float("nan"),
float("nan"),
5.0,
],
}
correct_df = pd.DataFrame(data=correct_df)
self.assertTrue(result.equals(correct_df))

if isinstance(result, pd.DataFrame):
correct_df = {
"Id": [1.0, 2.0, 3.0, 4.0, 5.0, float("nan")],
"ScanID": [
"Scan001",
"Scan002",
"Scan003",
"Scan004",
"Scan005",
"Scan006",
],
"Age": [35, 40, 45, 31, 45, 45],
"Sex": ["M", "F", "F", "M", "F", "F"],
"ROI1": [0.64, 0.64, 0.64, 0.64, 0.64, 0.84],
"ROI2": [0.73, 0.91, 0.64, 0.76, 0.78, 0.73],
"ID": [
float("nan"),
float("nan"),
float("nan"),
float("nan"),
float("nan"),
5.0,
],
}
correct_df = pd.DataFrame(data=correct_df)
self.assertTrue(result.equals(correct_df))

# test case 5: testing df2 != None and no_df2=False
df1 = {
Expand All @@ -199,10 +214,10 @@ def test_smart_unique(self):
self.df_fixture1 = pd.DataFrame(data=df1)
self.df_fixture2 = pd.DataFrame(data=df2)

result = smart_unique(self.df_fixture1, self.df_fixture2, to_predict=None)
result = smart_unique(self.df_fixture1, self.df_fixture2, to_predict="")
self.assertTrue(result == (self.df_fixture1, self.df_fixture2))

def test_age_sex_match(self):
def test_age_sex_match(self) -> None:
# test case 1: testing df2=None and to_match=None
self.df_fixture = load_df("../fixtures/sample_data.csv")
result = age_sex_match(self.df_fixture, None)
Expand All @@ -220,7 +235,7 @@ def test_age_sex_match(self):
self.df_fixture1 = load_df("../fixtures/sample_data.csv")
self.df_fixture2 = self.df_fixture1
result = age_sex_match(
self.df_fixture1, self.df_fixture2, to_match=None, age_out_percentage=150
self.df_fixture1, self.df_fixture2, to_match="", age_out_percentage=150
) # Now no_df2=False
err_msg = "Age-out-percentage must be between 0 and 100"
self.assertTrue(result == err_msg)
Expand All @@ -237,7 +252,7 @@ def test_age_sex_match(self):
self.df_fixture1 = pd.DataFrame(data=df1)
self.df_fixture2 = self.df_fixture1
result = age_sex_match(
self.df_fixture1, self.df_fixture2, to_match=None
self.df_fixture1, self.df_fixture2, to_match=""
) # here sex_match=False
self.assertTrue(result == (self.df_fixture1, self.df_fixture2))

Expand Down Expand Up @@ -265,7 +280,7 @@ def test_age_sex_match(self):
print(result)
self.assertTrue(result.equals(correct_df))

def test_logging_basic_config(self):
def test_logging_basic_config(self) -> None:
logging_level = {
0: logging.WARNING,
1: logging.INFO,
Expand All @@ -291,5 +306,5 @@ def test_logging_basic_config(self):
self.assertTrue(os.path.exists("test_data_prep.py"))
self.assertTrue(result == logging.getLogger())

def test_convert_cat_variables(self):
def test_convert_cat_variables(self) -> None:
pass
Loading

0 comments on commit 283046f

Please sign in to comment.