From d94161113116ea2a37651674d87aebae0e629771 Mon Sep 17 00:00:00 2001
From: Spiros Maggioros
Date: Tue, 30 Jul 2024 12:13:01 +0300
Subject: [PATCH 1/4] Added test case for MLPDataset class

---
 codecov.yml                     |  5 +++++
 tests/unit/test_spare_scores.py | 18 +++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/codecov.yml b/codecov.yml
index 657d8f7..4fa634e 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -18,3 +18,8 @@ comment:
   layout: "reach,diff,flags,tree"
   behavior: default
   require_changes: no
+
+ignore:
+  - "merge_ROI_demo_and_test.py"
+  - "setup.py"
+  - "spare_scores/cli.py"
diff --git a/tests/unit/test_spare_scores.py b/tests/unit/test_spare_scores.py
index 46857c9..48ac68d 100644
--- a/tests/unit/test_spare_scores.py
+++ b/tests/unit/test_spare_scores.py
@@ -1,14 +1,30 @@
 import sys
 import unittest
 from pathlib import Path
-
+import numpy as np
 import pandas as pd
 
 sys.path.append("../../spare_scores")
 from util import load_df, load_model
+from mlp_torch import MLPDataset
 
 from spare_scores import spare_test, spare_train
 
 
+class CheckMLPDataset(unittest.TestCase):
+    def test_len(self):
+        # test case 1: testing length
+        self.X = np.array([1, 2, 3, 4, 5, 6, 7, 8])
+        self.Y = np.array([1, 2, 3, 4, 5, 6, 7, 8])
+        self.Dataset = MLPDataset(self.X, self.Y)
+        self.assertTrue(len(self.Dataset) == 8)
+
+    def test_idx(self):
+        # test case 2: testing getter
+        self.X = np.array([1, 2, 3, 4, 5, 6, 7, 8])
+        self.Y = np.array([1, 2, 3, 4, 5, 6, 7, 8])
+        self.Dataset = MLPDataset(self.X, self.Y)
+        self.assertTrue(self.Dataset[0] == (1, 1))
+        self.assertTrue(self.Dataset[len(self.Dataset) - 1] == (8, 8))
 
 
 class CheckSpareScores(unittest.TestCase):

From 47084c5284a49e2619bc8ba721f96c24ce57c55d Mon Sep 17 00:00:00 2001
From: Spiros Maggioros
Date: Tue, 30 Jul 2024 21:53:43 +0300
Subject: [PATCH 2/4] Major rework to includes | added test cases | workflows update

---
 .github/workflows/macos_test_cases.yml  |   6 +-
 .github/workflows/ubuntu_test_cases.yml |  15 +--
 codecov.yml                             |   1 +
 spare_scores/classes.py                 |  11 +-
 spare_scores/cli.py                     |   3 +-
 spare_scores/data_prep.py               |   3 +-
 spare_scores/mlp.py                     |   3 +-
 spare_scores/mlp_torch.py               |   3 +-
 spare_scores/spare.py                   |  18 +--
 spare_scores/svm.py                     |   5 +-
 tests/unit/test_data_prep.py            |   8 +-
 tests/unit/test_spare_scores.py         | 146 +++++++++++++++++++++++-
 tests/unit/test_util.py                 |  11 +-
 13 files changed, 189 insertions(+), 44 deletions(-)

diff --git a/.github/workflows/macos_test_cases.yml b/.github/workflows/macos_test_cases.yml
index e01cb98..2d7946c 100644
--- a/.github/workflows/macos_test_cases.yml
+++ b/.github/workflows/macos_test_cases.yml
@@ -23,7 +23,11 @@ jobs:
       - name: Install pip
         run: conda run -n spare conda install pip
       - name: Install spare scores
-        run: conda run -n spare pip install spare_scores
+        run: |
+          python setup.py bdist_wheel
+          cd dist
+          WHEEL_FILE=$(ls spare_scores*)
+          pip install "$WHEEL_FILE"
       - name: Download dependencies
         run: pip install setuptools && pip install .
      - name: Run unit tests
diff --git a/.github/workflows/ubuntu_test_cases.yml b/.github/workflows/ubuntu_test_cases.yml
index 7ae22ea..6b276c2 100644
--- a/.github/workflows/ubuntu_test_cases.yml
+++ b/.github/workflows/ubuntu_test_cases.yml
@@ -23,20 +23,15 @@ jobs:
       - name: Install pip
         run: conda run -n spare conda install pip
       - name: Install spare scores
-        run: conda run -n spare pip install spare_scores
+        run: |
+          python setup.py bdist_wheel
+          cd dist
+          WHEEL_FILE=$(ls spare_scores*)
+          pip install "$WHEEL_FILE"
       - name: Download dependencies
         run: pip install setuptools && pip install .
       - name: Run unit tests
         run: |
           cd tests/unit && python -m unittest discover -s . -p "*.py"
-      - name: Generate Coverage Report
-        run: |
-          pip install pytest-cov
-          cd tests/unit && pytest --cov=../../ --cov-report=xml
-      - name: Upload Coverage to Codecov
-        uses: codecov/codecov-action@v4.0.1
-        with:
-          token: ${{ secrets.CODECOV_TOKEN }}
-          slug: CBICA/spare_score
diff --git a/codecov.yml b/codecov.yml
index 4fa634e..c82f0ee 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -23,3 +23,4 @@ ignore:
   - "merge_ROI_demo_and_test.py"
   - "setup.py"
   - "spare_scores/cli.py"
+  - "tests/conftest.py"
diff --git a/spare_scores/classes.py b/spare_scores/classes.py
index 1a11fff..f072b76 100644
--- a/spare_scores/classes.py
+++ b/spare_scores/classes.py
@@ -2,10 +2,11 @@
 from typing import Any
 
 import pandas as pd
-from data_prep import logging_basic_config
-from mlp import MLPModel
-from mlp_torch import MLPTorchModel
-from svm import SVMModel
+
+from .data_prep import logging_basic_config
+from .mlp import MLPModel
+from .mlp_torch import MLPTorchModel
+from .svm import SVMModel
 
 
 class SpareModel:
@@ -77,7 +78,7 @@ def __init__(
                 predictors, target, key_var, verbose, **parameters, **kwargs
             )
         else:
-            logger.err(f"Model type {self.model_type} not supported.")
+            logger.error(f"Model type {self.model_type} not supported.")
             raise NotImplementedError("Only SVM is supported currently.")
 
     def set_parameters(self, **parameters: Any) -> None:
diff --git a/spare_scores/cli.py b/spare_scores/cli.py
index 93848c4..598d6ab 100644
--- a/spare_scores/cli.py
+++ b/spare_scores/cli.py
@@ -1,7 +1,8 @@
 import argparse
 
 import pkg_resources  # type: ignore
-from spare import spare_test, spare_train
+
+from .spare import spare_test, spare_train
 
 VERSION = pkg_resources.require("spare_scores")[0].version
 
diff --git a/spare_scores/data_prep.py b/spare_scores/data_prep.py
index a262c70..a7e3127 100644
--- a/spare_scores/data_prep.py
+++ b/spare_scores/data_prep.py
@@ -6,7 +6,8 @@
 import numpy as np
 import pandas as pd
 from scipy import stats
-from util import convert_to_number_if_possible
+
+from .util import convert_to_number_if_possible
 
 
 def check_train(
diff --git a/spare_scores/mlp.py b/spare_scores/mlp.py
index d101563..afb233a 100644
--- a/spare_scores/mlp.py
+++ b/spare_scores/mlp.py
@@ -4,7 +4,6 @@
 
 import numpy as np
 import pandas as pd
-from data_prep import logging_basic_config
 from sklearn import metrics
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.model_selection import GridSearchCV, KFold
@@ -13,6 +12,8 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils._testing import ignore_warnings
 
+from .data_prep import logging_basic_config
+
 
 class MLPModel:
     """
diff --git a/spare_scores/mlp_torch.py b/spare_scores/mlp_torch.py
index 3715432..b37a0ab 100644
--- a/spare_scores/mlp_torch.py
+++ b/spare_scores/mlp_torch.py
@@ -9,7 +9,6 @@
 import torch
 import torch.nn as nn
 import torch.optim as optim
-from data_prep import logging_basic_config
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.metrics import (
     accuracy_score,
@@ -29,6 +28,8 @@
 from sklearn.utils._testing import ignore_warnings
 from torch.utils.data import DataLoader, Dataset
 
+from .data_prep import logging_basic_config
+
 os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"  # for MPS backend
 device = (
     "cuda"
diff --git a/spare_scores/spare.py b/spare_scores/spare.py
index 7caddc8..ae5dcaa 100644
--- a/spare_scores/spare.py
+++ b/spare_scores/spare.py
@@ -3,14 +3,21 @@
 
 import numpy as np
 import pandas as pd
-from classes import MetaData, SpareModel
-from data_prep import (
+
+from .classes import MetaData, SpareModel
+from .data_prep import (
     check_test,
     check_train,
     convert_cat_variables,
     logging_basic_config,
 )
-from util import check_file_exists, is_unique_identifier, load_df, load_model, save_file
+from .util import (
+    check_file_exists,
+    is_unique_identifier,
+    load_df,
+    load_model,
+    save_file,
+)
 
 
 def spare_train(
@@ -105,7 +112,7 @@ def spare_train(
 
     # Check if it contains any errors.
     try:
-        df, predictors, mdl_task = check_train(
+        df, predictors, mdl_task = check_train(  # type: ignore
             df, predictors, to_predict, verbose, pos_group
         )
     except Exception as e:
@@ -200,9 +207,6 @@ def spare_train(
 
     if output != "" and output is not None:
         save_file(result, output, "train", logger)
-    print("###### PRINTING ########")
-    print(result)
-    print("####### END ###########")
     res["status"] = "OK"
     res["data"] = result
     res["status_code"] = 0
diff --git a/spare_scores/svm.py b/spare_scores/svm.py
index 5734927..a3a7b30 100644
--- a/spare_scores/svm.py
+++ b/spare_scores/svm.py
@@ -4,12 +4,13 @@
 
 import numpy as np
 import pandas as pd
-from data_prep import logging_basic_config
 from sklearn import metrics
 from sklearn.model_selection import GridSearchCV, RepeatedKFold
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC, LinearSVC, LinearSVR
-from util import expspace
+
+from .data_prep import logging_basic_config
+from .util import expspace
 
 
 class SVMModel:
diff --git a/tests/unit/test_data_prep.py b/tests/unit/test_data_prep.py
index 591f8fa..8f07ac0 100644
--- a/tests/unit/test_data_prep.py
+++ b/tests/unit/test_data_prep.py
@@ -1,21 +1,17 @@
 import logging
 import os
-import sys
 import unittest
 
 import pandas as pd
 
-sys.path.append(
-    "../../spare_scores/"
-)  # check_test and check_train were imported from the build, but now they are updated
-from data_prep import (  # If updates go through, it can be updated to spare_scores.data_prep
+from spare_scores.data_prep import (  # If updates go through, it can be updated to spare_scores.data_prep
     age_sex_match,
     check_test,
     check_train,
     logging_basic_config,
     smart_unique,
 )
-from util import load_df
+from spare_scores.util import load_df
 
 
 class CheckDataPrep(unittest.TestCase):
diff --git a/tests/unit/test_spare_scores.py b/tests/unit/test_spare_scores.py
index 48ac68d..149ddf8 100644
--- a/tests/unit/test_spare_scores.py
+++ b/tests/unit/test_spare_scores.py
@@ -1,14 +1,12 @@
-import sys
 import unittest
 from pathlib import Path
 import numpy as np
 import pandas as pd
+import os
+from spare_scores.util import load_df, load_model
+from spare_scores.mlp_torch import MLPDataset
 
-sys.path.append("../../spare_scores")
-from util import load_df, load_model
-from mlp_torch import MLPDataset
-
-from spare_scores import spare_test, spare_train
+from spare_scores.spare import spare_test, spare_train
 
 class CheckMLPDataset(unittest.TestCase):
     def test_len(self):
@@ -101,6 +99,32 @@ def test_spare_train_MLP(self):
             set(metadata["predictors"]) == set(self.model_fixture[1]["predictors"])
         )
         self.assertTrue(metadata["to_predict"] == self.model_fixture[1]["to_predict"])
+
+        # test case 2: testing MLP regression model
+        result = spare_train(
+            self.df_fixture,
+            "ROI1",
+            model_type="MLP",
+            data_vars = [
+                "ROI2",
+                "ROI3",
+                "ROI4",
+                "ROI5",
+                "ROI6",
+                "ROI7",
+                "ROI8",
+                "ROI9",
+                "ROI10"
+            ]
+        )
+        status, result_data = result["status"], result["data"]
+        metadata = result_data[1]
+        print(f"######## {result_data} #########")
+        print(f"######## {metadata} ########")
+        self.assertTrue(status == "OK")
+        self.assertTrue(metadata["mdl_type"] == "MLP")
+        self.assertTrue(metadata["kernel"] == "linear")
+        # self.assertTrue(metadata["to_predict"] == "to_predict")
 
     def test_spare_train_MLPTorch(self):
         self.df_fixture = load_df("../fixtures/sample_data.csv")
@@ -134,6 +158,30 @@ def test_spare_train_MLPTorch(self):
             set(metadata["predictors"]) == set(self.model_fixture[1]["predictors"])
         )
         self.assertTrue(metadata["to_predict"] == self.model_fixture[1]["to_predict"])
+
+        # test case 2: testing MLPTorch regression model
+        result = spare_train(
+            self.df_fixture,
+            "ROI1",
+            model_type="MLPTorch",
+            data_vars = [
+                "ROI2",
+                "ROI3",
+                "ROI4",
+                "ROI5",
+                "ROI6",
+                "ROI7",
+                "ROI8",
+                "ROI9",
+                "ROI10",
+            ]
+        )
+        status, result_data = result["status"], result["data"]
+        metadata = result_data[1]
+        self.assertTrue(status == "OK")
+        self.assertTrue(metadata["mdl_type"] == "MLPTorch")
+        self.assertTrue(metadata["kernel"] == "linear")
+        # self.assertTrue(metadata["to_predict"] == "to_predict")
 
     def test_spare_train_SVM(self):
         self.df_fixture = load_df("../fixtures/sample_data.csv")
@@ -171,3 +219,89 @@ def test_spare_train_SVM(self):
             metadata["categorical_var_map"]
             == self.model_fixture[1]["categorical_var_map"]
         )
+
+        # test case 2: testing SVM regression model
+        result = spare_train(
+            self.df_fixture,
+            "ROI1",
+            data_vars = [
+                "ROI2",
+                "ROI3",
+                "ROI4",
+                "ROI5",
+                "ROI6",
+                "ROI7",
+                "ROI8",
+                "ROI9",
+                "ROI10"
+            ]
+        )
+        status, result_data = result["status"], result["data"]
+        metadata = result_data[1]
+        self.assertTrue(status == "OK")
+        self.assertTrue(metadata["mdl_type"] == "SVM")
+        self.assertTrue(metadata["kernel"] == "linear")
+        # self.assertTrue(metadata["to_predict"] == "to_predict")
+
+    def test_spare_train_SVM_None(self):
+        self.df_fixture = load_df("../fixtures/sample_data.csv")
+        # Test case 1: Training with no data vars
+        result = spare_train(
+            self.df_fixture,
+            "Age"
+        )
+        self.assertTrue(result is not None)
+
+
+    def test_spare_train_SVM2(self):
+        self.df_fixture = load_df("../fixtures/sample_data.csv")
+        # Test case 1: Test overwrites
+        result = spare_train(
+            self.df_fixture,
+            "Age",
+            output="test_util.py"
+        )
+        self.assertTrue(result["status_code"] == 2)
+
+        # Test case 2: Train with non existing output file
+        result = spare_train(
+            self.df_fixture,
+            "Age",
+            data_vars=[
+                "ROI1",
+                "ROI2",
+                "ROI3",
+                "ROI4",
+                "ROI5",
+                "ROI6",
+                "ROI7",
+                "ROI8",
+                "ROI9",
+                "ROI10",
+            ],
+            output="results"
+        )
+        self.assertTrue(os.path.isfile("results.pkl.gz") == True)
+        os.remove("results.pkl.gz")
+
+    def test_spare_train_non_existing_model(self):
+        self.df_fixture = load_df("../fixtures/sample_data.csv")
+        # Test case 1: training with non existing model type
+        result = spare_train(
+            self.df_fixture,
+            "Age",
+            model_type="CNN",
+            data_vars=[
+                "ROI1",
+                "ROI2",
+                "ROI3",
+                "ROI4",
+                "ROI5",
+                "ROI6",
+                "ROI7",
+                "ROI8",
+                "ROI9",
+                "ROI10",
+            ],
+        )
+        self.assertTrue(result["status_code"] == 2)
diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
index 2265f36..b3178f2 100644
--- a/tests/unit/test_util.py
+++ b/tests/unit/test_util.py
@@ -1,14 +1,12 @@
 import logging
 import os
-import sys
 import unittest
 from pathlib import Path
 
 import numpy as np
 import pandas as pd
 
-sys.path.append("../../spare_scores")
-from util import (
+from spare_scores.util import (
     add_file_extension,
     check_file_exists,
     convert_to_number_if_possible,
@@ -137,6 +135,12 @@ def test_load_examples(self):
         result = load_examples(file_name)
         self.assertFalse(result is None and isinstance(result, pd.DataFrame))
 
+        # test case 3: testing with non existant filename
+        file_name = "non_existant"
+        result = load_examples(file_name)
+        self.assertTrue(result is None)
+
+
     def test_convert_to_number_if_possible(self):
         # test case 1: valid convertion to integer
         num = "254"
@@ -200,3 +204,4 @@ def test_add_file_extension(self):
         filename = "file.tar.gz"
         extension = ".gz"
         self.assertTrue(add_file_extension(filename, extension) == "file.tar.gz")
+

From cd8b1cfd35b630716142e820e5f98f2610fd3576 Mon Sep 17 00:00:00 2001
From: Spiros Maggioros
Date: Tue, 30 Jul 2024 21:57:53 +0300
Subject: [PATCH 3/4] Forgot dependencies for wheel

---
 .github/workflows/macos_test_cases.yml  | 1 +
 .github/workflows/ubuntu_test_cases.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/macos_test_cases.yml b/.github/workflows/macos_test_cases.yml
index 2d7946c..a8d6733 100644
--- a/.github/workflows/macos_test_cases.yml
+++ b/.github/workflows/macos_test_cases.yml
@@ -24,6 +24,7 @@ jobs:
         run: conda run -n spare conda install pip
       - name: Install spare scores
         run: |
+          pip install setuptools twine wheel
           python setup.py bdist_wheel
           cd dist
           WHEEL_FILE=$(ls spare_scores*)
diff --git a/.github/workflows/ubuntu_test_cases.yml b/.github/workflows/ubuntu_test_cases.yml
index 6b276c2..3a17143 100644
--- a/.github/workflows/ubuntu_test_cases.yml
+++ b/.github/workflows/ubuntu_test_cases.yml
@@ -24,6 +24,7 @@ jobs:
         run: conda run -n spare conda install pip
      - name: Install spare scores
         run: |
+          pip install setuptools twine wheel
           python setup.py bdist_wheel
           cd dist
           WHEEL_FILE=$(ls spare_scores*)

From f60329ef38c5cc68e3f1cccbe9e29778a7eb0d91 Mon Sep 17 00:00:00 2001
From: Spiros Maggioros
Date: Wed, 31 Jul 2024 01:35:22 +0300
Subject: [PATCH 4/4] More test cases

---
 tests/unit/test_spare_scores.py | 113 +++++++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test_spare_scores.py b/tests/unit/test_spare_scores.py
index 149ddf8..af11d8b 100644
--- a/tests/unit/test_spare_scores.py
+++ b/tests/unit/test_spare_scores.py
@@ -3,9 +3,9 @@
 import numpy as np
 import pandas as pd
 import os
+from spare_scores.data_prep import check_test
 from spare_scores.util import load_df, load_model
 from spare_scores.mlp_torch import MLPDataset
-
 from spare_scores.spare import spare_test, spare_train
 
 class CheckMLPDataset(unittest.TestCase):
     def test_len(self):
@@ -119,8 +119,6 @@ def test_spare_train_MLP(self):
         )
         status, result_data = result["status"], result["data"]
         metadata = result_data[1]
-        print(f"######## {result_data} #########")
-        print(f"######## {metadata} ########")
         self.assertTrue(status == "OK")
         self.assertTrue(metadata["mdl_type"] == "MLP")
         self.assertTrue(metadata["kernel"] == "linear")
@@ -305,3 +303,112 @@ def test_spare_train_non_existing_model(self):
             ],
         )
         self.assertTrue(result["status_code"] == 2)
+
+    def test_spare_test_exceptions(self):
+        self.df_fixture = load_df("../fixtures/sample_data.csv")
load_df("../fixtures/sample_data.csv") + self.model_fixture = load_model("../fixtures/sample_model.pkl.gz") + + # Test case 1: Test with existing output path + if(not os.path.isfile("output.csv")): + f = open("output.csv", "x") + result = spare_test(self.df_fixture, self.model_fixture, output="output") + self.assertTrue(result["status_code"] == 0) + os.remove("output.csv") + + # Test case 2: Test with predictors not existing in the original dataframe + data = { + "Var1": [x for x in range(100)], + "Var2": [x for x in range(100)], + "label": [x**2 for x in range(100)] + } + self.df_fixture = pd.DataFrame(data=data) + meta_data = { + "predictors": "Not_existing" + } + err, cols_not_found = check_test(self.df_fixture, meta_data) + self.assertTrue(len(err) != 0) + self.assertTrue(cols_not_found is not None) + + + def test_spare_train_regression_error(self): + self.df_fixture = load_df("../fixtures/sample_data.csv") + # Test case 1: testing with non-integer like as predictor + result = spare_train( + self.df_fixture, + "ScanID", + data_vars=[ + "ROI1", + "ROI2", + "ROI3", + "ROI4", + "ROI5", + "ROI6", + "ROI7", + "ROI8", + "ROI9", + "ROI10", + ] + ) + + self.assertTrue(result["status_code"] == 2) + self.assertTrue(result["status"] == "Dataset check failed before training was initiated.") + + # Test case 2: testing with a too-small dataset + data = { + "Var1": [1,2,3,4,5], + "Var2": [2,4,6,8,10], + "label": [1.5,2.4,3.2,4.5,5.5] + } + self.df_fixture = pd.DataFrame(data=data) + result = spare_train( + self.df_fixture, + "label", + data_vars=[ + "Var1", + "Var2" + ] + ) + + self.assertTrue(result["status_code"] == 2) + self.assertTrue(result["status"] == "Dataset check failed before training was initiated.") + + # Test case 3: testing with a label that has to variance + data = { + "Var1": [1,2,3,4,5], + "Var2": [2,4,6,8,10], + "label": [1,1,1,1,1] + } + self.df_fixture = pd.DataFrame(data=data) + result = spare_train( + self.df_fixture, + "label", + data_vars=[ + "Var1", + "Var2" + ] + ) + self.assertTrue(result["status_code"] == 2) + self.assertTrue(result["status"] == "Dataset check failed before training was initiated.") + + # Test case 4: testing with a dataset that may be too small + data = { + "Var1": [x for x in range(80)], + "Var2": [x for x in range(80)], + "Var3": [x for x in range(80)], + "label": [x*2 for x in range(80)] + } + + self.df_fixture = pd.DataFrame(data=data) + result = spare_train( + self.df_fixture, + "label", + data_vars=[ + "Var1", + "Var2" + ] + ) + + self.assertTrue(result is not None) + + +