From 2b125d881f0f019d8b8cce78a9ae7202bee59e56 Mon Sep 17 00:00:00 2001 From: Fede Date: Mon, 29 Apr 2024 18:08:45 +0300 Subject: [PATCH 1/2] Fix typing and warnings as much as we can --- julearn/api.py | 25 ++- julearn/base/estimators.py | 34 +++- julearn/base/tests/test_base_estimators.py | 2 +- julearn/inspect/_cv.py | 26 +-- julearn/inspect/_preprocess.py | 2 +- julearn/inspect/tests/test_cv.py | 6 +- julearn/inspect/tests/test_inspector.py | 21 +-- julearn/inspect/tests/test_pipeline.py | 41 +++-- julearn/inspect/tests/test_preprocess.py | 9 +- .../model_selection/available_searchers.py | 6 +- .../model_selection/stratified_bootstrap.py | 12 +- julearn/models/dynamic.py | 2 +- julearn/models/tests/test_models.py | 12 +- julearn/pipeline/pipeline_creator.py | 43 +++-- julearn/pipeline/target_pipeline.py | 2 +- julearn/pipeline/tests/test_merger.py | 29 ++-- .../pipeline/tests/test_pipeline_creator.py | 2 +- julearn/prepare.py | 6 +- julearn/scoring/available_scorers.py | 18 ++- julearn/stats/corrected_ttest.py | 12 +- julearn/stats/tests/test_corrected_ttest.py | 27 +++- julearn/tests/test_api.py | 148 ++++++++++++------ julearn/transformers/confound_remover.py | 4 +- .../dataframe/tests/test_drop_columns.py | 2 +- .../dataframe/tests/test_filter_columns.py | 2 +- .../dataframe/tests/test_set_column_types.py | 16 +- julearn/transformers/ju_column_transformer.py | 2 +- .../target/ju_transformed_target_model.py | 2 +- .../target/target_confound_remover.py | 2 +- .../tests/test_ju_transformed_target_model.py | 4 +- julearn/transformers/tests/test_cbpm.py | 6 +- julearn/transformers/tests/test_confounds.py | 23 ++- .../tests/test_jucolumntransformers.py | 14 +- julearn/utils/checks.py | 4 +- julearn/utils/logging.py | 2 +- julearn/utils/testing.py | 32 ++-- julearn/utils/typing.py | 58 ++++--- julearn/viz/_scores.py | 9 +- pyproject.toml | 16 +- 39 files changed, 454 insertions(+), 229 deletions(-) diff --git a/julearn/api.py b/julearn/api.py index b89b42ba7..039254dfc 100644 --- a/julearn/api.py +++ b/julearn/api.py @@ -4,13 +4,13 @@ # Sami Hamdan # License: AGPL -from typing import Dict, Iterable, List, Optional, Union +from typing import Dict, List, Optional, Union import numpy as np import pandas as pd +import sklearn from sklearn.base import BaseEstimator from sklearn.model_selection import ( - BaseCrossValidator, check_cv, cross_validate, ) @@ -23,6 +23,7 @@ from .prepare import check_consistency, prepare_input_data from .scoring import check_scoring from .utils import _compute_cvmdsum, logger, raise_error +from .utils.typing import CVLike def run_cross_validation( # noqa: C901 @@ -36,7 +37,7 @@ def run_cross_validation( # noqa: C901 return_estimator: Optional[str] = None, return_inspector: bool = False, return_train_score: bool = False, - cv: Optional[Union[int, BaseCrossValidator, Iterable]] = None, + cv: Optional[CVLike] = None, groups: Optional[str] = None, scoring: Union[str, List[str], None] = None, pos_labels: Union[str, List[str], None] = None, @@ -357,20 +358,32 @@ def run_cross_validation( # noqa: C901 # Prepare cross validation cv_outer = check_cv( - cv, classifier=problem_type == "classification" # type: ignore + cv, # type: ignore + classifier=problem_type == "classification", ) logger.info(f"Using outer CV scheme {cv_outer}") check_consistency(df_y, cv, groups, problem_type) # type: ignore cv_return_estimator = return_estimator in ["cv", "all"] - scoring = check_scoring(pipeline, scoring, wrap_score=wrap_score) + scoring = check_scoring( + pipeline, # type: ignore + 
scoring, + wrap_score=wrap_score, + ) cv_mdsum = _compute_cvmdsum(cv_outer) fit_params = {} if df_groups is not None: if isinstance(pipeline, BaseSearchCV): fit_params["groups"] = df_groups.values + + _sklearn_deprec_fit_params = {} + if sklearn.__version__ >= "1.4.0": + _sklearn_deprec_fit_params["params"] = fit_params + else: + _sklearn_deprec_fit_params["fit_params"] = fit_params + scores = cross_validate( pipeline, df_X, @@ -382,7 +395,7 @@ def run_cross_validation( # noqa: C901 n_jobs=n_jobs, return_train_score=return_train_score, verbose=verbose, # type: ignore - fit_params=fit_params, + **_sklearn_deprec_fit_params, ) n_repeats = getattr(cv_outer, "n_repeats", 1) diff --git a/julearn/base/estimators.py b/julearn/base/estimators.py index 7296e5927..e198c4874 100644 --- a/julearn/base/estimators.py +++ b/julearn/base/estimators.py @@ -13,11 +13,11 @@ try: # sklearn < 1.4.0 - from sklearn.utils.validation import _check_fit_params + from sklearn.utils.validation import _check_fit_params # type: ignore fit_params_checker = _check_fit_params except ImportError: # sklearn >= 1.4.0 - from sklearn.utils.validation import _check_method_params + from sklearn.utils.validation import _check_method_params # type: ignore fit_params_checker = _check_method_params @@ -180,7 +180,12 @@ def __init__( self.row_select_col_type = row_select_col_type self.row_select_vals = row_select_vals - def fit(self, X, y=None, **fit_params): # noqa: N803 + def fit( + self, + X: pd.DataFrame, # noqa: N803 + y: Optional[pd.Series] = None, + **fit_params, + ): """Fit the model. This method will fit the model using only the columns selected by @@ -217,8 +222,21 @@ def fit(self, X, y=None, **fit_params): # noqa: N803 self.row_select_vals = [self.row_select_vals] return self._fit(**self._select_rows(X, y, **fit_params)) + def _fit( + self, + X: pd.DataFrame, # noqa: N803, + y: Optional[pd.Series], + **kwargs, + ) -> None: + raise_error( + "This method should be implemented in the concrete class", + klass=NotImplementedError, + ) + def _add_backed_filtered( - self, X: pd.DataFrame, X_trans: pd.DataFrame # noqa: N803 + self, + X: pd.DataFrame, # noqa: N803 + X_trans: pd.DataFrame, # noqa: N803 ) -> pd.DataFrame: """Add the left-out columns back to the transformed data. @@ -301,7 +319,7 @@ def __init__( def fit( self, - X: pd.DataFrame, # noqa: N803 + X: DataLike, # noqa: N803 y: Optional[DataLike] = None, **fit_params: Any, ) -> "WrapModel": @@ -312,7 +330,7 @@ def fit( Parameters ---------- - X : pd.DataFrame + X : DataLike The data to fit the model on. y : DataLike, optional The target data (default is None). 
@@ -329,9 +347,9 @@ def fit( if self.needed_types is not None: self.needed_types = ensure_column_types(self.needed_types) - Xt = self.filter_columns(X) + Xt = self.filter_columns(X) # type: ignore self.model_ = self.model - self.model_.fit(Xt, y, **fit_params) + self.model_.fit(Xt, y, **fit_params) # type: ignore return self def predict(self, X: pd.DataFrame) -> DataLike: # noqa: N803 diff --git a/julearn/base/tests/test_base_estimators.py b/julearn/base/tests/test_base_estimators.py index 1f85b2223..67cd546ca 100644 --- a/julearn/base/tests/test_base_estimators.py +++ b/julearn/base/tests/test_base_estimators.py @@ -110,7 +110,7 @@ def test_WrapModel( np.random.seed(42) lr = model() - lr.fit(X_iris_selected, y_iris) + lr.fit(X_iris_selected, y_iris) # type: ignore pred_sk = lr.predict(X_iris_selected) np.random.seed(42) diff --git a/julearn/inspect/_cv.py b/julearn/inspect/_cv.py index c8ecded72..a22869493 100644 --- a/julearn/inspect/_cv.py +++ b/julearn/inspect/_cv.py @@ -4,13 +4,14 @@ # Sami Hamdan # License: AGPL -from typing import List, Optional, Union +from typing import Optional, Union import pandas as pd from sklearn.model_selection import BaseCrossValidator, check_cv from sklearn.utils.metaestimators import available_if from ..utils import _compute_cvmdsum, is_nonoverlapping_cv, raise_error +from ..utils.typing import DataLike from ._pipeline import PipelineInspector @@ -60,14 +61,13 @@ class FoldsInspector: def __init__( self, scores: pd.DataFrame, - cv: BaseCrossValidator, - X: Union[str, List[str]], # noqa: N803 - y: str, + cv: Union[BaseCrossValidator, int], + X: DataLike, # noqa: N803 + y: pd.Series, func: str = "predict", - groups: Optional[str] = None, + groups: Optional[pd.Series] = None, ): self._scores = scores - self._cv = cv self._X = X self._y = y self._func = func @@ -92,7 +92,7 @@ def __init__( ) cv = check_cv(cv) - + self._cv = cv t_cv_mdsum = _compute_cvmdsum(cv) if t_cv_mdsum != cv_mdsums[0]: raise_error( @@ -120,10 +120,16 @@ def _get_predictions(self, func): predictions = [] for i_fold, (_, test) in enumerate( - self._cv.split(self._X, self._y, groups=self._groups) + self._cv.split( + self._X, # type: ignore + self._y, + groups=self._groups, + ) ): t_model = self._scores["estimator"][i_fold] - t_values = getattr(t_model, func)(self._X.iloc[test]) + t_values = getattr(t_model, func)( + self._X.iloc[test] # type: ignore + ) if t_values.ndim == 1: t_values = t_values[:, None] column_names = [f"p{i}" for i in range(t_values.shape[1])] @@ -152,7 +158,7 @@ def _get_predictions(self, func): t_df.columns = [f"fold{i_fold}_{x}" for x in t_df.columns] predictions = pd.concat(predictions, axis=1) predictions = predictions.sort_index() - predictions["target"] = self._y.values + predictions["target"] = self._y.values # type: ignore return predictions def __getitem__(self, key): diff --git a/julearn/inspect/_preprocess.py b/julearn/inspect/_preprocess.py index 38bd2f4bb..27e2a76ef 100644 --- a/julearn/inspect/_preprocess.py +++ b/julearn/inspect/_preprocess.py @@ -53,7 +53,7 @@ def preprocess( else: raise_error(f"No step named {until} found.") df_out = pipeline[:i].transform(_X) - + df_out = df_out.copy() if not isinstance(df_out, pd.DataFrame) and with_column_types is False: raise_error( "The output of the pipeline is not a DataFrame. 
Cannot remove " diff --git a/julearn/inspect/tests/test_cv.py b/julearn/inspect/tests/test_cv.py index cced789ea..012d59f25 100644 --- a/julearn/inspect/tests/test_cv.py +++ b/julearn/inspect/tests/test_cv.py @@ -3,7 +3,6 @@ # Authors: Federico Raimondo # Sami Hamdan # License: AGPL - import numpy as np import pandas as pd import pytest @@ -70,7 +69,10 @@ def scores(df_typed_iris, n_iters=5, mock_model=None): if mock_model is None: mock_model = MockModelReturnsIndex - estimators = [WrapModel(mock_model()).fit(X, y) for _ in range(n_iters)] + estimators = [ + WrapModel(mock_model()).fit(X, y) # type: ignore + for _ in range(n_iters) + ] return pd.DataFrame( { diff --git a/julearn/inspect/tests/test_inspector.py b/julearn/inspect/tests/test_inspector.py index 6bd5d1487..8643cee1d 100644 --- a/julearn/inspect/tests/test_inspector.py +++ b/julearn/inspect/tests/test_inspector.py @@ -18,28 +18,28 @@ def test_no_cv() -> None: """Test inspector with no cross-validation.""" - inspector = Inspector({}) + inspector = Inspector({}) # type: ignore with pytest.raises(ValueError, match="No cv"): _ = inspector.folds def test_no_X() -> None: """Test inspector with no features.""" - inspector = Inspector({}, cv=5) + inspector = Inspector({}, cv=5) # type: ignore with pytest.raises(ValueError, match="No X"): _ = inspector.folds def test_no_y() -> None: """Test inspector with no targets.""" - inspector = Inspector({}, cv=5, X=[1, 2, 3]) + inspector = Inspector({}, cv=5, X=[1, 2, 3]) # type: ignore with pytest.raises(ValueError, match="No y"): _ = inspector.folds def test_no_model() -> None: """Test inspector with no model.""" - inspector = Inspector({}) + inspector = Inspector({}) # type: ignore with pytest.raises(ValueError, match="No model"): _ = inspector.model @@ -63,8 +63,11 @@ def test_normal_usage(df_iris: "pd.DataFrame") -> None: return_inspector=True, problem_type="classification", ) - assert pipe == inspect.model._model - for (_, score), inspect_fold in zip(scores.iterrows(), inspect.folds): + assert pipe == inspect.model._model # type: ignore + for (_, score), inspect_fold in zip( + scores.iterrows(), # type: ignore + inspect.folds, # type: ignore + ): assert score["estimator"] == inspect_fold.model._model @@ -88,6 +91,6 @@ def test_normal_usage_with_search(df_iris: "pd.DataFrame") -> None: return_estimator="all", return_inspector=True, ) - assert pipe == inspect.model._model - inspect.model.get_fitted_params() - inspect.model.get_params() + assert pipe == inspect.model._model # type: ignore + inspect.model.get_fitted_params() # type: ignore + inspect.model.get_params() # type: ignore diff --git a/julearn/inspect/tests/test_pipeline.py b/julearn/inspect/tests/test_pipeline.py index afbe631c4..73b1d9690 100644 --- a/julearn/inspect/tests/test_pipeline.py +++ b/julearn/inspect/tests/test_pipeline.py @@ -4,8 +4,9 @@ # Sami Hamdan # License: AGPL -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type +from typing import Any, Dict, List, Optional, Type +import pandas as pd import pytest from sklearn.base import BaseEstimator from sklearn.decomposition import PCA @@ -17,10 +18,6 @@ from julearn.transformers import JuColumnTransformer -if TYPE_CHECKING: - import pandas as pd - - class MockTestEst(BaseEstimator): """Class for estimator tests. 
@@ -39,8 +36,8 @@ def __init__(self, hype_0: int = 0, hype_1: int = 1) -> None: def fit( self, - X: List[str], # noqa: N803 - y: Optional[str] = None, + X: pd.DataFrame, # noqa: N803 + y: Optional[pd.Series] = None, **fit_params: Any, ) -> "MockTestEst": """Fit the estimator. @@ -64,7 +61,7 @@ def fit( self.param_1_ = 1 return self - def transform(self, X: List[str]) -> List[str]: # noqa: N803 + def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803 """Transform the estimator. Parameters @@ -90,7 +87,7 @@ def transform(self, X: List[str]) -> List[str]: # noqa: N803 ["zscore", "pca", "svm"], ], ) -def test_get_stepnames(steps: List[str], df_iris: "pd.DataFrame") -> None: +def test_get_stepnames(steps: List[str], df_iris: pd.DataFrame) -> None: """Test step names fetch. Parameters @@ -157,7 +154,11 @@ def test_steps( [ [MockTestEst(), {"param_0_": 0, "param_1_": 1}], [ - JuColumnTransformer("test", MockTestEst(), "continuous"), + JuColumnTransformer( + "test", + MockTestEst(), # type: ignore + "continuous", + ), {"param_0_": 0, "param_1_": 1}, ], ], @@ -201,8 +202,14 @@ def test_inspect_pipeline(df_iris: "pd.DataFrame") -> None: pipe = ( PipelineCreator(problem_type="classification") - .add(JuColumnTransformer("test", MockTestEst(), "continuous")) - .add(SVC()) + .add( + JuColumnTransformer( + "test", + MockTestEst(), # type: ignore + "continuous", + ) + ) + .add(SVC()) # type: ignore TODO: fix typing hints .to_pipeline() ) pipe.fit(df_iris.iloc[:, :-1], df_iris.species) @@ -230,8 +237,14 @@ def test_get_estimator(df_iris: "pd.DataFrame") -> None: """ pipe = ( PipelineCreator(problem_type="classification") - .add(JuColumnTransformer("test", MockTestEst(), "continuous")) - .add(SVC()) + .add( + JuColumnTransformer( + "test", + MockTestEst(), # type: ignore + "continuous", + ) + ) + .add(SVC()) # type: ignore TODO: fix typing hints .to_pipeline() ) pipe.fit(df_iris.iloc[:, :-1], df_iris.species) diff --git a/julearn/inspect/tests/test_preprocess.py b/julearn/inspect/tests/test_preprocess.py index ec281c519..df38c4c14 100644 --- a/julearn/inspect/tests/test_preprocess.py +++ b/julearn/inspect/tests/test_preprocess.py @@ -157,13 +157,20 @@ def test_preprocess_with_column_types(df_iris: pd.DataFrame) -> None: X = list(df_iris.iloc[:, :-1].columns) y = "species" + X_types = {"continuous": X} _, model = run_cross_validation( X=X, y=y, data=df_iris, + X_types=X_types, problem_type="classification", model="rf", return_estimator="final", ) - X_t = preprocess(model, X=X, data=df_iris, with_column_types=False) + X_t = preprocess( + model, # type: ignore + X=X, + data=df_iris, + with_column_types=False, + ) assert list(X_t.columns) == X diff --git a/julearn/model_selection/available_searchers.py b/julearn/model_selection/available_searchers.py index 499ba2edc..9da12e601 100644 --- a/julearn/model_selection/available_searchers.py +++ b/julearn/model_selection/available_searchers.py @@ -46,7 +46,7 @@ def list_searchers() -> List[str]: return list(_available_searchers) -def get_searcher(name: str) -> object: +def get_searcher(name: str) -> Type: """Get a searcher by name. Parameters @@ -56,8 +56,8 @@ def get_searcher(name: str) -> object: Returns ------- - obj - scikit-learn compatible searcher. + out + scikit-learn compatible searcher class. 
Raises ------ diff --git a/julearn/model_selection/stratified_bootstrap.py b/julearn/model_selection/stratified_bootstrap.py index 5206b532a..29f186328 100644 --- a/julearn/model_selection/stratified_bootstrap.py +++ b/julearn/model_selection/stratified_bootstrap.py @@ -9,7 +9,9 @@ import numpy as np from numpy.random import RandomState from sklearn.model_selection import BaseShuffleSplit -from sklearn.model_selection._split import _validate_shuffle_split +from sklearn.model_selection._split import ( + _validate_shuffle_split, # type: ignore +) class StratifiedBootstrap(BaseShuffleSplit): @@ -87,13 +89,13 @@ def _iter_indices( n_samples = [ _validate_shuffle_split( len(t_inds), - self.test_size, - self.train_size, - default_test_size=self._default_test_size, + self.test_size, # type: ignore + self.train_size, # type: ignore + default_test_size=self._default_test_size, # type: ignore ) for t_inds in y_inds ] - for _ in range(self.n_splits): + for _ in range(self.n_splits): # type: ignore train = [] test = [] for t_inds, (n_train, _) in zip(y_inds, n_samples): diff --git a/julearn/models/dynamic.py b/julearn/models/dynamic.py index 38b9eff6e..a04629c47 100644 --- a/julearn/models/dynamic.py +++ b/julearn/models/dynamic.py @@ -132,7 +132,7 @@ def fit( y_train = y[train] y_dsel = y[test] - self.ensemble.fit(X_train, y_train) + self.ensemble.fit(X_train, y_train) # type: ignore self._dsmodel = self._get_algorithm() self._dsmodel.fit(X_dsel, y_dsel) diff --git a/julearn/models/tests/test_models.py b/julearn/models/tests/test_models.py index 870fca671..011986cc8 100644 --- a/julearn/models/tests/test_models.py +++ b/julearn/models/tests/test_models.py @@ -113,7 +113,7 @@ def test_naive_bayes_estimators( "preprocess": None, "problem_type": "classification", } - clf = make_pipeline(clone(t_model)) + clf = make_pipeline(clone(t_model)) # type: ignore do_scoring_test( X, y, @@ -129,7 +129,7 @@ def test_naive_bayes_estimators( "preprocess": None, "problem_type": "classification", } - clf = make_pipeline(clone(t_model)) + clf = make_pipeline(clone(t_model)) # type: ignore do_scoring_test( X, y, @@ -150,7 +150,7 @@ def test_naive_bayes_estimators( "preprocess": None, "problem_type": "classification", } - clf = make_pipeline(clone(t_model)) + clf = make_pipeline(clone(t_model)) # type: ignore do_scoring_test( X, y, @@ -236,7 +236,7 @@ def test_classificationestimators( "problem_type": "classification", "preprocess": "zscore", } - clf = make_pipeline(StandardScaler(), clone(t_model)) + clf = make_pipeline(StandardScaler(), clone(t_model)) # type: ignore do_scoring_test( X, y, @@ -258,7 +258,7 @@ def test_classificationestimators( "problem_type": "classification", "preprocess": "zscore", } - clf = make_pipeline(StandardScaler(), clone(t_model)) + clf = make_pipeline(StandardScaler(), clone(t_model)) # type: ignore do_scoring_test( X, y, @@ -333,7 +333,7 @@ def test_regression_estimators( "preprocess": "zscore", "problem_type": "regression", } - clf = make_pipeline(StandardScaler(), clone(t_model)) + clf = make_pipeline(StandardScaler(), clone(t_model)) # type: ignore do_scoring_test( X, y, diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index 77f54189e..652e060d2 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -4,6 +4,7 @@ # Sami Hamdan # License: AGPL +import typing from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple, Union @@ -216,7 +217,7 @@ def add( "TargetPipelineCreator 
can only be added to the target." ) step = step.to_pipeline() # type: ignore - + step = typing.cast(JuTargetPipeline, step) # Validate the step self._validate_step(step, apply_to) @@ -266,7 +267,7 @@ def add( step, self.problem_type, **params_to_set ) elif len(params_to_set) > 0: - step.set_params(**params_to_set) + step.set_params(**params_to_set) # type: ignore # JuEstimators accept the apply_to parameter and return needed types if isinstance(step, JuEstimatorLike): @@ -301,7 +302,7 @@ def add( self._steps.append( Step( name=name, - estimator=step, + estimator=step, # type: ignore apply_to=apply_to, needed_types=needed_types, params_to_tune=params_to_tune, @@ -480,12 +481,12 @@ def to_pipeline( X_types = self._check_X_types(X_types) model_step = self._steps[-1] - target_transformer_step = None + target_trans_step = None transformer_steps = [] for _step in self._steps[:-1]: if "target" in _step.apply_to: - target_transformer_step = _step + target_trans_step = _step else: transformer_steps.append(_step) @@ -543,12 +544,12 @@ def to_pipeline( target_model_step = self._wrap_target_model( model_name, model_estimator, # type: ignore - target_transformer_step, # type: ignore + target_trans_step, # type: ignore ) target_step_to_tune = { f"{model_name}_target_transform__transformer__{param}": val for param, val in ( - target_transformer_step.params_to_tune.items() + target_trans_step.params_to_tune.items() # type: ignore ) } step_params_to_tune = { @@ -563,7 +564,7 @@ def to_pipeline( params_to_tune.update(step_params_to_tune) pipeline_steps.append((model_name, model_estimator)) pipeline = Pipeline(pipeline_steps).set_output(transform="pandas") - + pipeline = typing.cast(Pipeline, pipeline) # damn typing.. # Deal with the Hyperparameter tuning out = _prepare_hyperparameter_tuning( params_to_tune, search_params, pipeline @@ -573,7 +574,7 @@ def to_pipeline( @staticmethod def _wrap_target_model( - model_name: str, model: ModelLike, target_transformer_step: Step + model_name: str, model: ModelLike, target_trans_step: Step ) -> Tuple[str, JuTransformedTargetModel]: """Wrap the model in a JuTransformedTargetModel. @@ -583,7 +584,7 @@ def _wrap_target_model( The name of the model model : ModelLike The model to wrap - target_transformer_step : Step + target_trans_step : Step The step with the target transformer. Returns @@ -599,7 +600,7 @@ def _wrap_target_model( If the target transformer is not a JuTargetPipeline. """ - transformer = target_transformer_step.estimator + transformer = target_trans_step.estimator if not isinstance(transformer, JuTargetPipeline): raise_error( "The target transformer should be a JuTargetPipeline. " @@ -639,7 +640,9 @@ def _validate_model_params( ) def _get_step_name( - self, name: Optional[str], step: Union[EstimatorLike, str] + self, + name: Optional[str], + step: Union[EstimatorLike, str, TargetPipelineCreator], ) -> str: """Get the name of a step, with a count if it is repeated. @@ -657,7 +660,7 @@ def _get_step_name( """ out = name - if name is None: + if out is None: name = ( step if isinstance(step, str) @@ -670,7 +673,9 @@ def _get_step_name( return out def _validate_step( - self, step: Union[EstimatorLike, str], apply_to: ColumnTypesLike + self, + step: Union[EstimatorLike, str, TargetPipelineCreator], + apply_to: ColumnTypesLike, ) -> None: """Validate a step. @@ -689,7 +694,7 @@ def _validate_step( transformer. 
""" - if self._is_transfromer_step(step): + if self._is_transformer_step(step): if self._added_model: raise_error("Cannot add a transformer after adding a model") if self._added_target_transformer and not self._is_model_step( @@ -778,7 +783,9 @@ def _check_X_types( return X_types @staticmethod - def _is_transfromer_step(step: Union[str, EstimatorLike]) -> bool: + def _is_transformer_step( + step: Union[str, EstimatorLike, TargetPipelineCreator] + ) -> bool: """Check if a step is a transformer.""" if step in list_transformers(): return True @@ -787,7 +794,9 @@ def _is_transfromer_step(step: Union[str, EstimatorLike]) -> bool: return False @staticmethod - def _is_model_step(step: Union[EstimatorLike, str]) -> bool: + def _is_model_step( + step: Union[EstimatorLike, str, TargetPipelineCreator] + ) -> bool: """Check if a step is a model.""" if step in list_models(): return True diff --git a/julearn/pipeline/target_pipeline.py b/julearn/pipeline/target_pipeline.py index 415f34c6d..ff4cf5e8e 100644 --- a/julearn/pipeline/target_pipeline.py +++ b/julearn/pipeline/target_pipeline.py @@ -169,6 +169,6 @@ def needed_types(self): needed_types = [] for _, t_step in self.steps: if getattr(t_step, "needed_types", None) is not None: - needed_types.extend(t_step.needed_types) + needed_types.extend(t_step.needed_types) # type: ignore needed_types = set(needed_types) return needed_types if len(needed_types) > 0 else None diff --git a/julearn/pipeline/tests/test_merger.py b/julearn/pipeline/tests/test_merger.py index 96caaca17..54468ebf0 100644 --- a/julearn/pipeline/tests/test_merger.py +++ b/julearn/pipeline/tests/test_merger.py @@ -26,15 +26,15 @@ def test_merger_pipelines() -> None: pipe1 = creator1.to_pipeline() pipe2 = creator2.to_pipeline() - merged = merge_pipelines(pipe1, pipe2, search_params=None) + merged = merge_pipelines(pipe1, pipe2, search_params=None) # type: ignore assert isinstance(merged, GridSearchCV) - assert isinstance(merged.estimator, Pipeline) - assert len(merged.estimator.named_steps) == 3 - named_steps = list(merged.estimator.named_steps.keys()) + assert isinstance(merged.estimator, Pipeline) # type: ignore + assert len(merged.estimator.named_steps) == 3 # type: ignore + named_steps = list(merged.estimator.named_steps.keys()) # type: ignore assert "scaler" == named_steps[1] assert "rf" == named_steps[2] - assert len(merged.param_grid) == 2 + assert len(merged.param_grid) == 2 # type: ignore search_params = {"kind": "random"} creator3 = PipelineCreator(problem_type="classification") @@ -45,13 +45,16 @@ def test_merger_pipelines() -> None: merged = merge_pipelines(pipe1, pipe2, pipe3, search_params=search_params) assert isinstance(merged, RandomizedSearchCV) - assert isinstance(merged.estimator, Pipeline) - assert len(merged.estimator.named_steps) == 3 - named_steps = list(merged.estimator.named_steps.keys()) + assert isinstance(merged.estimator, Pipeline) # type: ignore + assert len(merged.estimator.named_steps) == 3 # type: ignore + named_steps = list(merged.estimator.named_steps.keys()) # type: ignore assert "scaler" == named_steps[1] assert "rf" == named_steps[2] - assert len(merged.param_distributions) == 3 - assert merged.param_distributions[-1]["rf__max_features"] == [2, 3, 7, 42] + assert len(merged.param_distributions) == 3 # type: ignore + assert ( + merged.param_distributions[-1]["rf__max_features"] # type: ignore + == [2, 3, 7, 42] + ) def test_merger_errors() -> None: @@ -68,7 +71,7 @@ def test_merger_errors() -> None: pipe2 = creator2.to_pipeline(search_params={"kind": 
"grid"}) with pytest.raises(ValueError, match="Only pipelines and searchers"): - merge_pipelines(pipe1, SVC(), search_params=None) + merge_pipelines(pipe1, SVC(), search_params=None) # type: ignore search_params = {"kind": "random"} @@ -92,7 +95,7 @@ def test_merger_errors() -> None: ValueError, match="All searchers must use a pipeline.", ): - merge_pipelines(pipe1, pipe3, search_params=None) + merge_pipelines(pipe1, pipe3, search_params=None) # type: ignore creator4 = PipelineCreator(problem_type="classification") creator4.add("scaler_robust", name="scaler", apply_to="continuous") @@ -103,7 +106,7 @@ def test_merger_errors() -> None: ValueError, match="must have the same named steps.", ): - merge_pipelines(pipe1, pipe4, search_params=None) + merge_pipelines(pipe1, pipe4, search_params=None) # type: ignore search_params = {"kind": "grid"} pipe5 = creator2.to_pipeline(search_params={"kind": "bayes"}) diff --git a/julearn/pipeline/tests/test_pipeline_creator.py b/julearn/pipeline/tests/test_pipeline_creator.py index e431a6264..e299b1008 100644 --- a/julearn/pipeline/tests/test_pipeline_creator.py +++ b/julearn/pipeline/tests/test_pipeline_creator.py @@ -378,7 +378,7 @@ def test_hyperparameter_tuning_distributions_bayes( ) assert isinstance(pipeline, BayesSearchCV) - _compare_param_grids(pipeline.search_spaces, param_grid) + _compare_param_grids(pipeline.search_spaces, param_grid) # type: ignore @pytest.mark.parametrize( diff --git a/julearn/prepare.py b/julearn/prepare.py index 0f4359145..5001520f2 100644 --- a/julearn/prepare.py +++ b/julearn/prepare.py @@ -11,15 +11,12 @@ import numpy as np import pandas as pd from sklearn.model_selection import ( - BaseCrossValidator, - BaseShuffleSplit, GroupKFold, GroupShuffleSplit, LeaveOneGroupOut, LeavePGroupsOut, StratifiedGroupKFold, ) -from sklearn.model_selection._split import _RepeatedSplits from .config import get_config from .model_selection import ( @@ -27,6 +24,7 @@ RepeatedContinuousStratifiedGroupKFold, ) from .utils import logger, raise_error, warn_with_log +from .utils.typing import CVLike def _validate_input_data_df( @@ -335,7 +333,7 @@ def prepare_input_data( def check_consistency( y: pd.Series, - cv: Union[int, BaseCrossValidator, BaseShuffleSplit, _RepeatedSplits], + cv: CVLike, groups: Optional[pd.Series], problem_type: str, ) -> None: diff --git a/julearn/scoring/available_scorers.py b/julearn/scoring/available_scorers.py index 6254d5429..a1dca3c9f 100644 --- a/julearn/scoring/available_scorers.py +++ b/julearn/scoring/available_scorers.py @@ -10,7 +10,7 @@ from typing import Callable, Dict, List, Optional, Union from sklearn.metrics import _scorer, get_scorer_names, make_scorer -from sklearn.metrics._scorer import _check_multimetric_scoring +from sklearn.metrics._scorer import _check_multimetric_scoring # type: ignore from sklearn.metrics._scorer import check_scoring as sklearn_check_scoring from ..transformers.target.ju_transformed_target_model import ( @@ -29,7 +29,7 @@ _extra_available_scorers_reset = deepcopy(_extra_available_scorers) -def get_scorer(name: str) -> ScorerLike: +def get_scorer(name: str) -> ScorerLike: # type: ignore TODO: deprecate sklearn < 1.4.0 """Get available scorer by name. 
Parameters @@ -72,7 +72,9 @@ def list_scorers() -> List[str]: def register_scorer( - scorer_name: str, scorer: ScorerLike, overwrite: Optional[bool] = None + scorer_name: str, + scorer: ScorerLike, # type: ignore TODO: deprecate sklearn < 1.4.0 + overwrite: Optional[bool] = None, ) -> None: """Register a scorer, so that it can be accessed by name. @@ -130,9 +132,9 @@ def reset_scorer_register(): def check_scoring( estimator: EstimatorLike, - scoring: Union[ScorerLike, str, Callable, List[str], None], + scoring: Union[ScorerLike, str, Callable, List[str], None], # type: ignore wrap_score: bool, -) -> Union[None, ScorerLike, Callable, Dict[str, ScorerLike]]: +) -> Union[None, ScorerLike, Callable, Dict[str, ScorerLike]]: # type: ignore """Check the scoring. Parameters @@ -152,7 +154,11 @@ def check_scoring( scoring = _extend_scorer(get_scorer(scoring), wrap_score) if callable(scoring): return _extend_scorer( - sklearn_check_scoring(estimator, scoring=scoring), wrap_score + sklearn_check_scoring( + estimator, # type: ignore + scoring=scoring, + ), + wrap_score, ) if isinstance(scoring, list): scorer_names = typing.cast(List[str], scoring) diff --git a/julearn/stats/corrected_ttest.py b/julearn/stats/corrected_ttest.py index 4b7040967..d4d7fbf53 100644 --- a/julearn/stats/corrected_ttest.py +++ b/julearn/stats/corrected_ttest.py @@ -5,6 +5,7 @@ # Federico Raimondo # License: BSD 3 clause +import typing from itertools import combinations from typing import Optional, Tuple @@ -18,7 +19,7 @@ def _corrected_std( - differences: np.ndarray, n_train: int, n_test: int + differences: pd.DataFrame, n_train: int, n_test: int ) -> float: """Corrects standard deviation using Nadeau and Bengio's approach. @@ -48,12 +49,12 @@ def _corrected_std( def _compute_corrected_ttest( - differences: np.ndarray, + differences: pd.DataFrame, n_train: int, n_test: int, df: Optional[int] = None, alternative: str = "two-sided", -) -> Tuple[float, float]: +) -> Tuple[pd.Series, pd.Series]: """Compute paired t-test with corrected variance. Parameters @@ -167,12 +168,15 @@ def corrected_ttest( n_train = i_scores["n_train"].values n_test = i_scores["n_test"].values + n_train = typing.cast(np.ndarray, n_train) + n_test = typing.cast(np.ndarray, n_test) + if np.unique(n_train).size > 1: warn_with_log( "The training set sizes are not the same. Will use a rounded " "average." 
) - n_train = int(np.mean(n_train).round()) + n_train = int(np.mean(n_train).round()) # type: ignore else: n_train = n_train[0] diff --git a/julearn/stats/tests/test_corrected_ttest.py b/julearn/stats/tests/test_corrected_ttest.py index 9aec2fd18..221c240a1 100644 --- a/julearn/stats/tests/test_corrected_ttest.py +++ b/julearn/stats/tests/test_corrected_ttest.py @@ -21,27 +21,44 @@ def test__compute_corrected_ttest_alternatives(): rvs1 = stats.norm.rvs(loc=0.5, scale=0.2, size=20, random_state=42) rvs2 = stats.norm.rvs(loc=0.51, scale=0.2, size=20, random_state=45) rvs3 = stats.norm.rvs(loc=0.9, scale=0.2, size=20, random_state=50) - _, p1 = _compute_corrected_ttest(rvs1 - rvs2, n_train=70, n_test=30) + _, p1 = _compute_corrected_ttest( + rvs1 - rvs2, # type: ignore + n_train=70, + n_test=30, + ) assert p1 > 0.7 - _, p2 = _compute_corrected_ttest(rvs1 - rvs3, n_train=70, n_test=30) + _, p2 = _compute_corrected_ttest( + rvs1 - rvs3, # type: ignore + n_train=70, + n_test=30, + ) assert p2 < 0.1 _, p3 = _compute_corrected_ttest( - rvs1 - rvs3, n_train=70, n_test=30, alternative="less" + rvs1 - rvs3, # type: ignore + n_train=70, + n_test=30, + alternative="less", ) assert p3 < 0.05 # rvs1 is less than rvs3 _, p4 = _compute_corrected_ttest( - rvs1 - rvs3, n_train=70, n_test=30, alternative="greater" + rvs1 - rvs3, # type: ignore + n_train=70, + n_test=30, + alternative="greater", ) assert p4 > 0.90 # rvs1 is less than rvs3, so this should be high with pytest.raises(ValueError, match="Invalid alternative"): _compute_corrected_ttest( - rvs1 - rvs3, n_train=70, n_test=30, alternative="not_valid" + rvs1 - rvs3, # type: ignore + n_train=70, + n_test=30, + alternative="not_valid", ) diff --git a/julearn/tests/test_api.py b/julearn/tests/test_api.py index c12300388..5f9def69b 100644 --- a/julearn/tests/test_api.py +++ b/julearn/tests/test_api.py @@ -79,7 +79,7 @@ def test_run_cv_simple_binary( # now let"s try target-dependent scores scorers = ["recall", "precision", "f1"] - sk_y = (df_iris[y].values == "virginica").astype(int) + sk_y = (df_iris[y].values == "virginica").astype(int) # type: ignore model = PipelineCreator(apply_to="features", problem_type="classification") model.add("svm") @@ -95,7 +95,7 @@ def test_run_cv_simple_binary( data=df_iris, api_params=api_params, X_types=X_types, - sklearn_model=sklearn_model, + sklearn_model=sklearn_model, # type: ignore scorers=scorers, sk_y=sk_y, ) @@ -103,7 +103,7 @@ def test_run_cv_simple_binary( # now let"s try proba-dependent scores X = ["sepal_length", "petal_length"] scorers = ["accuracy", "roc_auc"] - sk_y = (df_iris[y].values == "virginica").astype(int) + sk_y = (df_iris[y].values == "virginica").astype(int) # type: ignore with pytest.warns(RuntimeWarning, match="treated as continuous"): api_params = { "model": "svm", @@ -126,7 +126,7 @@ def test_run_cv_simple_binary( # e.g. 
svm with probability=False X = ["sepal_length", "petal_length"] scorers = ["accuracy", "roc_auc"] - sk_y = (df_iris[y].values == "virginica").astype(int) + sk_y = (df_iris[y].values == "virginica").astype(int) # type: ignore with pytest.warns(RuntimeWarning, match="treated as continuous"): api_params = { "model": "svm", @@ -284,7 +284,7 @@ def test_run_cv_errors(df_iris: pd.DataFrame) -> None: y=y, data=df_iris, X_types=X_types, - model=model, + model=model, # type: ignore ) model = "svm" @@ -305,7 +305,7 @@ def test_run_cv_errors(df_iris: pd.DataFrame) -> None: data=df_iris, X_types=X_types, model=model, - preprocess=2, + preprocess=2, # type: ignore problem_type="classification", ) @@ -440,19 +440,35 @@ def test_tune_hyperparam_gridsearch(df_iris: pd.DataFrame) -> None: cv_inner = RepeatedKFold(n_splits=2, n_repeats=1) clf = make_pipeline(SVC()) - gs = GridSearchCV(clf, {"svc__C": [0.01, 0.001]}, cv=cv_inner) + gs = GridSearchCV( + clf, + {"svc__C": [0.01, 0.001]}, + cv=cv_inner, # type: ignore + ) - expected = cross_validate(gs, sk_X, sk_y, cv=cv_outer, scoring=[scoring]) + expected = cross_validate( + gs, + sk_X, + sk_y, # type: ignore + cv=cv_outer, # type: ignore + scoring=[scoring], + ) - assert len(actual.columns) == len(expected) + 5 - assert len(actual["test_accuracy"]) == len(expected["test_accuracy"]) + assert len(actual.columns) == len(expected) + 5 # type: ignore + assert ( + len(actual["test_accuracy"]) # type: ignore + == len(expected["test_accuracy"]) + ) assert all( a == b - for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) + for a, b in zip( + actual["test_accuracy"], # type: ignore + expected["test_accuracy"], + ) ) # Compare the models - clf1 = actual_estimator.best_estimator_.steps[-1][1] + clf1 = actual_estimator.best_estimator_.steps[-1][1] # type: ignore clf2 = clone(gs).fit(sk_X, sk_y).best_estimator_.steps[-1][1] compare_models(clf1, clf2) @@ -516,22 +532,28 @@ def test_tune_hyperparam_gridsearch_groups(df_iris: pd.DataFrame) -> None: expected = cross_validate( gs, sk_X, - sk_y, + sk_y, # type: ignore cv=cv_outer, scoring=[scoring], - groups=sk_groups, + groups=sk_groups, # type: ignore fit_params={"groups": sk_groups}, ) - assert len(actual.columns) == len(expected) + 5 - assert len(actual["test_accuracy"]) == len(expected["test_accuracy"]) + assert len(actual.columns) == len(expected) + 5 # type: ignore + assert ( + len(actual["test_accuracy"]) # type: ignore + == len(expected["test_accuracy"]) + ) assert all( a == b - for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) + for a, b in zip( + actual["test_accuracy"], # type: ignore + expected["test_accuracy"], + ) ) # Compare the models - clf1 = actual_estimator.best_estimator_.steps[-1][1] + clf1 = actual_estimator.best_estimator_.steps[-1][1] # type: ignore clf2 = ( clone(gs) .fit(sk_X, sk_y, groups=sk_groups) @@ -593,20 +615,35 @@ def test_tune_hyperparam_randomsearch(df_iris: pd.DataFrame) -> None: clf = make_pipeline(SVC()) gs = RandomizedSearchCV( - clf, {"svc__C": [0.01, 0.001]}, cv=cv_inner, n_iter=2 + clf, + {"svc__C": [0.01, 0.001]}, + cv=cv_inner, # type: ignore + n_iter=2, ) - expected = cross_validate(gs, sk_X, sk_y, cv=cv_outer, scoring=[scoring]) + expected = cross_validate( + gs, + sk_X, + sk_y, # type: ignore + cv=cv_outer, # type: ignore + scoring=[scoring], + ) - assert len(actual.columns) == len(expected) + 5 - assert len(actual["test_accuracy"]) == len(expected["test_accuracy"]) + assert len(actual.columns) == len(expected) + 5 # type: ignore + assert ( + 
len(actual["test_accuracy"]) # type: ignore + == len(expected["test_accuracy"]) + ) assert all( a == b - for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) + for a, b in zip( + actual["test_accuracy"], # type: ignore + expected["test_accuracy"], + ) ) # Compare the models - clf1 = actual_estimator.best_estimator_.steps[-1][1] + clf1 = actual_estimator.best_estimator_.steps[-1][1] # type: ignore clf2 = clone(gs).fit(sk_X, sk_y).best_estimator_.steps[-1][1] compare_models(clf1, clf2) @@ -697,25 +734,43 @@ def test_tune_hyperparams_multiple_grid(df_iris: pd.DataFrame) -> None: "svc__C": [0.01, 0.1], }, ] - gs = GridSearchCV(clf, grid, cv=cv_inner) + gs = GridSearchCV(clf, grid, cv=cv_inner) # type: ignore - expected = cross_validate(gs, sk_X, sk_y, cv=cv_outer, scoring=[scoring]) + expected = cross_validate( + gs, + sk_X, + sk_y, # type: ignore + cv=cv_outer, # type: ignore + scoring=[scoring], + ) - assert len(actual1.columns) == len(expected) + 5 - assert len(actual2.columns) == len(expected) + 5 - assert len(actual1["test_accuracy"]) == len(expected["test_accuracy"]) - assert len(actual2["test_accuracy"]) == len(expected["test_accuracy"]) + assert len(actual1.columns) == len(expected) + 5 # type: ignore + assert len(actual2.columns) == len(expected) + 5 # type: ignore + assert ( + len(actual1["test_accuracy"]) # type: ignore + == len(expected["test_accuracy"]) + ) + assert ( + len(actual2["test_accuracy"]) # type: ignore + == len(expected["test_accuracy"]) + ) assert all( a == b - for a, b in zip(actual1["test_accuracy"], expected["test_accuracy"]) + for a, b in zip( + actual1["test_accuracy"], # type: ignore + expected["test_accuracy"], + ) ) assert all( a == b - for a, b in zip(actual2["test_accuracy"], expected["test_accuracy"]) + for a, b in zip( + actual2["test_accuracy"], # type: ignore + expected["test_accuracy"], + ) ) # Compare the models - clf1 = actual_estimator1.best_estimator_.steps[-1][1] - clf2 = actual_estimator2.best_estimator_.steps[-1][1] + clf1 = actual_estimator1.best_estimator_.steps[-1][1] # type: ignore + clf2 = actual_estimator2.best_estimator_.steps[-1][1] # type: ignore clf3 = clone(gs).fit(sk_X, sk_y).best_estimator_.steps[-1][1] compare_models(clf1, clf2) compare_models(clf1, clf3) @@ -746,7 +801,7 @@ def test_return_estimators(df_iris: pd.DataFrame) -> None: model="svm", problem_type="classification", cv=cv, - return_estimator=True, + return_estimator=True, # type: ignore ) scores = run_cross_validation( @@ -776,7 +831,7 @@ def test_return_estimators(df_iris: pd.DataFrame) -> None: assert isinstance(scores, pd.DataFrame) assert "estimator" not in scores - assert isinstance(final["svm"], SVC) + assert isinstance(final["svm"], SVC) # type: ignore scores = run_cross_validation( X=X, @@ -805,7 +860,7 @@ def test_return_estimators(df_iris: pd.DataFrame) -> None: assert isinstance(scores, pd.DataFrame) assert "estimator" in scores - assert isinstance(final["svm"], SVC) + assert isinstance(final["svm"], SVC) # type: ignore def test_return_train_scores(df_iris: pd.DataFrame) -> None: @@ -838,8 +893,8 @@ def test_return_train_scores(df_iris: pd.DataFrame) -> None: train_scores = [f"train_{s}" for s in scoring] test_scores = [f"test_{s}" for s in scoring] - assert all(s not in scores.columns for s in train_scores) - assert all(s in scores.columns for s in test_scores) + assert all(s not in scores.columns for s in train_scores) # type: ignore + assert all(s in scores.columns for s in test_scores) # type: ignore with pytest.warns(RuntimeWarning, 
match="treated as continuous"): scores = run_cross_validation( @@ -856,8 +911,8 @@ def test_return_train_scores(df_iris: pd.DataFrame) -> None: train_scores = [f"train_{s}" for s in scoring] test_scores = [f"test_{s}" for s in scoring] - assert all(s in scores.columns for s in train_scores) - assert all(s in scores.columns for s in test_scores) + assert all(s in scores.columns for s in train_scores) # type: ignore + assert all(s in scores.columns for s in test_scores) # type: ignore @pytest.mark.parametrize( @@ -1174,7 +1229,10 @@ def test_api_stacking_models() -> None: # The final model should be a stacking model im which the first estimator # is a grid search - assert isinstance(final.steps[1][1].model.estimators[0][1], GridSearchCV) + assert isinstance( + final.steps[1][1].model.estimators[0][1], # type: ignore + GridSearchCV, + ) def test_inspection_error(df_iris: pd.DataFrame) -> None: @@ -1253,10 +1311,12 @@ def test_inspector_picklable(tmp_path: Path, df_iris: pd.DataFrame) -> None: X = ["sepal_length", "sepal_width", "petal_length"] y = "species" pickled_file = tmp_path / "inspector.joblib" + X_types = {"continuous": X} _, _, inspector = run_cross_validation( X=X, y=y, data=df_iris, + X_types=X_types, model="rf", problem_type="classification", return_estimator="all", @@ -1289,9 +1349,7 @@ def test_tune_hyperparam_target(df_iris: pd.DataFrame) -> None: } target_pipeline = TargetPipelineCreator() - model = PipelineCreator( - problem_type="regression", apply_to="continuous" - ) + model = PipelineCreator(problem_type="regression", apply_to="continuous") target_pipeline.add("confound_removal", confounds="confounds") model.add(target_pipeline, apply_to="target") model.add("svm", C=[1, 2]) diff --git a/julearn/transformers/confound_remover.py b/julearn/transformers/confound_remover.py index ce6c8d093..c7d522c83 100644 --- a/julearn/transformers/confound_remover.py +++ b/julearn/transformers/confound_remover.py @@ -107,7 +107,7 @@ def _fit( self.support_mask_ = self.support_mask_.values def fit_confound_models(X: Scalar) -> ModelLike: # noqa: N803 - _model = clone(self.model_confound) + _model = clone(self.model_confound) # type: ignore _model.fit(ser_confound.values, X) # type: ignore return _model # type: ignore @@ -256,7 +256,7 @@ def _apply_threshold(self, residuals: pd.DataFrame) -> pd.DataFrame: if self.threshold is not None: # Accounting for correlated rounding errors for very small # residuals - residuals = residuals.applymap( + residuals = residuals.map( # type: ignore lambda x: 0 if abs(x) <= self.threshold else x ) return residuals diff --git a/julearn/transformers/dataframe/tests/test_drop_columns.py b/julearn/transformers/dataframe/tests/test_drop_columns.py index 898d2059c..b8280bd40 100644 --- a/julearn/transformers/dataframe/tests/test_drop_columns.py +++ b/julearn/transformers/dataframe/tests/test_drop_columns.py @@ -45,4 +45,4 @@ def test_DropColumns() -> None: ), X_trans, ) - assert all(support == [1, 1, 0, 0, 1, 1]) + assert all(support == [1, 1, 0, 0, 1, 1]) # type: ignore diff --git a/julearn/transformers/dataframe/tests/test_filter_columns.py b/julearn/transformers/dataframe/tests/test_filter_columns.py index ff7e823ee..b0842c567 100644 --- a/julearn/transformers/dataframe/tests/test_filter_columns.py +++ b/julearn/transformers/dataframe/tests/test_filter_columns.py @@ -30,7 +30,7 @@ def test_FilterColumns() -> None: "a__:type:__continuous", "b__:type:__continuous", ] - filter.set_output(transform="pandas").fit(X_with_types) + 
filter.set_output(transform="pandas").fit(X_with_types) # type: ignore X_expected = X_with_types.copy()[kept_columns] X_trans = filter.transform(X_with_types) assert isinstance(X_expected, pd.DataFrame) diff --git a/julearn/transformers/dataframe/tests/test_set_column_types.py b/julearn/transformers/dataframe/tests/test_set_column_types.py index e2e2a6798..e2895690c 100644 --- a/julearn/transformers/dataframe/tests/test_set_column_types.py +++ b/julearn/transformers/dataframe/tests/test_set_column_types.py @@ -13,7 +13,8 @@ def test_SetColumnTypes( - X_iris: pd.DataFrame, X_types_iris: Optional[Dict] # noqa: N803 + X_iris: pd.DataFrame, # noqa: N803 + X_types_iris: Optional[Dict], # noqa: N803 ) -> None: """Test SetColumnTypes. @@ -40,8 +41,8 @@ def test_SetColumnTypes( ) ) st = SetColumnTypes(X_types_iris).set_output(transform="pandas") - Xt = st.fit_transform(X_iris) - Xt_iris_with_types = st.fit_transform(X_iris_with_types) + Xt = st.fit_transform(X_iris) # type: ignore + Xt_iris_with_types = st.fit_transform(X_iris_with_types) # type: ignore assert_frame_equal(Xt, X_iris_with_types) assert_frame_equal(Xt_iris_with_types, X_iris_with_types) @@ -64,7 +65,8 @@ def test_SetColumnTypes_input_validation( def test_SetColumnTypes_array( - X_iris: pd.DataFrame, X_types_iris: Optional[Dict] # noqa: N803 + X_iris: pd.DataFrame, # noqa: N803 + X_types_iris: Optional[Dict], # noqa: N803 ) -> None: """Test SetColumnTypes. @@ -92,6 +94,8 @@ def test_SetColumnTypes_array( } X_iris_with_types.rename(columns=to_rename) st = SetColumnTypes(X_types_iris).set_output(transform="pandas") - Xt = st.fit_transform(X_iris.values) - Xt_iris_with_types = st.fit_transform(X_iris_with_types.values) + Xt = st.fit_transform(X_iris.values) # type: ignore + Xt_iris_with_types = st.fit_transform( # type: ignore + X_iris_with_types.values + ) assert_frame_equal(Xt, Xt_iris_with_types) diff --git a/julearn/transformers/ju_column_transformer.py b/julearn/transformers/ju_column_transformer.py index 07034b916..d4c0537d3 100644 --- a/julearn/transformers/ju_column_transformer.py +++ b/julearn/transformers/ju_column_transformer.py @@ -154,7 +154,7 @@ def get_feature_names_out( klass=ValueError, exception=e, ) - if self.column_transformer_.verbose_feature_names_out: + if self.column_transformer_.verbose_feature_names_out: # type: ignore out = [ x.replace("remainder__", "") if "remainder__" in x else x for x in out diff --git a/julearn/transformers/target/ju_transformed_target_model.py b/julearn/transformers/target/ju_transformed_target_model.py index 41649038b..32c8aa79b 100644 --- a/julearn/transformers/target/ju_transformed_target_model.py +++ b/julearn/transformers/target/ju_transformed_target_model.py @@ -96,7 +96,7 @@ def fit( """ y = self.transformer.fit_transform(X, y) - self.model_ = clone(self.model) + self.model_ = clone(self.model) # type: ignore self.model_.fit(X, y, **fit_params) # type: ignore return self diff --git a/julearn/transformers/target/target_confound_remover.py b/julearn/transformers/target/target_confound_remover.py index 67a209444..8b102de60 100644 --- a/julearn/transformers/target/target_confound_remover.py +++ b/julearn/transformers/target/target_confound_remover.py @@ -71,7 +71,7 @@ def fit( The fitted target confound remover. 
""" - self.model_confounds_ = clone(self.model_confound) + self.model_confounds_ = clone(self.model_confound) # type: ignore self.detected_confounds_ = self.confounds.to_type_selector()(X) X_confounds = X.loc[:, self.detected_confounds_] self.model_confounds_.fit(X_confounds.values, y) # type: ignore diff --git a/julearn/transformers/target/tests/test_ju_transformed_target_model.py b/julearn/transformers/target/tests/test_ju_transformed_target_model.py index 727797479..15dc8a1d0 100644 --- a/julearn/transformers/target/tests/test_ju_transformed_target_model.py +++ b/julearn/transformers/target/tests/test_ju_transformed_target_model.py @@ -38,7 +38,9 @@ def test_JuTransformedTargetModel( y_scaled = scaler_sk.fit_transform(y_iris.values[:, None])[:, 0] model_sk.fit(X_iris, y_scaled) y_pred_sk = model_sk.predict(X_iris) - y_inverse_sk = scaler_sk.inverse_transform(y_pred_sk[:, None])[:, 0] + y_inverse_sk = scaler_sk.inverse_transform( + y_pred_sk[:, None] # type: ignore + )[:, 0] assert_array_equal(y_pred, y_inverse_sk) diff --git a/julearn/transformers/tests/test_cbpm.py b/julearn/transformers/tests/test_cbpm.py index ddd195af4..7126e51e2 100644 --- a/julearn/transformers/tests/test_cbpm.py +++ b/julearn/transformers/tests/test_cbpm.py @@ -278,7 +278,7 @@ def test_CBPM_set_output_posneg( trans_posneg = ( CBPM(corr_method=spearmanr, agg_method=np.mean, corr_sign="posneg") .set_output(transform="pandas") - .fit_transform(X_iris, y_iris) + .fit_transform(X_iris, y_iris) # type: ignore ) trans_man_pos = X_iris[X_pos].values.mean(axis=1) @@ -311,7 +311,7 @@ def test_CBPM_set_output_pos( trans_pos = ( CBPM(corr_method=spearmanr, agg_method=np.mean, corr_sign="pos") .set_output(transform="pandas") - .fit_transform(X_iris, y_iris) + .fit_transform(X_iris, y_iris) # type: ignore ) trans_man_pos = X_iris[X_pos].values.mean(axis=1) @@ -340,7 +340,7 @@ def test_CBPM_set_output_neg( trans_neg = ( CBPM(corr_method=spearmanr, agg_method=np.mean, corr_sign="neg") .set_output(transform="pandas") - .fit_transform(X_iris, y_iris) + .fit_transform(X_iris, y_iris) # type: ignore ) trans_man_neg = X_iris[X_neg].values.mean(axis=1) diff --git a/julearn/transformers/tests/test_confounds.py b/julearn/transformers/tests/test_confounds.py index 3de4793f2..b1bd00edb 100644 --- a/julearn/transformers/tests/test_confounds.py +++ b/julearn/transformers/tests/test_confounds.py @@ -167,10 +167,14 @@ def test_ConfoundRemover_confound_auto_find_conf( # After confound removal the confound should be removed assert ( - df_cofound_removed.columns == df_X.drop(columns=confounds).columns + df_cofound_removed.columns # type: ignore + == df_X.drop(columns=confounds).columns ).all() - assert_frame_equal(df_cofound_removed, df_confound_removed_manual) + assert_frame_equal( + df_cofound_removed, # type: ignore + df_confound_removed_manual, + ) @pytest.mark.parametrize( @@ -216,7 +220,8 @@ def test_confound_set_confounds( df_confounds = df_X_confounds.loc[:, conf_as_feat] # type: ignore confound_regressions = [ model_class().fit( - df_confounds, df_X_confounds.loc[:, feature] # type: ignore + df_confounds, + df_X_confounds.loc[:, feature], # type: ignore ) for feature in features ] @@ -244,11 +249,14 @@ def test_confound_set_confounds( ) # After confound removal the confound should be removed assert ( - df_cofound_removed.columns + df_cofound_removed.columns # type: ignore == df_X_confounds.drop(columns=confounds).columns ).all() - assert_frame_equal(df_cofound_removed, df_confound_removed_manual) + assert_frame_equal( + 
df_cofound_removed, # type: ignore + df_confound_removed_manual, + ) def test_return_confound(df_X_confounds: pd.DataFrame) -> None: # noqa: N803 @@ -264,7 +272,10 @@ def test_return_confound(df_X_confounds: pd.DataFrame) -> None: # noqa: N803 apply_to=["categorical", "continuous"], keep_confounds=True ) X_trans = remover.fit_transform(df_X_confounds) - assert_array_equal(X_trans.columns, df_X_confounds.columns) + assert_array_equal( + X_trans.columns, # type: ignore + df_X_confounds.columns, + ) def test_no_confound_found() -> None: diff --git a/julearn/transformers/tests/test_jucolumntransformers.py b/julearn/transformers/tests/test_jucolumntransformers.py index 4c4ca83b9..7bf8309fa 100644 --- a/julearn/transformers/tests/test_jucolumntransformers.py +++ b/julearn/transformers/tests/test_jucolumntransformers.py @@ -123,7 +123,7 @@ def test_JuColumnTransformer_row_select(): transformer_healthy = JuColumnTransformer( name="zscore", - transformer=StandardScaler(), + transformer=StandardScaler(), # type: ignore apply_to="continuous", row_select_col_type=["healthy"], row_select_vals=1, @@ -131,7 +131,7 @@ def test_JuColumnTransformer_row_select(): transformer_unhealthy = JuColumnTransformer( name="zscore", - transformer=StandardScaler(), + transformer=StandardScaler(), # type: ignore apply_to="continuous", row_select_col_type=["healthy"], row_select_vals=0, @@ -139,24 +139,26 @@ def test_JuColumnTransformer_row_select(): transformer_both = JuColumnTransformer( name="zscore", - transformer=StandardScaler(), + transformer=StandardScaler(), # type: ignore apply_to="continuous", row_select_col_type=["healthy"], row_select_vals=[0, 1], ) mean_healthy = ( transformer_healthy.fit(X) - .column_transformer_.transformers_[0][1] + .column_transformer_.transformers_[0][1] # type: ignore .mean_ ) mean_unhealthy = ( transformer_unhealthy.fit(X) - .column_transformer_.transformers_[0][1] + .column_transformer_.transformers_[0][1] # type: ignore .mean_ ) mean_both = ( - transformer_both.fit(X).column_transformer_.transformers_[0][1].mean_ + transformer_both.fit( + X + ).column_transformer_.transformers_[0][1].mean_ # type: ignore ) assert_almost_equal( diff --git a/julearn/utils/checks.py b/julearn/utils/checks.py index 14f7207ff..919e796dc 100644 --- a/julearn/utils/checks.py +++ b/julearn/utils/checks.py @@ -2,6 +2,8 @@ # Author: Federico Raimondo # License: BSD 3 clause +from typing import List + import numpy as np import pandas as pd @@ -10,7 +12,7 @@ def check_scores_df( *scores: pd.DataFrame, same_cv: bool = False -) -> pd.DataFrame: +) -> List[pd.DataFrame]: """Check the output of `run_cross_validation`. Parameters diff --git a/julearn/utils/logging.py b/julearn/utils/logging.py index 094d6c179..c7c7516e9 100644 --- a/julearn/utils/logging.py +++ b/julearn/utils/logging.py @@ -223,7 +223,7 @@ def raise_error( def warn_with_log( - msg: str, category: Optional[Type[Warning]] = RuntimeWarning + msg: str, category: Type[Warning] = RuntimeWarning ) -> None: """Warn, but first log it. 
diff --git a/julearn/utils/testing.py b/julearn/utils/testing.py index 5c6145098..7afd9d2ab 100644 --- a/julearn/utils/testing.py +++ b/julearn/utils/testing.py @@ -5,7 +5,7 @@ # License: AGPL import warnings -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union import numpy as np import pandas as pd @@ -38,7 +38,7 @@ SGDClassifier, SGDRegressor, ) -from sklearn.model_selection import KFold, cross_validate +from sklearn.model_selection import BaseCrossValidator, KFold, cross_validate from sklearn.naive_bayes import ( BernoulliNB, CategoricalNB, @@ -51,7 +51,7 @@ from julearn import run_cross_validation from julearn.base import WrapModel -from julearn.utils.typing import DataLike, EstimatorLike +from julearn.utils.typing import DataLike, EstimatorLike, ModelLike def compare_models( # noqa: C901, pragma: no cover @@ -108,7 +108,8 @@ def compare_models( # noqa: C901, pragma: no cover assert clf1.strategy == clf2.strategy # type: ignore if hasattr(clf1, "class_prior_"): assert_array_equal( - clf1.class_prior_, clf2.class_prior_ # type: ignore + clf1.class_prior_, # type: ignore + clf2.class_prior_, # type: ignore ) if hasattr(clf1, "constant_"): assert clf1.constant_ == clf2.constant_ # type: ignore @@ -180,11 +181,11 @@ def do_scoring_test( y: str, data: pd.DataFrame, api_params: Dict[str, Any], - sklearn_model: EstimatorLike, + sklearn_model: Union[EstimatorLike, ModelLike, Any], # TODO: fix scorers: List[str], groups: Optional[str] = None, X_types: Optional[Dict[str, List[str]]] = None, # noqa: N803 - cv: int = 5, + cv: Union[int, BaseCrossValidator] = 5, sk_y: Optional[np.ndarray] = None, decimal: int = 5, ): @@ -245,7 +246,12 @@ def do_scoring_test( np.random.seed(42) expected = cross_validate( - sklearn_model, sk_X, sk_y, cv=sk_cv, scoring=scorers, groups=sk_groups + sklearn_model, # type: ignore + sk_X, + sk_y, + cv=sk_cv, + scoring=scorers, + groups=sk_groups, # type: ignore ) # Compare the models @@ -257,8 +263,8 @@ def do_scoring_test( if isinstance(sklearn_model, Pipeline): clf2 = clone(sklearn_model).fit(sk_X, sk_y).steps[-1][1] else: - clf2 = clone(sklearn_model).fit(sk_X, sk_y) - compare_models(clf1, clf2) + clf2 = clone(sklearn_model).fit(sk_X, sk_y) # type: ignore + compare_models(clf1, clf2) # type: ignore if decimal > 0: for scoring in scorers: @@ -266,7 +272,9 @@ def do_scoring_test( assert len(actual.columns) == len(expected) + 5 # type: ignore assert len(actual[s_key]) == len(expected[s_key]) # type: ignore assert_array_almost_equal( - actual[s_key], expected[s_key], decimal=decimal # type: ignore + actual[s_key], # type: ignore + expected[s_key], + decimal=decimal, # type: ignore ) @@ -277,7 +285,9 @@ def __init__(self): pass def fit( - self, X: DataLike, y: Optional[DataLike] = None # noqa: N803 + self, + X: DataLike, # noqa: N803 + y: Optional[DataLike] = None, ) -> "PassThroughTransformer": """Fit the transformer. 
diff --git a/julearn/utils/typing.py b/julearn/utils/typing.py
index eb886b865..ed0f1a191 100644
--- a/julearn/utils/typing.py
+++ b/julearn/utils/typing.py
@@ -7,7 +7,7 @@
 from typing import (
     Any,
     Dict,
-    List,
+    Iterable,
     Optional,
     Protocol,
     Union,
@@ -16,9 +16,16 @@
 import numpy as np
 import pandas as pd
+from numpy.typing import ArrayLike
+from sklearn.model_selection import BaseCrossValidator, BaseShuffleSplit
+from sklearn.model_selection._split import _RepeatedSplits
 
-try:  # sklearn < 1.4.0
+try:  # sklearn >= 1.4.0
+    from sklearn.metrics._scorer import _Scorer  # type: ignore
+
+    ScorerLike = _Scorer
+except ImportError:  # sklearn < 1.4.0
     from sklearn.metrics._scorer import (
         _PredictScorer,
         _ProbaScorer,
@@ -26,10 +33,6 @@
     )
 
     ScorerLike = Union[_ProbaScorer, _ThresholdScorer, _PredictScorer]
-except ImportError:  # sklearn >= 1.4.0
-    from sklearn.metrics._scorer import _Scorer
-
-    ScorerLike = _Scorer
 
 from ..base import ColumnTypes
@@ -43,15 +46,18 @@ class EstimatorLikeFit1(Protocol):
     """Class for estimator-like fit 1."""
 
     def fit(
-        self, X: List[str], y: str, **kwargs: Any  # noqa: N803
+        self,
+        X: DataLike,  # noqa: N803
+        y: pd.Series,
+        **kwargs: Any,
     ) -> "EstimatorLikeFit1":
         """Fit estimator.
 
         Parameters
         ----------
-        X : list of str
+        X : DataLike
             The features to use.
-        y : str
+        y : pd.Series
             The target to use.
         **kwargs : dict
             Extra keyword arguments.
@@ -101,15 +107,22 @@ def set_params(self, **params: Any) -> "EstimatorLikeFit1":
 class EstimatorLikeFit2(Protocol):
     """Class for estimator-like fit 2."""
 
-    def fit(self, X: List[str], y: str) -> "EstimatorLikeFit2":  # noqa: N803
+    def fit(
+        self,
+        X: DataLike,  # noqa: N803
+        y: ArrayLike,
+        **kwargs: Any,
+    ) -> "EstimatorLikeFit2":
         """Fit estimator.
 
         Parameters
         ----------
-        X : list of str
+        X : DataLike
             The features to use.
-        y : str
+        y : ArrayLike
             The target to use.
+        **kwargs : dict
+            Extra keyword arguments.
 
         Returns
         -------
@@ -156,12 +169,12 @@ def set_params(self, **params: Any) -> "EstimatorLikeFit2":
 class EstimatorLikeFity(Protocol):
     """Class for estimator-like fit y."""
 
-    def fit(self, y: str) -> "EstimatorLikeFity":
+    def fit(self, y: DataLike) -> "EstimatorLikeFity":
         """Fit estimator.
 
         Parameters
         ----------
-        y : str
+        y : DataLike
             The target to use.
 
         Returns
@@ -214,17 +227,17 @@ class TransformerLike(EstimatorLikeFit1, Protocol):
 
     def fit(
         self,
-        X: List[str],  # noqa: N803
-        y: Optional[str] = None,
+        X: DataLike,  # noqa: N803
+        y: DataLike,
         **fit_params: Any,
     ) -> None:
         """Fit transformer.
 
         Parameters
         ----------
-        X : list of str
+        X : DataLike
             The features to use.
-        y : str, optional
+        y : DataLike
-            The target to use (default None).
+            The target to use.
         **fit_params : dict
             Fit parameters.
@@ -249,7 +262,9 @@ def transform(self, X: DataLike) -> DataLike:  # noqa: N803
         return X
 
     def fit_transform(
-        self, X: DataLike, y: Optional[DataLike] = None  # noqa: N803
+        self,
+        X: DataLike,  # noqa: N803
+        y: Optional[DataLike] = None,
     ) -> DataLike:
         """Fit and transform.
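The reworked aliases above lean on structural typing: a `Protocol` matches any object whose `fit` has a compatible signature, so third-party estimators type-check against julearn without inheriting from any base class. A minimal self-contained sketch of the mechanism (simplified signatures, not the actual julearn protocols):

    from typing import Any, Protocol

    import pandas as pd


    class SupportsFit(Protocol):
        # Structural contract: any object with this fit signature conforms.
        def fit(
            self, X: pd.DataFrame, y: pd.Series, **kwargs: Any
        ) -> "SupportsFit":
            ...


    class MeanModel:
        # No base class needed; the matching fit signature is enough.
        def fit(
            self, X: pd.DataFrame, y: pd.Series, **kwargs: Any
        ) -> "MeanModel":
            self.mean_ = float(y.mean())
            return self


    def train(est: SupportsFit, X: pd.DataFrame, y: pd.Series) -> SupportsFit:
        # A static checker accepts MeanModel here purely by structure.
        return est.fit(X, y)


    train(MeanModel(), pd.DataFrame({"a": [1.0, 2.0]}), pd.Series([0.0, 1.0]))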
@@ -369,3 +384,8 @@ def get_apply_to(self) -> ColumnTypes:
 
         """
         return ColumnTypes("placeholder")
+
+
+CVLike = Union[
+    int, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit, Iterable
+]
diff --git a/julearn/viz/_scores.py b/julearn/viz/_scores.py
index d155d6caa..a1a99020f 100644
--- a/julearn/viz/_scores.py
+++ b/julearn/viz/_scores.py
@@ -2,6 +2,7 @@
 # Sami Hamdan
 # License: AGPL
 from pathlib import Path
+from typing import Sequence
 
 import pandas as pd
 import panel as pn
@@ -43,10 +44,10 @@ class _JulearnScoresViewer(param.Parameterized):
 
     """
 
-    metric = param.Selector([], default=None)
-    models = param.ListSelector(default=None, objects=[])
-    sets = param.ListSelector(default=None, objects=[])
-    show_stats = param.Boolean(False)
+    metric = param.Selector([], default=None)  # type: ignore
+    models: Sequence = param.ListSelector(default=None, objects=[])
+    sets: Sequence = param.ListSelector(default=None, objects=[])
+    show_stats: bool = param.Boolean(False)
     group_repeats = param.Selector(
         objects=["mean", "median", "no"], default="no"
     )
diff --git a/pyproject.toml b/pyproject.toml
index b6215fac8..320d256d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -222,4 +222,18 @@ showcontent = true
 
-## Configure pyright to ignore assignment types until scikit-learn stubs are updated
+## Configure pyright to ignore assignment types until scikit-learn stubs are updated
 [tool.pyright]
-reportAssignmentType = "none"
\ No newline at end of file
+reportAssignmentType = "none"
+exclude = [
+    "docs/auto_examples/",
+    "*.html",
+    ".git/",
+    "*.pyc",
+    "*/_build/*",
+    "*/api/generated/*.examples",
+    "build/",
+    "examples/XX_disabled/",
+    ".tox",
+    ".eggs",
+    "examples/",  # Lots of problems due to bad stubs; avoid filling the examples with "# type: ignore"
+    "scratch/",  # place to prototype, not to be checked
+]
\ No newline at end of file

From e27eb1fbdc0e71491e5223eb89716e92a9af0362 Mon Sep 17 00:00:00 2001
From: Fede
Date: Mon, 29 Apr 2024 18:15:45 +0300
Subject: [PATCH 2/2] Roll back pandas map to applymap until we remove support for py3.8

---
 julearn/transformers/confound_remover.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/julearn/transformers/confound_remover.py b/julearn/transformers/confound_remover.py
index c7d522c83..4a9327ebe 100644
--- a/julearn/transformers/confound_remover.py
+++ b/julearn/transformers/confound_remover.py
@@ -256,7 +256,7 @@ def _apply_threshold(self, residuals: pd.DataFrame) -> pd.DataFrame:
         if self.threshold is not None:
             # Accounting for correlated rounding errors for very small
             # residuals
-            residuals = residuals.map(  # type: ignore
+            residuals = residuals.applymap(  # type: ignore
                 lambda x: 0 if abs(x) <= self.threshold else x
            )
         return residuals
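The rollback in PATCH 2/2 is a compatibility constraint, not a style choice: `DataFrame.map` only appeared in pandas 2.1, and pandas 2.1 already requires Python >= 3.9, so the last pandas releases that run on Python 3.8 expose only `applymap` (which newer pandas merely deprecates). A version-agnostic sketch of the same thresholding, assuming both spellings behave identically elementwise:

    import pandas as pd


    def apply_threshold(residuals: pd.DataFrame, threshold: float) -> pd.DataFrame:
        # Zero out residuals whose magnitude is at or below the threshold,
        # picking whichever elementwise API this pandas version provides.
        def zero_small(value: float) -> float:
            return 0.0 if abs(value) <= threshold else value

        if hasattr(pd.DataFrame, "map"):  # pandas >= 2.1 (Python >= 3.9)
            return residuals.map(zero_small)
        return residuals.applymap(zero_small)  # pandas < 2.1 (Python 3.8 era)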