diff --git a/aeon/base/estimators/interval_based/base_interval_forest.py b/aeon/base/estimators/interval_based/base_interval_forest.py index 69998e34e4..ac2c57d5a5 100644 --- a/aeon/base/estimators/interval_based/base_interval_forest.py +++ b/aeon/base/estimators/interval_based/base_interval_forest.py @@ -1154,6 +1154,10 @@ def temporal_importance_curves( curves : list of np.ndarray The temporal importance curves for each feature. """ + if is_regressor(self): + raise NotImplementedError( + "Temporal importance curves are not available for regression." + ) if not isinstance(self._base_estimator, ContinuousIntervalTree): raise ValueError( "base_estimator for temporal importance curves must" diff --git a/aeon/classification/interval_based/_drcif.py b/aeon/classification/interval_based/_drcif.py index 90811f2539..64780842ef 100644 --- a/aeon/classification/interval_based/_drcif.py +++ b/aeon/classification/interval_based/_drcif.py @@ -196,10 +196,7 @@ def __init__( n_jobs=1, parallel_backend=None, ): - d = [] self.use_pycatch22 = use_pycatch22 - if use_pycatch22: - d.append("pycatch22") if isinstance(base_estimator, ContinuousIntervalTree): replace_nan = "nan" @@ -241,8 +238,8 @@ def __init__( parallel_backend=parallel_backend, ) - if d: - self.set_tags(**{"python_dependencies": d}) + if use_pycatch22: + self.set_tags(**{"python_dependencies": "pycatch22"}) def _fit(self, X, y): return super()._fit(X, y) diff --git a/aeon/classification/interval_based/_interval_pipelines.py b/aeon/classification/interval_based/_interval_pipelines.py index d29044e40b..8804f43aec 100644 --- a/aeon/classification/interval_based/_interval_pipelines.py +++ b/aeon/classification/interval_based/_interval_pipelines.py @@ -163,8 +163,8 @@ def _fit(self, X, y): ), self.random_state, ) - m = hasattr(self._estimator, "n_jobs") - if m: + + if hasattr(self._estimator, "n_jobs"): self._estimator.n_jobs = self._n_jobs X_t = self._transformer.fit_transform(X, y) @@ -401,8 +401,8 @@ def _fit(self, X, y): ), self.random_state, ) - m = hasattr(self._estimator, "n_jobs") - if m: + + if hasattr(self._estimator, "n_jobs"): self._estimator.n_jobs = self._n_jobs X_t = self._transformer.fit_transform(X, y) diff --git a/aeon/classification/interval_based/_rise.py b/aeon/classification/interval_based/_rise.py index e17ce0ff7f..0542f39428 100644 --- a/aeon/classification/interval_based/_rise.py +++ b/aeon/classification/interval_based/_rise.py @@ -194,6 +194,14 @@ def _fit_predict(self, X, y) -> np.ndarray: def _fit_predict_proba(self, X, y) -> np.ndarray: return super()._fit_predict_proba(X, y) + def temporal_importance_curves( + self, return_dict=False, normalise_time_points=False + ): + raise NotImplementedError( + "No temporal importance curves available for " + "RandomIntervalSpectralEnsemble." + ) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. 
diff --git a/aeon/classification/interval_based/tests/__init__.py b/aeon/classification/interval_based/tests/__init__.py index 9a18fdcaf1..e9dc0b9e7e 100644 --- a/aeon/classification/interval_based/tests/__init__.py +++ b/aeon/classification/interval_based/tests/__init__.py @@ -1 +1 @@ -"""Tests for interval based classifiers.""" +"""Tests for interval-based classifiers.""" diff --git a/aeon/classification/interval_based/tests/test_cif.py b/aeon/classification/interval_based/tests/test_cif.py deleted file mode 100644 index 3c1316d3a3..0000000000 --- a/aeon/classification/interval_based/tests/test_cif.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Test the CIF classifier.""" - -from aeon.classification.interval_based import CanonicalIntervalForestClassifier -from aeon.classification.sklearn import ContinuousIntervalTree - - -def test_cif(): - """Test nans correct with ContinuousIntervalTree.""" - cif = CanonicalIntervalForestClassifier( - base_estimator=ContinuousIntervalTree(), use_pycatch22=True - ) - assert cif.replace_nan == "nan" - d = cif.get_tag("python_dependencies") - assert d == "pycatch22" diff --git a/aeon/classification/interval_based/tests/test_dr_cif.py b/aeon/classification/interval_based/tests/test_dr_cif.py deleted file mode 100644 index 6c4ed46ba7..0000000000 --- a/aeon/classification/interval_based/tests/test_dr_cif.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Test the DrCIF classifier.""" - -from aeon.classification.interval_based import DrCIFClassifier -from aeon.classification.sklearn import ContinuousIntervalTree - - -def test_dr_cif(): - """Test nans correct with ContinuousIntervalTree.""" - cif = DrCIFClassifier(base_estimator=ContinuousIntervalTree(), use_pycatch22=True) - assert cif.replace_nan == "nan" - d = cif.get_tag("python_dependencies") - assert d == ["pycatch22"] diff --git a/aeon/classification/interval_based/tests/test_interval_forests.py b/aeon/classification/interval_based/tests/test_interval_forests.py new file mode 100644 index 0000000000..a9e96a447a --- /dev/null +++ b/aeon/classification/interval_based/tests/test_interval_forests.py @@ -0,0 +1,80 @@ +"""Test interval forest classifiers.""" + +import pytest + +from aeon.classification.interval_based import ( + CanonicalIntervalForestClassifier, + DrCIFClassifier, + RandomIntervalSpectralEnsembleClassifier, + SupervisedTimeSeriesForest, + TimeSeriesForestClassifier, +) +from aeon.classification.sklearn import ContinuousIntervalTree +from aeon.testing.testing_data import EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION +from aeon.testing.utils.estimator_checks import _assert_predict_probabilities +from aeon.utils.validation._dependencies import _check_soft_dependencies +from aeon.visualisation import plot_temporal_importance_curves + + +@pytest.mark.skipif( + not _check_soft_dependencies(["matplotlib", "seaborn"], severity="none"), + reason="skip test if required soft dependency not available", +) +@pytest.mark.parametrize( + "cls", + [ + CanonicalIntervalForestClassifier, + DrCIFClassifier, + SupervisedTimeSeriesForest, + TimeSeriesForestClassifier, + ], +) +def test_tic_curves(cls): + """Test whether temporal_importance_curves runs without error.""" + import matplotlib + + matplotlib.use("Agg") + + X_train, y_train = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["numpy3D"]["train"] + + params = cls._get_test_params() + if isinstance(params, list): + params = params[0] + params.update({"base_estimator": ContinuousIntervalTree()}) + + clf = cls(**params) + clf.fit(X_train, y_train) + + names, curves = 
clf.temporal_importance_curves() + plot_temporal_importance_curves(curves, names) + + +@pytest.mark.parametrize("cls", [RandomIntervalSpectralEnsembleClassifier]) +def test_tic_curves_invalid(cls): + """Test whether temporal_importance_curves raises an error.""" + clf = cls() + with pytest.raises( + NotImplementedError, match="No temporal importance curves available." + ): + clf.temporal_importance_curves() + + +@pytest.mark.skipif( + not _check_soft_dependencies(["pycatch22"], severity="none"), + reason="skip test if required soft dependency not available", +) +@pytest.mark.parametrize("cls", [CanonicalIntervalForestClassifier, DrCIFClassifier]) +def test_forest_pycatch22(cls): + """Test whether the forest classifiers with pycatch22 run without error.""" + X_train, y_train = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["numpy3D"]["train"] + X_test, _ = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["numpy3D"]["test"] + + params = cls._get_test_params() + if isinstance(params, list): + params = params[0] + params.update({"use_pycatch22": True}) + + clf = cls(**params) + clf.fit(X_train, y_train) + prob = clf.predict_proba(X_test) + _assert_predict_probabilities(prob, X_test, n_classes=2) diff --git a/aeon/classification/interval_based/tests/test_interval_pipelines.py b/aeon/classification/interval_based/tests/test_interval_pipelines.py index 4e0f120f4e..7e9b77803f 100644 --- a/aeon/classification/interval_based/tests/test_interval_pipelines.py +++ b/aeon/classification/interval_based/tests/test_interval_pipelines.py @@ -7,31 +7,24 @@ RandomIntervalClassifier, SupervisedIntervalClassifier, ) -from aeon.testing.data_generation import make_example_3d_numpy +from aeon.testing.testing_data import EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION +from aeon.testing.utils.estimator_checks import _assert_predict_probabilities @pytest.mark.parametrize( "cls", [SupervisedIntervalClassifier, RandomIntervalClassifier] ) -def test_random_interval_classifier(cls): +def test_interval_pipeline_classifiers(cls): """Test the random interval classifiers.""" - X, y = make_example_3d_numpy(n_cases=5, n_channels=1, n_timepoints=12) - r = cls(estimator=SVC()) - r.fit(X, y) - p = r.predict_proba(X) - assert p.shape == (5, 2) - r = cls(n_jobs=2) - r.fit(X, y) - assert r._estimator.n_jobs == 2 + X_train, y_train = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["numpy3D"]["train"] + X_test, y_test = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["numpy3D"]["test"] + params = cls._get_test_params() + if isinstance(params, list): + params = params[0] + params.update({"estimator": SVC()}) -def test_parameter_sets(): - """Test results comparison parameter sets.""" - paras = SupervisedIntervalClassifier._get_test_params( - parameter_set="results_comparison" - ) - assert paras["n_intervals"] == 2 - paras = RandomIntervalClassifier._get_test_params( - parameter_set="results_comparison" - ) - assert paras["n_intervals"] == 3 + clf = cls(**params) + clf.fit(X_train, y_train) + prob = clf.predict_proba(X_test) + _assert_predict_probabilities(prob, X_test, n_classes=2) diff --git a/aeon/classification/interval_based/tests/test_quant.py b/aeon/classification/interval_based/tests/test_quant.py index d0cf3fe9ce..b092b8c356 100644 --- a/aeon/classification/interval_based/tests/test_quant.py +++ b/aeon/classification/interval_based/tests/test_quant.py @@ -1,12 +1,11 @@ """Tests for the QUANTClassifier class.""" -import numpy as np import pytest -from sklearn.linear_model import RidgeClassifierCV from sklearn.svm import SVC from aeon.classification.interval_based import 
QUANTClassifier -from aeon.testing.data_generation import make_example_3d_numpy +from aeon.testing.testing_data import EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION +from aeon.testing.utils.estimator_checks import _assert_predict_probabilities from aeon.utils.validation._dependencies import _check_soft_dependencies @@ -16,13 +15,12 @@ ) def test_alternative_estimator(): """Test QUANTClassifier with an alternative estimator.""" - X, y = make_example_3d_numpy() - clf = QUANTClassifier(estimator=RidgeClassifierCV()) - clf.fit(X, y) - pred = clf.predict(X) + X, y = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["numpy3D"]["train"] - assert isinstance(pred, np.ndarray) - assert pred.shape[0] == X.shape[0] + clf = QUANTClassifier(estimator=SVC()) + clf.fit(X, y) + prob = clf.predict_proba(X) + _assert_predict_probabilities(prob, X, n_classes=2) @pytest.mark.skipif( @@ -31,7 +29,7 @@ def test_alternative_estimator(): ) def test_invalid_inputs(): """Test handling of invalid inputs by QUANTClassifier.""" - X, y = make_example_3d_numpy() + X, y = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["numpy3D"]["train"] with pytest.raises(ValueError, match="quantile_divisor must be >= 1"): quant = QUANTClassifier(quantile_divisor=0) @@ -40,16 +38,3 @@ def test_invalid_inputs(): with pytest.raises(ValueError, match="interval_depth must be >= 1"): quant = QUANTClassifier(interval_depth=0) quant.fit(X, y) - - -@pytest.mark.skipif( - not _check_soft_dependencies("torch", severity="none"), - reason="skip test if required soft dependency tsfresh not available", -) -def test_predict_proba(): - """Test predict proba with a sklearn classifier without predict proba.""" - X, y = make_example_3d_numpy(n_cases=5, n_channels=1, n_timepoints=12) - r = QUANTClassifier(estimator=SVC()) - r.fit(X, y) - p = r.predict_proba(X) - assert p.shape == (5, 2) diff --git a/aeon/classification/interval_based/tests/test_rise.py b/aeon/classification/interval_based/tests/test_rise.py deleted file mode 100644 index 06be56a535..0000000000 --- a/aeon/classification/interval_based/tests/test_rise.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Test the RISE classifier.""" - -from aeon.classification.interval_based import RandomIntervalSpectralEnsembleClassifier -from aeon.classification.sklearn import ContinuousIntervalTree - - -def test_with_nan(): - """Test nans correct with ContinuousIntervalTree.""" - r = RandomIntervalSpectralEnsembleClassifier( - base_estimator=ContinuousIntervalTree() - ) - assert r.replace_nan == "nan" diff --git a/aeon/classification/interval_based/tests/test_tsf.py b/aeon/classification/interval_based/tests/test_tsf.py deleted file mode 100644 index d586c59be8..0000000000 --- a/aeon/classification/interval_based/tests/test_tsf.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Test the CIF classifier.""" - -from aeon.classification.interval_based import TimeSeriesForestClassifier -from aeon.classification.sklearn import ContinuousIntervalTree - - -def test_cif(): - """Test nans correct with ContinuousIntervalTree.""" - tsf = TimeSeriesForestClassifier(base_estimator=ContinuousIntervalTree()) - assert tsf.replace_nan == "nan" diff --git a/aeon/regression/interval_based/_cif.py b/aeon/regression/interval_based/_cif.py index 4899f39cab..6892e83051 100644 --- a/aeon/regression/interval_based/_cif.py +++ b/aeon/regression/interval_based/_cif.py @@ -193,6 +193,15 @@ def __init__( if use_pycatch22: self.set_tags(**{"python_dependencies": "pycatch22"}) + def _fit(self, X, y): + return super()._fit(X, y) + + def _predict(self, X) -> np.ndarray: + return 
super()._predict(X) + + def _fit_predict(self, X, y) -> np.ndarray: + return super()._fit_predict(X, y) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/regression/interval_based/_drcif.py b/aeon/regression/interval_based/_drcif.py index 843bb3c7b4..6247d682f3 100644 --- a/aeon/regression/interval_based/_drcif.py +++ b/aeon/regression/interval_based/_drcif.py @@ -4,6 +4,7 @@ periodogram and differences representations as well as the base series. """ +import numpy as np from sklearn.preprocessing import FunctionTransformer from aeon.base.estimators.interval_based import BaseIntervalForest @@ -176,10 +177,7 @@ def __init__( n_jobs=1, parallel_backend=None, ): - d = [] self.use_pycatch22 = use_pycatch22 - if use_pycatch22: - d.append("pycatch22") series_transformers = [ None, @@ -216,8 +214,17 @@ def __init__( parallel_backend=parallel_backend, ) - if d: - self.set_tags(**{"python_dependencies": d}) + if use_pycatch22: + self.set_tags(**{"python_dependencies": "pycatch22"}) + + def _fit(self, X, y): + return super()._fit(X, y) + + def _predict(self, X) -> np.ndarray: + return super()._predict(X) + + def _fit_predict(self, X, y) -> np.ndarray: + return super()._fit_predict(X, y) @classmethod def _get_test_params(cls, parameter_set="default"): diff --git a/aeon/regression/interval_based/_interval_forest.py b/aeon/regression/interval_based/_interval_forest.py index aa0195298f..c155a23271 100644 --- a/aeon/regression/interval_based/_interval_forest.py +++ b/aeon/regression/interval_based/_interval_forest.py @@ -200,6 +200,15 @@ def __init__( parallel_backend=parallel_backend, ) + def _fit(self, X, y): + return super()._fit(X, y) + + def _predict(self, X) -> np.ndarray: + return super()._predict(X) + + def _fit_predict(self, X, y) -> np.ndarray: + return super()._fit_predict(X, y) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/regression/interval_based/_rise.py b/aeon/regression/interval_based/_rise.py index ef1d34d8bb..b9999a4305 100644 --- a/aeon/regression/interval_based/_rise.py +++ b/aeon/regression/interval_based/_rise.py @@ -161,6 +161,15 @@ def __init__( parallel_backend=parallel_backend, ) + def _fit(self, X, y): + return super()._fit(X, y) + + def _predict(self, X) -> np.ndarray: + return super()._predict(X) + + def _fit_predict(self, X, y) -> np.ndarray: + return super()._fit_predict(X, y) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/regression/interval_based/_tsf.py b/aeon/regression/interval_based/_tsf.py index c15da5a3ad..b75982a7f2 100644 --- a/aeon/regression/interval_based/_tsf.py +++ b/aeon/regression/interval_based/_tsf.py @@ -161,6 +161,15 @@ def __init__( parallel_backend=parallel_backend, ) + def _fit(self, X, y): + return super()._fit(X, y) + + def _predict(self, X) -> np.ndarray: + return super()._predict(X) + + def _fit_predict(self, X, y) -> np.ndarray: + return super()._fit_predict(X, y) + @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. @@ -182,7 +191,14 @@ def _get_test_params(cls, parameter_set="default"): Each dict are parameters to construct an "interesting" test instance, i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. 
""" - return { - "n_estimators": 2, - "n_intervals": 2, - } + if parameter_set == "contracting": + return { + "time_limit_in_minutes": 5, + "contract_max_n_estimators": 2, + "n_intervals": 2, + } + else: + return { + "n_estimators": 2, + "n_intervals": 2, + } diff --git a/aeon/regression/interval_based/tests/__init__.py b/aeon/regression/interval_based/tests/__init__.py index 4be71f9fe2..09143e388f 100644 --- a/aeon/regression/interval_based/tests/__init__.py +++ b/aeon/regression/interval_based/tests/__init__.py @@ -1 +1 @@ -"""Test interval-based regression models.""" +"""Tests for interval-based regressors.""" diff --git a/aeon/regression/interval_based/tests/test_cif.py b/aeon/regression/interval_based/tests/test_cif.py deleted file mode 100644 index db5d811ffa..0000000000 --- a/aeon/regression/interval_based/tests/test_cif.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Test DrCIF regressor.""" - -from aeon.regression.interval_based import CanonicalIntervalForestRegressor - - -def test_cif(): - """Test with catch22 enabled.""" - dr = CanonicalIntervalForestRegressor(use_pycatch22=True) - d = dr.get_tag("python_dependencies") - assert d == "pycatch22" - paras = CanonicalIntervalForestRegressor._get_test_params( - parameter_set="contracting" - ) - assert paras["time_limit_in_minutes"] == 5 - assert paras["att_subsample_size"] == 2 diff --git a/aeon/regression/interval_based/tests/test_dr_cif.py b/aeon/regression/interval_based/tests/test_dr_cif.py deleted file mode 100644 index d6cf83d36f..0000000000 --- a/aeon/regression/interval_based/tests/test_dr_cif.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Test DrCIF regressor.""" - -from aeon.regression.interval_based import DrCIFRegressor - - -def test_dr_cif(): - """Test with pycatch22 enabled.""" - dr = DrCIFRegressor(use_pycatch22=True) - d = dr.get_tag("python_dependencies") - assert d[0] == "pycatch22" - paras = DrCIFRegressor._get_test_params(parameter_set="contracting") - assert paras["time_limit_in_minutes"] == 5 - assert paras["att_subsample_size"] == 2 diff --git a/aeon/regression/interval_based/tests/test_interval_forest.py b/aeon/regression/interval_based/tests/test_interval_forest.py deleted file mode 100644 index 173fe7dfa2..0000000000 --- a/aeon/regression/interval_based/tests/test_interval_forest.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Test Inter regressor.""" - -from aeon.regression.interval_based import IntervalForestRegressor - - -def test_cif(): - """Test with IntervalForestRegressor contracting.""" - paras = IntervalForestRegressor._get_test_params(parameter_set="contracting") - assert paras["time_limit_in_minutes"] == 5 - assert paras["n_intervals"] == 2 diff --git a/aeon/regression/interval_based/tests/test_interval_forests.py b/aeon/regression/interval_based/tests/test_interval_forests.py new file mode 100644 index 0000000000..e96dd539e8 --- /dev/null +++ b/aeon/regression/interval_based/tests/test_interval_forests.py @@ -0,0 +1,52 @@ +"""Test interval forest regressors.""" + +import pytest + +from aeon.regression.interval_based import ( + CanonicalIntervalForestRegressor, + DrCIFRegressor, + RandomIntervalSpectralEnsembleRegressor, + TimeSeriesForestRegressor, +) +from aeon.testing.testing_data import EQUAL_LENGTH_UNIVARIATE_REGRESSION +from aeon.testing.utils.estimator_checks import _assert_predict_labels +from aeon.utils.validation._dependencies import _check_soft_dependencies + + +@pytest.mark.parametrize( + "cls", + [ + CanonicalIntervalForestRegressor, + DrCIFRegressor, + TimeSeriesForestRegressor, + 
RandomIntervalSpectralEnsembleRegressor, + ], +) +def test_tic_curves_invalid(cls): + """Test whether temporal_importance_curves raises an error.""" + reg = cls() + with pytest.raises( + NotImplementedError, match="Temporal importance curves are not available." + ): + reg.temporal_importance_curves() + + +@pytest.mark.skipif( + not _check_soft_dependencies(["pycatch22"], severity="none"), + reason="skip test if required soft dependency not available", +) +@pytest.mark.parametrize("cls", [CanonicalIntervalForestRegressor, DrCIFRegressor]) +def test_forest_pycatch22(cls): + """Test whether the forest regressors with pycatch22 run without error.""" + X_train, y_train = EQUAL_LENGTH_UNIVARIATE_REGRESSION["numpy3D"]["train"] + X_test, _ = EQUAL_LENGTH_UNIVARIATE_REGRESSION["numpy3D"]["test"] + + params = cls._get_test_params() + if isinstance(params, list): + params = params[0] + params.update({"use_pycatch22": True}) + + reg = cls(**params) + reg.fit(X_train, y_train) + preds = reg.predict(X_test) + _assert_predict_labels(preds, X_test) diff --git a/aeon/regression/interval_based/tests/test_interval_pipelines.py b/aeon/regression/interval_based/tests/test_interval_pipelines.py new file mode 100644 index 0000000000..854c93006c --- /dev/null +++ b/aeon/regression/interval_based/tests/test_interval_pipelines.py @@ -0,0 +1,25 @@ +"""Test interval pipelines.""" + +import pytest +from sklearn.svm import SVR + +from aeon.regression.interval_based import RandomIntervalRegressor +from aeon.testing.testing_data import EQUAL_LENGTH_UNIVARIATE_REGRESSION +from aeon.testing.utils.estimator_checks import _assert_predict_labels + + +@pytest.mark.parametrize("cls", [RandomIntervalRegressor]) +def test_interval_pipeline_regressors(cls): + """Test the random interval regressors.""" + X_train, y_train = EQUAL_LENGTH_UNIVARIATE_REGRESSION["numpy3D"]["train"] + X_test, y_test = EQUAL_LENGTH_UNIVARIATE_REGRESSION["numpy3D"]["test"] + + params = cls._get_test_params() + if isinstance(params, list): + params = params[0] + params.update({"estimator": SVR()}) + + reg = cls(**params) + reg.fit(X_train, y_train) + preds = reg.predict(X_test) + _assert_predict_labels(preds, X_test) diff --git a/aeon/regression/interval_based/tests/test_rand_interval.py b/aeon/regression/interval_based/tests/test_rand_interval.py deleted file mode 100644 index 1dbe60a50e..0000000000 --- a/aeon/regression/interval_based/tests/test_rand_interval.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Test interval-based regression models.""" - -from sklearn.ensemble import RandomForestRegressor - -from aeon.regression.interval_based import RandomIntervalRegressor -from aeon.testing.data_generation import make_example_3d_numpy - - -def test_cif(): - """Test with IntervalForestRegressor contracting.""" - cls = RandomIntervalRegressor( - n_jobs=1, - n_intervals=5, - estimator=RandomForestRegressor(n_estimators=10, n_jobs=2), - ) - X, y = make_example_3d_numpy() - cls.fit(X, y) - assert cls._estimator.n_jobs == 1 diff --git a/aeon/testing/estimator_checking/_estimator_checking.py b/aeon/testing/estimator_checking/_estimator_checking.py index a6ba9dc130..2211e659cb 100644 --- a/aeon/testing/estimator_checking/_estimator_checking.py +++ b/aeon/testing/estimator_checking/_estimator_checking.py @@ -351,9 +351,12 @@ def _get_check_estimator_ids(obj): if not obj.keywords: return obj.func.__name__ - kwstring = ",".join( - [f"{k}={_get_check_estimator_ids(v)}" for k, v in obj.keywords.items()] - ) + kwlist = [] + for k, v in obj.keywords.items(): + v = 
_get_check_estimator_ids(v) + if v is not None: + kwlist.append(f"{k}={v}") + kwstring = ",".join(kwlist) if kwlist else "" return f"{obj.func.__name__}({kwstring})" elif isclass(obj): return obj.__name__ @@ -363,5 +366,7 @@ def _get_check_estimator_ids(obj): s = re.sub(r"", "func", s) s = re.sub(r"", "boundmethod", s) return s - else: + elif isinstance(obj, str): return obj + else: + return None diff --git a/aeon/testing/estimator_checking/_yield_classification_checks.py b/aeon/testing/estimator_checking/_yield_classification_checks.py index 1a019b3d5c..f23e2b6105 100644 --- a/aeon/testing/estimator_checking/_yield_classification_checks.py +++ b/aeon/testing/estimator_checking/_yield_classification_checks.py @@ -2,12 +2,13 @@ import inspect import os +import sys import tempfile import time from functools import partial -from sys import platform import numpy as np +from numpy.testing import assert_array_equal from sklearn.utils._testing import set_random_state from aeon.base._base import _clone_estimator @@ -18,21 +19,43 @@ unit_test_proba, ) from aeon.testing.testing_data import FULL_TEST_DATA_DICT -from aeon.testing.utils.estimator_checks import _assert_array_almost_equal, _get_tag +from aeon.testing.utils.estimator_checks import ( + _assert_array_almost_equal, + _assert_predict_labels, + _assert_predict_probabilities, + _get_tag, +) +from aeon.utils import COLLECTIONS_DATA_TYPES from aeon.utils.validation import get_n_cases def _yield_classification_checks(estimator_class, estimator_instances, datatypes): """Yield all classification checks for an aeon classifier.""" # only class required - yield partial( - check_classifier_against_expected_results, estimator_class=estimator_class - ) - yield partial(check_classifier_tags_consistent, estimator_class=estimator_class) - yield partial( - check_classifier_does_not_override_final_methods, - estimator_class=estimator_class, - ) + if sys.platform != "darwin": # We cannot guarantee same results on ARM macOS + # Compare against results for both UnitTest and BasicMotions if available + yield partial( + check_classifier_against_expected_results, + estimator_class=estimator_class, + data_name="UnitTest", + data_loader=load_unit_test, + results_dict=unit_test_proba, + resample_seed=0, + ) + # the test currently fails when numba is disabled. See issue #622 + if ( + estimator_class.__name__ != "HIVECOTEV2" + or os.environ.get("NUMBA_DISABLE_JIT") != "1" + ): + yield partial( + check_classifier_against_expected_results, + estimator_class=estimator_class, + data_name="BasicMotions", + data_loader=load_basic_motions, + results_dict=basic_motions_proba, + resample_seed=4, + ) + yield partial(check_classifier_overrides_and_tags, estimator_class=estimator_class) # data type irrelevant if _get_tag(estimator_class, "capability:contractable", raise_error=True): @@ -73,82 +96,51 @@ def _yield_classification_checks(estimator_class, estimator_instances, datatypes ) -def check_classifier_against_expected_results(estimator_class): +def check_classifier_against_expected_results( + estimator_class, data_name, data_loader, results_dict, resample_seed +): """Test classifier against stored results.""" - # we only use the first estimator instance for testing - class_name = estimator_class.__name__ - - # We cannot guarantee same results on ARM macOS - if platform == "darwin": - return None - - # the test currently fails when numba is disabled. 
See issue #622 - import os - - if class_name == "HIVECOTEV2" and os.environ.get("NUMBA_DISABLE_JIT") == "1": - return None - - for data_name, data_dict, data_loader, data_seed in [ - ["UnitTest", unit_test_proba, load_unit_test, 0], - ["BasicMotions", basic_motions_proba, load_basic_motions, 4], - ]: - # retrieve expected predict_proba output, and skip test if not available - if class_name in data_dict.keys(): - expected_probas = data_dict[class_name] - else: - # skip test if no expected probas are registered - continue - - # we only use the first estimator instance for testing - estimator_instance = estimator_class._create_test_instance( - parameter_set="results_comparison" - ) - # set random seed if possible - set_random_state(estimator_instance, 0) - - # load test data - X_train, y_train = data_loader(split="train") - X_test, _ = data_loader(split="test") - indices = np.random.RandomState(data_seed).choice( - len(y_train), 10, replace=False - ) + # retrieve expected predict_proba output, and skip test if not available + if estimator_class.__name__ in results_dict.keys(): + expected_probas = results_dict[estimator_class.__name__] + else: + # skip test if no expected probas are registered + return f"No stored results for {estimator_class.__name__} on {data_name}" - # train classifier and predict probas - estimator_instance.fit(X_train[indices], y_train[indices]) - y_proba = estimator_instance.predict_proba(X_test[indices]) + # we only use the first estimator instance for testing + estimator_instance = estimator_class._create_test_instance( + parameter_set="results_comparison", return_first=True + ) + # set random seed if possible + set_random_state(estimator_instance, 0) + + # load test data + X_train, y_train = data_loader(split="train") + X_test, y_test = data_loader(split="test") + # resample test data + indices = np.random.RandomState(resample_seed).choice( + len(y_train), 10, replace=False + ) - # assert probabilities are the same - _assert_array_almost_equal( - y_proba, - expected_probas, - decimal=2, - err_msg=f"Failed to reproduce results for {class_name} on {data_name}", - ) + # train classifier and predict probas + estimator_instance.fit(X_train[indices], y_train[indices]) + y_proba = estimator_instance.predict_proba(X_test[indices]) + + # assert probabilities are the same + _assert_array_almost_equal( + y_proba, + expected_probas, + decimal=2, + err_msg=( + f"Failed to reproduce results for {estimator_class.__name__} " + f"on {data_name}" + ), + ) -def check_classifier_tags_consistent(estimator_class): - """Test the tag X_inner_type is consistent with capability:unequal_length.""" - valid_types = {"np-list", "df-list", "pd-multiindex"} - unequal = estimator_class.get_class_tag("capability:unequal_length") - if unequal: # one of X_inner_types must be capable of storing unequal length - internal_types = estimator_class.get_class_tag("X_inner_type") - if isinstance(internal_types, str): - assert internal_types in valid_types - else: # must be a list - assert bool(set(internal_types) & valid_types) - # Test can actually fit/predict with multivariate if tag is set - multivariate = estimator_class.get_class_tag("capability:multivariate") - if multivariate: - X = np.random.random((10, 2, 20)) - y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1]) - inst = estimator_class._create_test_instance(parameter_set="default") - inst.fit(X, y) - inst.predict(X) - inst.predict_proba(X) - - -def check_classifier_does_not_override_final_methods(estimator_class): - """Test does not override final 
methods.""" +def check_classifier_overrides_and_tags(estimator_class): + """Test compliance with the classifier base class contract.""" + # Test they don't override final methods, because Python does not enforce this final_methods = [ "fit", "predict", @@ -163,13 +155,35 @@ def check_classifier_does_not_override_final_methods(estimator_class): f"Override _{method} instead." ) + # axis class parameter is for internal use only + assert "axis" not in estimator_class.__dict__ + + # Test valid tag for X_inner_type + X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") + if isinstance(X_inner_type, str): + assert X_inner_type in COLLECTIONS_DATA_TYPES + else: # must be a list + assert all([t in COLLECTIONS_DATA_TYPES for t in X_inner_type]) + + # one of X_inner_types must be capable of storing unequal length + if estimator_class.get_class_tag("capability:unequal_length"): + valid_unequal_types = ["np-list", "df-list", "pd-multiindex"] + if isinstance(X_inner_type, str): + assert X_inner_type in valid_unequal_types + else: # must be a list + assert any([t in valid_unequal_types for t in X_inner_type]) + + # Must have at least one set to True + multi = estimator_class.get_class_tag(tag_name="capability:multivariate") + uni = estimator_class.get_class_tag(tag_name="capability:univariate") + assert multi or uni + def check_contracted_classifier(estimator_class, datatype): """Test classifiers that can be contracted.""" estimator_instance = estimator_class._create_test_instance( parameter_set="contracting" ) - default_params = inspect.signature(estimator_class.__init__).parameters # check that the classifier has a time_limit_in_minutes parameter @@ -192,7 +206,7 @@ def check_contracted_classifier(estimator_class, datatype): ) # too short of a contract time can lead to test failures - if vars(estimator_instance).get("time_limit_in_minutes", None) < 0.5: + if vars(estimator_instance).get("time_limit_in_minutes", 0) < 0.5: raise ValueError( "Test parameters for test_contracted_classifier must set " "time_limit_in_minutes to 0.5 or more. 
It is recommended to make " @@ -216,68 +230,58 @@ def check_contracted_classifier(estimator_class, datatype): def check_classifier_saving_loading_deep_learning(estimator_class, datatype): - """Test Deep Classifier saving.""" + """Test deep classifier saving.""" with tempfile.TemporaryDirectory() as tmp: - if not ( - estimator_class.__name__ - in [ - "BaseDeepClassifier", - "InceptionTimeClassifier", - "LITETimeClassifier", - "TapNetClassifier", - ] - ): - if tmp[-1] != "/": - tmp = tmp + "/" - curr_time = str(time.time_ns()) - last_file_name = curr_time + "last" - best_file_name = curr_time + "best" - init_file_name = curr_time + "init" - - deep_cls_train = estimator_class( - n_epochs=2, - save_best_model=True, - save_last_model=True, - save_init_model=True, - best_file_name=best_file_name, - last_file_name=last_file_name, - init_file_name=init_file_name, - file_path=tmp, - ) - deep_cls_train.fit( - FULL_TEST_DATA_DICT[datatype]["train"][0], - FULL_TEST_DATA_DICT[datatype]["train"][1], - ) + if tmp[-1] != "/": + tmp = tmp + "/" + + curr_time = str(time.time_ns()) + last_file_name = curr_time + "last" + best_file_name = curr_time + "best" + init_file_name = curr_time + "init" + + deep_cls_train = estimator_class( + n_epochs=2, + save_best_model=True, + save_last_model=True, + save_init_model=True, + best_file_name=best_file_name, + last_file_name=last_file_name, + init_file_name=init_file_name, + file_path=tmp, + ) + deep_cls_train.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) - deep_cls_best = estimator_class() - deep_cls_best.load_model( - model_path=os.path.join(tmp, best_file_name + ".keras"), - classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]), - ) - ypred_best = deep_cls_best.predict( - FULL_TEST_DATA_DICT[datatype]["train"][0] - ) - assert len(ypred_best) == len(FULL_TEST_DATA_DICT[datatype]["train"][1]) + deep_cls_best = estimator_class() + deep_cls_best.load_model( + model_path=os.path.join(tmp, best_file_name + ".keras"), + classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]), + ) + ypred_best = deep_cls_best.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred_best, datatype) - deep_cls_last = estimator_class() - deep_cls_last.load_model( - model_path=os.path.join(tmp, last_file_name + ".keras"), - classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]), - ) - ypred_last = deep_cls_last.predict( - FULL_TEST_DATA_DICT[datatype]["train"][0] - ) - assert len(ypred_last) == len(FULL_TEST_DATA_DICT[datatype]["train"][1]) + deep_cls_last = estimator_class() + deep_cls_last.load_model( + model_path=os.path.join(tmp, last_file_name + ".keras"), + classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]), + ) + ypred_last = deep_cls_last.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred_last, datatype) - deep_cls_init = estimator_class() - deep_cls_init.load_model( - model_path=os.path.join(tmp, init_file_name + ".keras"), - classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]), - ) - ypred_init = deep_cls_init.predict( - FULL_TEST_DATA_DICT[datatype]["train"][0] - ) - assert len(ypred_init) == len(FULL_TEST_DATA_DICT[datatype]["train"][1]) + deep_cls_init = estimator_class() + deep_cls_init.load_model( + model_path=os.path.join(tmp, init_file_name + ".keras"), + classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]), + ) + ypred_init = deep_cls_init.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred_init, 
datatype) + + ypred = deep_cls_train.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred, datatype) + assert_array_equal(ypred, ypred_best) def check_classifier_train_estimate(estimator, datatype): @@ -285,7 +289,6 @@ def check_classifier_train_estimate(estimator, datatype): estimator = _clone_estimator(estimator) estimator_class = type(estimator) - # if we have a train_estimate parameter set use it, else use default if ( "_fit_predict" not in estimator_class.__dict__ or "_fit_predict_proba" not in estimator_class.__dict__ @@ -302,27 +305,22 @@ def check_classifier_train_estimate(estimator, datatype): FULL_TEST_DATA_DICT[datatype]["train"][0], FULL_TEST_DATA_DICT[datatype]["train"][1], ) - assert isinstance(train_preds, np.ndarray) - assert train_preds.shape == ( - get_n_cases(FULL_TEST_DATA_DICT[datatype]["train"][0]), + _assert_predict_labels( + train_preds, datatype, split="train", unique_labels=unique_labels ) - assert np.all(np.isin(np.unique(train_preds), unique_labels)) # check the probabilities are valid train_proba = estimator.fit_predict_proba( FULL_TEST_DATA_DICT[datatype]["train"][0], FULL_TEST_DATA_DICT[datatype]["train"][1], ) - assert isinstance(train_proba, np.ndarray) - assert train_proba.shape == ( - get_n_cases(FULL_TEST_DATA_DICT[datatype]["train"][0]), - len(unique_labels), + _assert_predict_probabilities( + train_proba, datatype, split="train", n_classes=len(unique_labels) ) - np.testing.assert_almost_equal(train_proba.sum(axis=1), 1, decimal=4) def check_classifier_random_state_deep_learning(estimator, datatype): - """Test Deep Classifier seeding.""" + """Test deep classifier seeding.""" random_state = 42 deep_cls1 = _clone_estimator(estimator, random_state=random_state) @@ -357,11 +355,7 @@ def check_classifier_random_state_deep_learning(estimator, datatype): def check_classifier_output(estimator, datatype): - """Test classifier outputs the correct data types and values. - - Test predict produces a np.array or pd.Series with only values seen in the train - data, and that predict_proba probability estimates add up to one. 
- """ + """Test classifier outputs the correct data types and values.""" estimator = _clone_estimator(estimator) unique_labels = np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]) @@ -372,18 +366,8 @@ def check_classifier_output(estimator, datatype): FULL_TEST_DATA_DICT[datatype]["train"][1], ) y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) - - # check predict - assert isinstance(y_pred, np.ndarray) - assert y_pred.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) - assert np.all(np.isin(np.unique(y_pred), unique_labels)) + _assert_predict_labels(y_pred, datatype, unique_labels=unique_labels) # check predict proba (all classifiers have predict_proba by default) y_proba = estimator.predict_proba(FULL_TEST_DATA_DICT[datatype]["test"][0]) - - assert isinstance(y_proba, np.ndarray) - assert y_proba.shape == ( - get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]), - len(unique_labels), - ) - np.testing.assert_almost_equal(y_proba.sum(axis=1), 1, decimal=4) + _assert_predict_probabilities(y_proba, datatype, n_classes=len(unique_labels)) diff --git a/aeon/testing/estimator_checking/_yield_regression_checks.py b/aeon/testing/estimator_checking/_yield_regression_checks.py index 3a8e53882b..bccc5edd31 100644 --- a/aeon/testing/estimator_checking/_yield_regression_checks.py +++ b/aeon/testing/estimator_checking/_yield_regression_checks.py @@ -1,12 +1,14 @@ """Tests for all regressors.""" +import inspect import os +import sys import tempfile import time from functools import partial -from sys import platform import numpy as np +from numpy.ma.testutils import assert_array_equal from sklearn.utils._testing import set_random_state from aeon.base._base import _clone_estimator @@ -17,21 +19,45 @@ covid_3month_preds, ) from aeon.testing.testing_data import FULL_TEST_DATA_DICT -from aeon.testing.utils.estimator_checks import _assert_array_almost_equal +from aeon.testing.utils.estimator_checks import ( + _assert_array_almost_equal, + _assert_predict_labels, + _get_tag, +) +from aeon.utils import COLLECTIONS_DATA_TYPES def _yield_regression_checks(estimator_class, estimator_instances, datatypes): """Yield all regression checks for an aeon regressor.""" # only class required - yield partial( - check_regressor_against_expected_results, estimator_class=estimator_class - ) - yield partial(check_regressor_tags_consistent, estimator_class=estimator_class) - yield partial( - check_regressor_does_not_override_final_methods, estimator_class=estimator_class - ) + if sys.platform != "darwin": # We cannot guarantee same results on ARM macOS + # Compare against results for both Covid3Month and CardanoSentiment if available + yield partial( + check_regressor_against_expected_results, + estimator_class=estimator_class, + data_name="Covid3Month", + data_loader=load_covid_3month, + results_dict=covid_3month_preds, + resample_seed=0, + ) + yield partial( + check_regressor_against_expected_results, + estimator_class=estimator_class, + data_name="CardanoSentiment", + data_loader=load_cardano_sentiment, + results_dict=cardano_sentiment_preds, + resample_seed=0, + ) + yield partial(check_regressor_overrides_and_tags, estimator_class=estimator_class) # data type irrelevant + if _get_tag(estimator_class, "capability:contractable", raise_error=True): + yield partial( + check_contracted_regressor, + estimator_class=estimator_class, + datatype=datatypes[0][0], + ) + if issubclass(estimator_class, BaseDeepRegressor): yield partial( check_regressor_saving_loading_deep_learning, @@ -42,6 +68,13 @@ 
def _yield_regression_checks(estimator_class, estimator_instances, datatypes): # test class instances for i, estimator in enumerate(estimator_instances): # data type irrelevant + if _get_tag(estimator_class, "capability:train_estimate", raise_error=True): + yield partial( + check_regressor_train_estimate, + estimator=estimator, + datatype=datatypes[0][0], + ) + if isinstance(estimator, BaseDeepRegressor): yield partial( check_regressor_random_state_deep_learning, @@ -49,151 +82,217 @@ def _yield_regression_checks(estimator_class, estimator_instances, datatypes): datatype=datatypes[i][0], ) + # test all data types + for datatype in datatypes[i]: + yield partial( + check_regressor_output, estimator=estimator, datatype=datatype + ) + + +def check_regressor_against_expected_results( + estimator_class, data_name, data_loader, results_dict, resample_seed +): + """Test regressor against stored results.""" + # retrieve expected predict output, and skip test if not available + if estimator_class.__name__ in results_dict.keys(): + expected_preds = results_dict[estimator_class.__name__] + else: + # skip test if no expected preds are registered + return f"No stored results for {estimator_class.__name__} on {data_name}" -def check_regressor_against_expected_results(estimator_class): - """Test classifier against stored results.""" # we only use the first estimator instance for testing - classname = estimator_class.__name__ - - # We cannot guarantee same results on ARM macOS - if platform == "darwin": - return None - - for data_name, data_dict, data_loader, data_seed in [ - ["Covid3Month", covid_3month_preds, load_covid_3month, 0], - ["CardanoSentiment", cardano_sentiment_preds, load_cardano_sentiment, 0], - ]: - # retrieve expected predict output, and skip test if not available - if classname in data_dict.keys(): - expected_preds = data_dict[classname] - else: - # skip test if no expected preds are registered - continue - - # we only use the first estimator instance for testing - estimator_instance = estimator_class._create_test_instance( - parameter_set="results_comparison" - ) - # set random seed if possible - set_random_state(estimator_instance, 0) - - # load test data - X_train, y_train = data_loader(split="train") - X_test, y_test = data_loader(split="test") - indices_train = np.random.RandomState(data_seed).choice( - len(y_train), 10, replace=False - ) - indices_test = np.random.RandomState(data_seed).choice( - len(y_test), 10, replace=False - ) + estimator_instance = estimator_class._create_test_instance( + parameter_set="results_comparison", return_first=True + ) + # set random seed if possible + set_random_state(estimator_instance, 0) + + # load test data + X_train, y_train = data_loader(split="train") + X_test, y_test = data_loader(split="test") + # resample test data + indices_train = np.random.RandomState(resample_seed).choice( + len(y_train), 10, replace=False + ) + indices_test = np.random.RandomState(resample_seed).choice( + len(y_test), 10, replace=False + ) - # train regressor and predict - estimator_instance.fit(X_train[indices_train], y_train[indices_train]) - y_pred = estimator_instance.predict(X_test[indices_test]) + # train regressor and predict + estimator_instance.fit(X_train[indices_train], y_train[indices_train]) + y_pred = estimator_instance.predict(X_test[indices_test]) + + # assert predictions are the same + _assert_array_almost_equal( + y_pred, + expected_preds, + decimal=2, + err_msg=( + f"Failed to reproduce results for {estimator_class.__name__} " + f"on {data_name}" + 
), + ) - # assert predictions are the same - _assert_array_almost_equal( - y_pred, - expected_preds, - decimal=2, - err_msg=f"Failed to reproduce results for {classname} on {data_name}", - ) +def check_regressor_overrides_and_tags(estimator_class): + """Test compliance with the regressor base class contract.""" + # Test they don't override final methods, because Python does not enforce this + final_methods = [ + "fit", + "predict", + "fit_predict", + ] + for method in final_methods: + if method in estimator_class.__dict__: + raise ValueError( + f"Regressor {estimator_class} overrides the method {method}. " + f"Override _{method} instead." + ) -def check_regressor_tags_consistent(estimator_class): - """Test the tag X_inner_type is consistent with capability:unequal_length.""" - valid_types = {"np-list", "df-list", "pd-multivariate"} - unequal = estimator_class.get_class_tag("capability:unequal_length") - if unequal: # one of X_inner_types must be capable of storing unequal length - internal_types = estimator_class.get_class_tag("X_inner_type") - if isinstance(internal_types, str): - assert internal_types in valid_types + # axis class parameter is for internal use only + assert "axis" not in estimator_class.__dict__ + + # Test valid tag for X_inner_type + X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") + if isinstance(X_inner_type, str): + assert X_inner_type in COLLECTIONS_DATA_TYPES + else: # must be a list + assert all([t in COLLECTIONS_DATA_TYPES for t in X_inner_type]) + + # one of X_inner_types must be capable of storing unequal length + if estimator_class.get_class_tag("capability:unequal_length"): + valid_unequal_types = ["np-list", "df-list", "pd-multiindex"] + if isinstance(X_inner_type, str): + assert X_inner_type in valid_unequal_types else: # must be a list - assert bool(set(internal_types) & valid_types) - # Test can actually fit/predict with multivariate if tag is set - multivariate = estimator_class.get_class_tag("capability:multivariate") - if multivariate: - X = np.random.random((10, 2, 20)) - y = np.random.random(10) - inst = estimator_class._create_test_instance(parameter_set="default") - inst.fit(X, y) - inst.predict(X) - - -def check_regressor_does_not_override_final_methods(estimator_class): - """Test does not override final methods.""" - if "fit" in estimator_class.__dict__: - raise ValueError(f"Classifier {estimator_class} overrides the method fit") - if "predict" in estimator_class.__dict__: + assert any([t in valid_unequal_types for t in X_inner_type]) + + # Must have at least one set to True + multi = estimator_class.get_class_tag(tag_name="capability:multivariate") + uni = estimator_class.get_class_tag(tag_name="capability:univariate") + assert multi or uni + + +def check_contracted_regressor(estimator_class, datatype): + """Test regressors that can be contracted.""" + estimator_instance = estimator_class._create_test_instance( + parameter_set="contracting" + ) + default_params = inspect.signature(estimator_class.__init__).parameters + + # check that the regressor has a time_limit_in_minutes parameter + if default_params.get("time_limit_in_minutes", None) is None: + raise ValueError( + f"Regressor {estimator_class} which sets " + "capability:contractable=True must have a time_limit_in_minutes " + "parameter." 
+ ) + + # check that the default value is to turn off contracting + if default_params.get("time_limit_in_minutes", None).default not in ( + 0, + -1, + None, + ): raise ValueError( - f"Classifier {estimator_class} overrides the method " f"predict" + "time_limit_in_minutes parameter must have a default value of 0, " + "-1 or None, disabling contracting by default." ) + # too short of a contract time can lead to test failures + if vars(estimator_instance).get("time_limit_in_minutes", 0) < 0.5: + raise ValueError( + "Test parameters for test_contracted_regressor must set " + "time_limit_in_minutes to 0.5 or more. It is recommended to make " + "this larger and add an alternative stopping mechanism " + "(i.e. max ensemble members)." + ) + + # run fit and predict + estimator_instance.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + y_pred = estimator_instance.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + + # check predict + _assert_predict_labels(y_pred, datatype) + def check_regressor_saving_loading_deep_learning(estimator_class, datatype): - """Test Deep Regressor saving.""" + """Test deep regressor saving.""" with tempfile.TemporaryDirectory() as tmp: - if not ( - estimator_class.__name__ - in [ - "BaseDeepRegressor", - "InceptionTimeRegressor", - "LITETimeRegressor", - "TapNetRegressor", - ] - ): - if tmp[-1] != "/": - tmp = tmp + "/" - curr_time = str(time.time_ns()) - last_file_name = curr_time + "last" - best_file_name = curr_time + "best" - init_file_name = curr_time + "init" - - deep_rgs_train = estimator_class( - n_epochs=2, - save_best_model=True, - save_last_model=True, - save_init_model=True, - best_file_name=best_file_name, - last_file_name=last_file_name, - init_file_name=init_file_name, - file_path=tmp, - ) - deep_rgs_train.fit( - FULL_TEST_DATA_DICT[datatype]["train"][0], - FULL_TEST_DATA_DICT[datatype]["train"][1], - ) + if tmp[-1] != "/": + tmp = tmp + "/" + + curr_time = str(time.time_ns()) + last_file_name = curr_time + "last" + best_file_name = curr_time + "best" + init_file_name = curr_time + "init" + + deep_rgs_train = estimator_class( + n_epochs=2, + save_best_model=True, + save_last_model=True, + save_init_model=True, + best_file_name=best_file_name, + last_file_name=last_file_name, + init_file_name=init_file_name, + file_path=tmp, + ) + deep_rgs_train.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) - deep_rgs_best = estimator_class() - deep_rgs_best.load_model( - model_path=os.path.join(tmp, best_file_name + ".keras"), - ) - ypred_best = deep_rgs_best.predict( - FULL_TEST_DATA_DICT[datatype]["train"][0] - ) - assert len(ypred_best) == len(FULL_TEST_DATA_DICT[datatype]["train"][1]) + deep_rgs_best = estimator_class() + deep_rgs_best.load_model( + model_path=os.path.join(tmp, best_file_name + ".keras"), + ) + ypred_best = deep_rgs_best.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred_best, datatype) - deep_rgs_last = estimator_class() - deep_rgs_last.load_model( - model_path=os.path.join(tmp, last_file_name + ".keras"), - ) - ypred_last = deep_rgs_last.predict( - FULL_TEST_DATA_DICT[datatype]["train"][0] - ) - assert len(ypred_last) == len(FULL_TEST_DATA_DICT[datatype]["train"][1]) + deep_rgs_last = estimator_class() + deep_rgs_last.load_model( + model_path=os.path.join(tmp, last_file_name + ".keras"), + ) + ypred_last = deep_rgs_last.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred_last, datatype) + + 
deep_rgs_init = estimator_class() + deep_rgs_init.load_model( + model_path=os.path.join(tmp, init_file_name + ".keras"), + ) + ypred_init = deep_rgs_init.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred_init, datatype) + + ypred = deep_rgs_train.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(ypred, datatype) + assert_array_equal(ypred, ypred_best) - deep_rgs_init = estimator_class() - deep_rgs_init.load_model( - model_path=os.path.join(tmp, init_file_name + ".keras"), - ) - ypred_init = deep_rgs_init.predict( - FULL_TEST_DATA_DICT[datatype]["train"][0] - ) - assert len(ypred_init) == len(FULL_TEST_DATA_DICT[datatype]["train"][1]) + +def check_regressor_train_estimate(estimator, datatype): + """Test regressors that can produce train set prediction estimates.""" + estimator = _clone_estimator(estimator) + estimator_class = type(estimator) + + # if we have a train_estimate parameter set use it, else use default + if "_fit_predict" not in estimator_class.__dict__: + raise ValueError( + f"Regressor {estimator_class} has capability:train_estimate=True " + "and must override the _fit_predict method." + ) + + # check the predictions are valid + train_preds = estimator.fit_predict( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + _assert_predict_labels(train_preds, datatype, split="train") def check_regressor_random_state_deep_learning(estimator, datatype): - """Test Deep Regressor seeding.""" + """Test deep regressor seeding.""" random_state = 42 deep_rgs1 = _clone_estimator(estimator, random_state=random_state) @@ -225,3 +324,16 @@ def check_regressor_random_state_deep_learning(estimator, datatype): _weight2 = np.asarray(weights2[j]) np.testing.assert_almost_equal(_weight1, _weight2, 4) + + +def check_regressor_output(estimator, datatype): + """Test regressor outputs the correct data types and values.""" + estimator = _clone_estimator(estimator) + + # run fit and predict + estimator.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(y_pred, datatype) diff --git a/aeon/testing/estimator_checking/tests/test_check_estimator.py b/aeon/testing/estimator_checking/tests/test_check_estimator.py index fcc273da5f..ed16d3835a 100644 --- a/aeon/testing/estimator_checking/tests/test_check_estimator.py +++ b/aeon/testing/estimator_checking/tests/test_check_estimator.py @@ -33,7 +33,13 @@ @parametrize_with_checks(list(test_classes.values()), use_first_parameter_set=True) def test_parametrize_with_checks_classes(check): """Test parametrize_with_checks with class input.""" - name = _get_check_estimator_ids(check).split("=")[1].split("(")[0].split(")")[0] + name = ( + _get_check_estimator_ids(check) + .split("=")[1] + .split(",")[0] + .split("(")[0] + .split(")")[0] + ) assert callable(check) dict_before = test_classes[name].__dict__.copy() dict_before.pop("__slotnames__", None) @@ -51,7 +57,13 @@ def test_parametrize_with_checks_classes(check): @parametrize_with_checks(list(test_instances.values()), use_first_parameter_set=True) def test_parametrize_with_checks_instances(check): """Test parametrize_with_checks with estimator instance input.""" - name = _get_check_estimator_ids(check).split("=")[1].split("(")[0].split(")")[0] + name = ( + _get_check_estimator_ids(check) + .split("=")[1] + .split(",")[0] + .split("(")[0] + .split(")")[0] + ) assert callable(check) 
dict_before = test_instances[name].__dict__.copy() check() diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py index 2d06f92140..6006abf267 100644 --- a/aeon/testing/testing_config.py +++ b/aeon/testing/testing_config.py @@ -43,8 +43,14 @@ # has a keras fail, unknown reason, see #1387 "LearningShapeletClassifier": ["check_fit_deterministic"], # does not fit structure for test, needs investigation - "TapNetClassifier": ["check_classifier_random_state_deep_learning"], - "TapNetRegressor": ["check_regressor_random_state_deep_learning"], + "TapNetClassifier": [ + "check_classifier_random_state_deep_learning", + "check_classifier_saving_loading_deep_learning", + ], + "TapNetRegressor": [ + "check_regressor_random_state_deep_learning", + "check_regressor_saving_loading_deep_learning", + ], # needs investigation "SASTClassifier": ["check_fit_deterministic"], "RSASTClassifier": ["check_fit_deterministic"], diff --git a/aeon/testing/utils/estimator_checks.py b/aeon/testing/utils/estimator_checks.py index 1c9e8f8cb3..796fb7ab2a 100644 --- a/aeon/testing/utils/estimator_checks.py +++ b/aeon/testing/utils/estimator_checks.py @@ -12,6 +12,7 @@ from aeon.regression.base import BaseRegressor from aeon.testing.testing_data import FULL_TEST_DATA_DICT from aeon.transformations.base import BaseTransformer +from aeon.utils.validation import get_n_cases def _run_estimator_method(estimator, method_name, datatype, split): @@ -51,6 +52,37 @@ def _get_tag(estimator, tag_name, default=None, raise_error=False): ) +def _assert_predict_labels(y_pred, datatype, split="test", unique_labels=None): + if isinstance(datatype, str): + datatype = FULL_TEST_DATA_DICT[datatype][split][0] + + assert isinstance(y_pred, np.ndarray) + assert y_pred.shape == (get_n_cases(datatype),) + if unique_labels is not None: + assert np.all(np.isin(np.unique(y_pred), unique_labels)) + + +def _assert_predict_probabilities(y_proba, datatype, split="test", n_classes=None): + if isinstance(datatype, str): + if n_classes is None: + n_classes = len(np.unique(FULL_TEST_DATA_DICT[datatype][split][1])) + datatype = FULL_TEST_DATA_DICT[datatype][split][0] + + if n_classes is None: + raise ValueError( + "n_classes must be provided if not using a test dataset string" + ) + + assert isinstance(y_proba, np.ndarray) + assert y_proba.shape == ( + get_n_cases(datatype), + n_classes, + ) + assert np.all(y_proba >= 0) + assert np.all(y_proba <= 1) + assert np.allclose(np.sum(y_proba, axis=1), 1) + + def _list_required_methods(estimator): """Return list of required method names (beyond BaseAeonEstimator ones).""" # all BaseAeonEstimator children must implement these