Skip to content

Commit

Permalink
clustering for v1.0.0 (#318)
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewMiddlehurst authored Dec 13, 2024
1 parent f03d7f6 commit be38696
Show file tree
Hide file tree
Showing 12 changed files with 177 additions and 56 deletions.
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ Functions for running experiments.
experiments.get_classifier_by_name
experiments.get_clusterer_by_name
experiments.get_regressor_by_name
experiments.run_timing_experiment
experiments.classification_cross_validation
experiments.classification_cross_validation_folds
experiments.regression_cross_validation
Expand Down
2 changes: 2 additions & 0 deletions tsml_eval/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"get_classifier_by_name",
"get_clusterer_by_name",
"get_regressor_by_name",
"run_timing_experiment",
"classification_cross_validation",
"classification_cross_validation_folds",
"regression_cross_validation",
Expand All @@ -30,6 +31,7 @@
run_clustering_experiment,
run_regression_experiment,
)
from tsml_eval.experiments.scalability import run_timing_experiment
from tsml_eval.experiments.set_classifier import get_classifier_by_name
from tsml_eval.experiments.set_clusterer import get_clusterer_by_name
from tsml_eval.experiments.set_regressor import get_regressor_by_name
43 changes: 21 additions & 22 deletions tsml_eval/experiments/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,11 @@ def run_clustering_experiment(
If None, the clusterer's default is used. If -1, the number of classes in the
dataset is used.
This may not work as intended for pipelines currently.
The `n_clusters` parameter for arguments which are estimators will also be
set to this value if it exists. Please ensure that the argument input itself
has the `n_clusters` parameter and is not a default such as None. This is
likely to be the case for parameters such as `estimator` or `clusterer` in
pipelines and deep learners.
clusterer_name : str or None, default=None
Name of clusterer used in writing results. If None, the name is taken from
the clusterer.
Expand Down Expand Up @@ -821,22 +825,18 @@ def run_clustering_experiment(
f"Encoder dictionary: {str(encoder_dict)}"
)

# set n_clusters for clusterer and any contained estimators
# NOTE: If the clusterer has an estimator parameter, i.e. `estimator` or
# `clusterer`, which defaults to None, we cannot set the n_clusters parameter for
# it here.
if isinstance(n_clusters, int):
try:
if n_clusters == -1:
n_clusters = n_classes

if isinstance(clusterer, SklearnToTsmlClusterer):
clusterer.set_params(clusterer__n_clusters=n_clusters)
else:
clusterer.set_params(n_clusters=n_clusters)
except ValueError:
warnings.warn(
f"{clusterer_name} does not have a n_clusters parameter, "
"so it cannot be set.",
stacklevel=1,
)
n_clusters = None
if n_clusters == -1:
n_clusters = n_classes

if "n_clusters" in clusterer.get_params():
clusterer.set_params(n_clusters=n_clusters)
for att in clusterer.__dict__.values():
if isinstance(att, BaseEstimator) and "n_clusters" in att.get_params():
att.set_params(n_clusters=n_clusters)
elif n_clusters is not None:
raise ValueError("n_clusters must be an int or None.")

Expand Down Expand Up @@ -868,7 +868,7 @@ def run_clustering_experiment(
train_probs = np.zeros(
(
len(train_preds),
n_clusters if n_clusters is not None else len(np.unique(train_preds)),
len(np.unique(train_preds)),
)
)
train_probs[np.arange(len(train_preds)), train_preds] = 1
Expand Down Expand Up @@ -909,11 +909,7 @@ def run_clustering_experiment(
test_probs = np.zeros(
(
len(test_preds),
(
n_clusters
if n_clusters is not None
else len(np.unique(train_preds))
),
len(np.unique(train_preds)),
)
)
test_probs[np.arange(len(test_preds)), test_preds] = 1
Expand Down Expand Up @@ -989,6 +985,9 @@ def load_and_run_clustering_experiment(
Number of clusters to use if the clusterer has an `n_clusters` parameter.
If None, the clusterer's default is used. If -1, the number of classes in the
dataset is used.
The `n_clusters` parameter for attributes which are estimators will also be
set to this value if it exists.
clusterer_name : str or None, default=None
Name of clusterer used in writing results. If None, the name is taken from
the clusterer.
Expand Down
2 changes: 1 addition & 1 deletion tsml_eval/experiments/scalability.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def run_timing_experiment(
function="fit",
random_state=None,
):
"""Return the time taken to run eestimator functions for randomly generated data.
"""Return the time taken to run estimator functions for randomly generated data.
Will time the function for each estimator in milliseconds, gradually increasing the
size of the chosen dimension. The time taken will be stored in a dictionary.
Expand Down
27 changes: 27 additions & 0 deletions tsml_eval/experiments/set_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
["inceptiontimeclassifier", "inceptiontime"],
["h-inceptiontimeclassifier", "h-inceptiontime"],
["litetimeclassifier", "litetime"],
["individualliteclassifier", "individuallite"],
["disjointcnnclassifier", "disjointcnn"],
]
dictionary_based_classifiers = [
["bossensemble", "boss"],
Expand All @@ -38,6 +40,7 @@
["weasel_v2", "weaseldilation", "weasel-dilation", "weasel-d"],
"redcomets",
"redcomets-500",
["mrseqlclassifier", "mrseql"],
["mrsqmclassifier", "mrsqm"],
]
distance_based_classifiers = [
Expand Down Expand Up @@ -83,6 +86,7 @@
"summary-intervals",
["randomintervals-500", "catch22-intervals-500"],
["randomintervalclassifier", "randomintervals", "catch22-intervals"],
["supervisedintervalclassifier", "supervisedintervals"],
["quantclassifier", "quant"],
]
other_classifiers = [
Expand All @@ -97,6 +101,7 @@
["randomshapeletforestclassifier", "randomshapeletforest", "rsf"],
["sastclassifier", "sast"],
["rsastclassifier", "rsast"],
["learningshapeletclassifier", "ls"],
]
vector_classifiers = [
["rotationforestclassifier", "rotationforest", "rotf"],
Expand Down Expand Up @@ -299,6 +304,14 @@ def _set_classifier_deep_learning(
from aeon.classification.deep_learning import LITETimeClassifier

return LITETimeClassifier(random_state=random_state, **kwargs)
elif c == "individualliteclassifier" or c == "individuallite":
from aeon.classification.deep_learning import IndividualLITEClassifier

return IndividualLITEClassifier(random_state=random_state, **kwargs)
elif c == "disjointcnnclassifier" or c == "disjointcnn":
from aeon.classification.deep_learning import DisjointCNNClassifier

return DisjointCNNClassifier(random_state=random_state, **kwargs)


def _set_classifier_dictionary_based(
Expand Down Expand Up @@ -391,6 +404,10 @@ def _set_classifier_dictionary_based(
return REDCOMETS(
n_trees=500, random_state=random_state, n_jobs=n_jobs, **kwargs
)
elif c == "mrseqlclassifier" or c == "mrseql":
from aeon.classification.dictionary_based import MrSEQLClassifier

return MrSEQLClassifier(**kwargs)
elif c == "mrsqmclassifier" or c == "mrsqm":
from aeon.classification.dictionary_based import MrSQMClassifier

Expand Down Expand Up @@ -670,6 +687,12 @@ def _set_classifier_interval_based(
return RandomIntervalClassifier(
random_state=random_state, n_jobs=n_jobs, **kwargs
)
elif c == "supervisedintervalclassifier" or c == "supervisedintervals":
from aeon.classification.interval_based import SupervisedIntervalClassifier

return SupervisedIntervalClassifier(
random_state=random_state, n_jobs=n_jobs, **kwargs
)
elif c == "quantclassifier" or c == "quant":
from aeon.classification.interval_based import QUANTClassifier

Expand Down Expand Up @@ -734,6 +757,10 @@ def _set_classifier_shapelet_based(
from aeon.classification.shapelet_based import RSASTClassifier

return RSASTClassifier(seed=random_state, n_jobs=n_jobs, **kwargs)
elif c == "learningshapeletclassifier" or c == "ls":
from aeon.classification.shapelet_based import LearningShapeletClassifier

return LearningShapeletClassifier(random_state=random_state, **kwargs)


def _set_classifier_vector(c, random_state, n_jobs, fit_contract, checkpoint, kwargs):
Expand Down
73 changes: 66 additions & 7 deletions tsml_eval/experiments/set_clusterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
KSpectralCentroid,
TimeSeriesCLARA,
TimeSeriesCLARANS,
TimeSeriesKernelKMeans,
TimeSeriesKMeans,
TimeSeriesKMedoids,
TimeSeriesKShape,
Expand All @@ -21,6 +22,10 @@
deep_learning_clusterers = [
["aefcnclusterer", "aefcn"],
["aeresnetclusterer", "aeresnet"],
["aeattentionbigruclusterer", "aeattentionbigru"],
["aebigruclusterer", "aebigru"],
["aedcnnclusterer", "aedcnn"],
["aedrnnclusterer", "aedrnn"],
]
distance_based_clusterers = [
"kmeans-euclidean",
Expand Down Expand Up @@ -130,14 +135,13 @@
"elasticsom",
"kspectralcentroid",
"timeserieskshape",
"timeserieskernelkmeans",
]

feature_based_clusterers = [
["catch22", "catch22clusterer"],
["tsfresh", "tsfreshclusterer"],
["summary", "summaryclusterer"],
]

other_clusterers = [
["dummyclusterer", "dummy", "dummyclusterer-tsml"],
"dummyclusterer-aeon",
Expand Down Expand Up @@ -231,11 +235,51 @@ def _set_clusterer_deep_learning(
if c == "aefcnclusterer" or c == "aefcn":
from aeon.clustering.deep_learning import AEFCNClusterer

return AEFCNClusterer(random_state=random_state, **kwargs)
return AEFCNClusterer(
estimator=TimeSeriesKMeans(distance="euclidean", averaging_method="mean"),
random_state=random_state,
**kwargs,
)
elif c == "aeresnetclusterer" or c == "aeresnet":
from aeon.clustering.deep_learning import AEResNetClusterer

return AEResNetClusterer(random_state=random_state, **kwargs)
return AEResNetClusterer(
estimator=TimeSeriesKMeans(distance="euclidean", averaging_method="mean"),
random_state=random_state,
**kwargs,
)
elif c == "aeattentionbigruclusterer" or c == "aeattentionbigru":
from aeon.clustering.deep_learning import AEAttentionBiGRUClusterer

return AEAttentionBiGRUClusterer(
estimator=TimeSeriesKMeans(distance="euclidean", averaging_method="mean"),
random_state=random_state,
**kwargs,
)
elif c == "aebigruclusterer" or c == "aebigru":
from aeon.clustering.deep_learning import AEBiGRUClusterer

return AEBiGRUClusterer(
estimator=TimeSeriesKMeans(distance="euclidean", averaging_method="mean"),
random_state=random_state,
**kwargs,
)
elif c == "aedcnnclusterer" or c == "aedcnn":
from aeon.clustering.deep_learning import AEDCNNClusterer

return AEDCNNClusterer(
estimator=TimeSeriesKMeans(distance="euclidean", averaging_method="mean"),
random_state=random_state,
**kwargs,
)
elif c == "aedrnnclusterer" or c == "aedrnn":
from aeon.clustering.deep_learning import AEDRNNClusterer

return AEDRNNClusterer(
estimator=TimeSeriesKMeans(distance="euclidean", averaging_method="mean"),
random_state=random_state,
**kwargs,
)


def _set_clusterer_distance_based(
Expand Down Expand Up @@ -388,6 +432,15 @@ def _set_clusterer_distance_based(
random_state=random_state,
**kwargs,
)
elif c == "timeserieskernelkmeans" or c == "kernelkmeans":
return TimeSeriesKernelKMeans(
max_iter=50,
n_init=10,
tol=1e-06,
random_state=random_state,
n_jobs=n_jobs,
**kwargs,
)


def _get_distance_default_params(
Expand Down Expand Up @@ -433,15 +486,21 @@ def _set_clusterer_feature_based(
if c == "catch22" or c == "catch22clusterer":
from aeon.clustering.feature_based import Catch22Clusterer

return Catch22Clusterer(random_state=random_state, n_jobs=n_jobs, **kwargs)
return Catch22Clusterer(
estimator=KMeans(), random_state=random_state, n_jobs=n_jobs, **kwargs
)
elif c == "tsfresh" or c == "tsfreshclusterer":
from aeon.clustering.feature_based import TSFreshClusterer

return TSFreshClusterer(random_state=random_state, n_jobs=n_jobs, **kwargs)
return TSFreshClusterer(
estimator=KMeans(), random_state=random_state, n_jobs=n_jobs, **kwargs
)
elif c == "summary" or c == "summaryclusterer":
from aeon.clustering.feature_based import SummaryClusterer

return SummaryClusterer(random_state=random_state, n_jobs=n_jobs, **kwargs)
return SummaryClusterer(
estimator=KMeans(), random_state=random_state, n_jobs=n_jobs, **kwargs
)


def _set_clusterer_other(c, random_state, n_jobs, fit_contract, checkpoint, kwargs):
Expand Down
13 changes: 11 additions & 2 deletions tsml_eval/experiments/set_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
["multirockethydraregressor", "multirockethydra", "multirocket-hydra"],
]
deep_learning_regressors = [
["cnnregressor", "cnn"],
["timecnnregressor", "timecnn", "cnnregressor", "cnn"],
["fcnregressor", "fcnn", "fcn"],
["mlpregressor", "mlp"],
["encoderregressor", "encoder"],
Expand All @@ -23,7 +23,8 @@
["inceptiontimeregressor", "inception", "inceptiontime"],
["h-inceptiontimeregressor", "h-inceptiontime"],
["litetimeregressor", "litetime"],
["timecnnregressor", "timecnn"],
["individualliteregressor", "individuallite"],
["disjointcnnregressor", "disjointcnn"],
]
distance_based_regressors = [
"1nn-ed",
Expand Down Expand Up @@ -252,6 +253,14 @@ def _set_regressor_deep_learning(
from aeon.regression.deep_learning import LITETimeRegressor

return LITETimeRegressor(random_state=random_state, **kwargs)
elif r == "individualliteregressor" or r == "individuallite":
from aeon.regression.deep_learning import IndividualLITERegressor

return IndividualLITERegressor(random_state=random_state, **kwargs)
elif r == "disjointcnnregressor" or r == "disjointcnn":
from aeon.regression.deep_learning import DisjointCNNRegressor

return DisjointCNNRegressor(random_state=random_state, **kwargs)


def _set_regressor_distance_based(
Expand Down
10 changes: 3 additions & 7 deletions tsml_eval/experiments/tests/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,11 @@ def test_aeon_classifiers_available():
"ClassifierPipeline",
"ClassifierEnsemble",
"SklearnClassifierWrapper",
# just missing
"IndividualLITEClassifier",
"IntervalForestClassifier",
# ordinal
"OrdinalTDE",
"IndividualOrdinalTDE",
"IntervalForestClassifier",
"SupervisedIntervalClassifier",
"LearningShapeletClassifier",
"DisjointCNNClassifier",
"MrSEQLClassifier",
# just missing
]

est = [e for e, _ in all_estimators(type_filter="classifier")]
Expand Down
Loading

0 comments on commit be38696

Please sign in to comment.