Commit f0497e0

Merge branch 'develop'

ksarink committed Nov 5, 2021
2 parents bc337ac + ca7100f
Showing 28 changed files with 209 additions and 98 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-test_and_deploy.yml
@@ -37,7 +37,7 @@ jobs:
pip install tensorflow pytest pytest-cov coveralls -r photonai.egg-info/requires.txt -r photonai/optimization/smac/requirements.txt -r photonai/optimization/nevergrad/requirements.txt
- name: Test with pytest
run: |
-PYTHONPATH=./ pytest ./test --cov=./photonai
+PYTHONPATH=./ pytest ./test --cov=./photonai --tb=long
- name: Coveralls
run: coveralls
env:
6 changes: 3 additions & 3 deletions README.md
@@ -10,12 +10,12 @@

#### PHOTONAI is a high-level Python API for designing and optimizing machine learning pipelines.

-We create a system in which you can easily select and combine both pre-processing and learning algorithms from
+We've created a system in which you can easily select and combine both pre-processing and learning algorithms from
state-of-the-art machine learning toolboxes,
and arrange them in simple or parallel pipeline data streams.

In addition, you can parametrize your training and testing
-workflow choosing cross-validation schemas, performance metrics and hyperparameter
+workflow choosing cross-validation schemes, performance metrics and hyperparameter
optimization metrics from a list of pre-registered options.

Importantly, you can integrate custom solutions into your data processing pipeline,
@@ -25,7 +25,7 @@ state-of-the-art machine learning toolboxes,
For a detailed description,
__[visit our website and read the documentation](https://www.photon-ai.com)__

-or you can read a prolonged introduction on [Arxiv](https://arxiv.org/abs/2002.05426)
+or you can read our paper in [PLOS ONE](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0254062)



58 changes: 58 additions & 0 deletions examples/advanced/svc_kernel_speed_up.py
@@ -0,0 +1,58 @@
import os

from sklearn.datasets import make_classification
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics.pairwise import rbf_kernel
from joblib import Memory

from photonai.base import Hyperpipe, PipelineElement
from photonai.optimization import FloatRange


cache_dir = './tmp/kernel_cache'
os.makedirs(cache_dir, exist_ok=True)
memory = Memory(location=cache_dir, verbose=0)


@memory.cache
def cached_rbf(X, Y):
    return rbf_kernel(X, Y)


# create toy data
n_features = 10000
n_samples = 1000
n_informative = 10
X, y = make_classification(n_samples=n_samples, n_features=n_features, n_informative=n_informative)
gamma = 1 / n_features

"""
Especially with large datasets, it is unnecessary to recompute the kernel for every hyperparameter configuration.
For that reason, you can pass a cached kernel function that will only recompute the kernel if the input data changes.
If you don't want to cache the kernel, it still decreases the computation time by magnitudes when passing the kernel
as dedicated function. See this issue for details:
https://github.com/scikit-learn/scikit-learn/issues/21410
https://stackoverflow.com/questions/69680420/using-a-custom-rbf-kernel-function-for-sklearns-svc-is-way-faster-than-built-in
"""
# kernel = 'rbf'
# kernel = rbf_kernel
kernel = cached_rbf

pipe = Hyperpipe('svm_with_custom_kernel',
inner_cv=ShuffleSplit(n_splits=1, test_size=0.2),
outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
optimizer='sk_opt',
optimizer_params={'n_configurations': 15},
metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
best_config_metric='accuracy',
project_folder='./tmp',
verbosity=1)

pipe += PipelineElement('StandardScaler')

pipe += PipelineElement('SVC',
hyperparameters={'C': FloatRange(1e-6, 1e6)},
gamma=gamma, kernel=kernel)

pipe.fit(X, y)
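
One operational note on the cached kernel above: joblib.Memory persists results on disk, so the cache survives across runs. To invalidate it manually, joblib provides Memory.clear:

# Remove everything cached under ./tmp/kernel_cache.
memory.clear(warn=False)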

2 changes: 1 addition & 1 deletion photonai/__init__.py
@@ -13,6 +13,6 @@
"""

-__version__ = '2.1.0'
+__version__ = '2.2.0'

# __all__ = ( )
38 changes: 17 additions & 21 deletions photonai/base/hyperpipe.py
@@ -346,10 +346,10 @@ def __init__(self, name: Optional[str],
The metric that should be maximized or minimized in order to choose
the best hyperparameter configuration.
-eval_final_performance [bool, default=True]:
+eval_final_performance:
    DEPRECATED! Use "use_test_set" instead!
-use_test_set [bool, default=True]:
+use_test_set:
If the metrics should be calculated for the test set,
otherwise the test set is separated but not used.
@@ -603,6 +603,11 @@ def input_data_sanity_checks(self, data, targets, **kwargs):
"PHOTONAI erases every data item that has a Nan Target".format(str(nr_of_nans)))
            self.X = self.X[~nans_in_y]
            self.y = self.y[~nans_in_y]
+           new_kwargs = dict()
+           for name, element_list in kwargs.items():
+               new_kwargs[name] = element_list[~nans_in_y]
+           self.kwargs = new_kwargs

        except Exception as e:
            # This is only for convenience so if it fails then never mind
            logger.error("Removing Nans in target vector failed: " + str(e))
@@ -637,7 +642,9 @@ def disable_multiprocessing_recursively(pipe):
    if hasattr(pipe, 'nr_of_processes'):
        pipe.nr_of_processes = 1
        for child in pipe.elements:
-           if hasattr(child, 'base_element'):
+           if isinstance(child, Branch):
+               Hyperpipe.disable_multiprocessing_recursively(child)
+           elif hasattr(child, 'base_element'):
                Hyperpipe.disable_multiprocessing_recursively(child.base_element)
    elif isinstance(pipe, PhotonPipeline):
        for name, child in pipe.named_steps.items():
@@ -866,7 +873,8 @@ def _finalize_optimization(self):
        self.optimum_pipe.fit(self.data.X, self.data.y, **self.data.kwargs)

        # Before saving the optimum pipe, add preprocessing without multiprocessing
-       self.optimum_pipe.add_preprocessing(self.disable_multiprocessing_recursively(self.preprocessing))
+       self.disable_multiprocessing_recursively(self.preprocessing)
+       self.optimum_pipe.add_preprocessing(self.preprocessing)

        # Now truly set to no caching (including single_subject_caching)
        self.recursive_cache_folder_propagation(self.optimum_pipe, None, None)
@@ -939,11 +947,8 @@ def _prepare_pipeline(self):
# ===================================================================

    @staticmethod
-   def fit_outer_folds(outer_fold_computer, X, y, kwargs, cache_folder):
-       try:
-           outer_fold_computer.fit(X, y, **kwargs)
-       finally:
-           CacheManager.clear_cache_files(cache_folder)
+   def fit_outer_folds(outer_fold_computer, X, y, kwargs):
+       outer_fold_computer.fit(X, y, **kwargs)
        return

    def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
@@ -1038,8 +1043,7 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
                result = dask.delayed(Hyperpipe.fit_outer_folds)(outer_fold_computer,
                                                                 self.data.X,
                                                                 self.data.y,
-                                                                self.data.kwargs,
-                                                                self.cache_folder)
+                                                                self.data.kwargs)
                delayed_jobs.append(result)
            else:
                try:
@@ -1058,7 +1062,8 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
        # evaluate hyperparameter optimization results for best config
        self._finalize_optimization()

-       # clear complete cache ?
+       # clear complete cache ? use self.cache_folder to delete all subfolders within the parent cache folder
+       # directory
        CacheManager.clear_cache_files(self.cache_folder, force_all=True)

###############################################################################################
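
Taken together, these hunks remove the per-fold cache cleanup from fit_outer_folds and replace it with a single clear_cache_files call once optimization has finished. A minimal sketch of the resulting pattern (fold_computers, X, y, clear_cache and cache_folder are placeholders, not PHOTONAI API):

import dask

def fit_outer_fold(fold_computer, X, y):
    # plain fit, no cleanup: workers no longer delete shared cache folders
    fold_computer.fit(X, y)

# one delayed job per outer fold, executed in parallel ...
jobs = [dask.delayed(fit_outer_fold)(fc, X, y) for fc in fold_computers]
dask.compute(*jobs)
# ... followed by one cleanup of the parent cache folder
clear_cache(cache_folder, force_all=True)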
@@ -1247,15 +1252,6 @@ def get_permutation_feature_importances(self, **kwargs):
        Returns mean of "importances_mean" and of "importances_std" of all outer folds.

        Parameters:
-           X_val:
-               The array-like data with shape=[M, D],
-               where M is the number of samples and D is the number
-               of features. D must correspond to the number
-               of trained dimensions of the fit method.
-           y_val:
-               The array-like true targets.
            **kwargs:
                Keyword arguments, passed to sklearn.permutation_importance.
14 changes: 14 additions & 0 deletions photonai/helper/helper.py
@@ -226,6 +226,20 @@ def print_double_metrics(metric_dict_train, metric_dict_test, photon_system_log=
logger.debug(t)


+def print_outer_folds(metric_list, outer_fold_list, photon_system_log=True, summary=False):
+    t = PrettyTable(["fold #"] + [metric for metric in metric_list] + ["Best Hyperparameter Config"])
+    for outer_fold in outer_fold_list:
+        nr_str = str(outer_fold.fold_nr)
+        if outer_fold.owns_best_config:
+            nr_str += "*"
+        t.add_row([nr_str] +
+                  ["%.4f" % outer_fold.best_config.best_config_score.validation.metrics[m] for m in metric_list] +
+                  [outer_fold.best_config.human_readable_config])
+    if summary:
+        return t
+    if photon_system_log:
+        logger.photon_system_log(t)
+
def print_estimator_metrics(estimator_performances, metric_list, summary=False):
    t = PrettyTable(['Estimator'] + metric_list)
    for estimator_name, estimator_values in estimator_performances.items():
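print_outer_folds builds one row per outer fold and stars the fold that owns the best configuration (the owns_best_config flag set in optimization_info.py below). A tiny sketch of the resulting table, with made-up values:

from prettytable import PrettyTable

t = PrettyTable(["fold #", "accuracy", "Best Hyperparameter Config"])
t.add_row(["1*", "0.8750", {'SVC__C': 1.0}])   # fold 1 owns the best config
t.add_row(["2", "0.8125", {'SVC__C': 10.0}])
print(t)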
20 changes: 10 additions & 10 deletions photonai/modelwrapper/keras_base_models.py
@@ -2,11 +2,11 @@
import numpy as np
import keras
from typing import Union
-from keras.utils import to_categorical
+from keras.utils.all_utils import to_categorical
from keras.layers import Dropout, Dense
-from keras.layers.normalization import BatchNormalization
+from keras.layers import BatchNormalization
from keras.models import Sequential
-from keras.optimizers import Optimizer, Adam, RMSprop, Adadelta, Adagrad, Adamax, Nadam, SGD
+from keras.optimizers import Optimizer, adam_v2, rmsprop_v2, adadelta_v2, adagrad_v2, adamax_v2, nadam_v2, gradient_descent_v2
from keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
linear, exponential
from sklearn.base import ClassifierMixin, RegressorMixin
@@ -15,13 +15,13 @@
from photonai.modelwrapper.keras_base_estimator import KerasBaseEstimator

__supported_optimizers__ = {
-    'sgd': SGD,
-    'rmsprop': RMSprop,
-    'adagrad': Adagrad,
-    'adadelta': Adadelta,
-    'adam': Adam,
-    'adamax': Adamax,
-    'nadam': Nadam
+    'sgd': gradient_descent_v2.SGD,
+    'rmsprop': rmsprop_v2.RMSprop,
+    'adagrad': adagrad_v2.Adagrad,
+    'adadelta': adadelta_v2.Adadelta,
+    'adam': adam_v2.Adam,
+    'adamax': adamax_v2.Adamax,
+    'nadam': nadam_v2.Nadam
}
__supported_activations__ = {
'softmax': softmax,
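Keras 2.6 reorganized its optimizer modules, so the registry above maps the old lowercase names to the new *_v2 classes. A hedged sketch of how such a registry is typically consumed (get_optimizer is illustrative, not part of this diff):

# Resolve an optimizer name against the registry above.
# `learning_rate` is the current Keras argument name; older code used `lr`.
def get_optimizer(name, learning_rate=1e-3):
    try:
        optimizer_class = __supported_optimizers__[name.lower()]
    except KeyError:
        raise ValueError("Unsupported optimizer: {}".format(name))
    return optimizer_class(learning_rate=learning_rate)

adam = get_optimizer('adam', learning_rate=3e-4)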
1 change: 1 addition & 0 deletions photonai/optimization/optimization_info.py
@@ -120,5 +120,6 @@ def get_optimum_config_outer_folds(self, outer_folds):
        # min metric
        best_config_metric_nr = np.argmin(list_of_scores)

+       outer_folds[best_config_metric_nr].owns_best_config = True
        best_config = outer_folds[best_config_metric_nr].best_config
        return best_config
2 changes: 1 addition & 1 deletion photonai/optimization/smac/requirements.txt
@@ -1,4 +1,4 @@
# please install SWIG -> have a look at https://github.com/automl/SMAC3
-smac
+smac>=1.0.0
emcee
pyDOE
4 changes: 2 additions & 2 deletions photonai/optimization/smac/smac.py
@@ -10,10 +10,10 @@
from smac.configspace import UniformFloatHyperparameter, UniformIntegerHyperparameter, CategoricalHyperparameter, \
ConfigurationSpace, Configuration, InCondition, Constant
from smac.scenario.scenario import Scenario
-   from smac.facade.smac_bo_facade import SMAC4BO
+   from smac.facade.smac_bb_facade import SMAC4BB as SMAC4BO
    from smac.facade.smac_hpo_facade import SMAC4HPO
    from smac.facade.smac_ac_facade import SMAC4AC
-   from smac.facade.smac_bohb_facade import BOHB4HPO
+   from smac.facade.smac_mf_facade import SMAC4MF as BOHB4HPO
    __found__ = True
except (ModuleNotFoundError, ImportError):
    __found__ = False
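SMAC 1.0 renamed its facades (SMAC4BO became SMAC4BB, BOHB4HPO became SMAC4MF); aliasing them on import keeps the rest of the module unchanged. The surrounding optional-import guard, sketched generically (the None fallback is an assumption, not shown in the diff):

# Expose a flag instead of failing at import time, so callers can raise
# a helpful error only when the SMAC optimizer is actually requested.
try:
    from smac.facade.smac_bb_facade import SMAC4BB as SMAC4BO
    __found__ = True
except (ModuleNotFoundError, ImportError):
    SMAC4BO = None
    __found__ = False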
4 changes: 3 additions & 1 deletion photonai/processing/cross_validation.py
@@ -55,7 +55,9 @@ class StratifiedKFoldRegression(_BaseKFold):
"""

    def __init__(self, n_splits=3, shuffle=False, random_state=None):
-       super(StratifiedKFoldRegression, self).__init__(n_splits, shuffle, random_state)
+       super(StratifiedKFoldRegression, self).__init__(n_splits=n_splits,
+                                                       shuffle=shuffle,
+                                                       random_state=random_state)

def _make_test_folds(self, X, y=None):
rng = self.random_state
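The keyword-argument form is needed because newer scikit-learn releases declare the _BaseKFold constructor parameters keyword-only (an assumption about the targeted sklearn version). A minimal illustration of the failure mode:

# Parameters after the bare `*` reject positional use.
class BaseSplitter:
    def __init__(self, n_splits=5, *, shuffle=False, random_state=None):
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_state = random_state

# BaseSplitter(3, True, 42)  # would raise TypeError: takes 2 positional arguments
splitter = BaseSplitter(n_splits=3, shuffle=True, random_state=42)  # works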