Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
fraimondo committed Jul 30, 2024
1 parent 4d8228d commit 82aa46b
Show file tree
Hide file tree
Showing 7 changed files with 257 additions and 26 deletions.
36 changes: 36 additions & 0 deletions julearn/base/column_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,42 @@ def __eq__(self, other: Union["ColumnTypes", str]):
other = other if isinstance(other, ColumnTypes) else ColumnTypes(other)
return self._column_types == other._column_types

def __and__(self, other: Union["ColumnTypes", str]) -> "ColumnTypes":
    """Get the intersection of the column_types.

    Parameters
    ----------
    other : ColumnTypes or str
        The other column_types to get the intersection with. A value
        that is not already a ColumnTypes instance is wrapped in
        ColumnTypes first, the same way ``__eq__`` treats its operand.

    Returns
    -------
    ColumnTypes
        The intersection of the column_types.

    """
    # Coerce like __eq__ does, so that a plain string operand works
    # instead of failing with an AttributeError on ``_column_types``.
    other = other if isinstance(other, ColumnTypes) else ColumnTypes(other)
    return ColumnTypes(self._column_types & other._column_types)

def __or__(self, other: Union["ColumnTypes", str]) -> "ColumnTypes":
    """Get the union of the column_types.

    Parameters
    ----------
    other : ColumnTypes or str
        The other column_types to get the union with. A value that is
        not already a ColumnTypes instance is wrapped in ColumnTypes
        first, the same way ``__eq__`` treats its operand.

    Returns
    -------
    ColumnTypes
        The union of the column_types.

    """
    # Coerce like __eq__ does, so that a plain string operand works
    # instead of failing with an AttributeError on ``_column_types``.
    other = other if isinstance(other, ColumnTypes) else ColumnTypes(other)
    return ColumnTypes(self._column_types | other._column_types)

def __len__(self) -> int:
    """Get the number of column_types.

    Returns
    -------
    int
        The length of the underlying ``_column_types`` collection.

    """
    return len(self._column_types)

def __iter__(self):
"""Iterate over the column_types."""

Expand Down
4 changes: 3 additions & 1 deletion julearn/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,9 @@ def model(request: FixtureRequest) -> str:
return request.param


@fixture(params=["regression", "classification"], scope="function")
@fixture(
params=["regression", "classification", "transformer"], scope="function"
)
def problem_type(request: FixtureRequest) -> str:
"""Return different problem types.
Expand Down
42 changes: 40 additions & 2 deletions julearn/pipeline/pipeline_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,38 @@ def add(
" and only to the PipelineCreator like this"
" PipelineCreator(problem_type=problem_type)"
)

if self._added_target_generator:
# If a target generator was added, we need to make sure that
# the apply_to parameter is set.
if apply_to is None:
raise_error(
"A target generator was added. To prevent training on "
"the features used to generate the target, you need to "
"explicitly set the apply_to parameter."
)
else:
# and it should also be set to exclude what is used to
# generate the target.
apply_to = ColumnTypes(apply_to)
if len(apply_to & ColumnTypes("*")) > 0:
raise_error(
"A target generator was added. The apply_to parameter "
"of subsequent steps cannot include the wildcard type "
"'*'."
)
else:

target_gen_step = next(iter([
x for x in self._steps if x.name == "generate_target"
]))
if len(apply_to & target_gen_step.apply_to) > 0:
raise_error(
"A target generator was added. The apply_to "
"parameter of subsequent steps should exclude the "
"types used to generate the target."
)

apply_to = self.apply_to if apply_to is None else apply_to
apply_to = ColumnTypes(apply_to)

Expand All @@ -240,7 +272,6 @@ def add(
step = typing.cast(JuTargetPipeline, step)

# The name "generate_target" is reserved for the target generator step
# TODO: Add CI TEST
if name == "generate_target" and step != "generate_target":
raise_error(
"The name 'generate_target' is reserved for the target "
Expand Down Expand Up @@ -303,13 +334,20 @@ def add(
else:
logger.debug(f"Special step is {step}")
name = "generate_target"
if len(apply_to & ColumnTypes("*")) > 0:
raise_error(
"The 'generate_target' step cannot apply to all types."
)
if "transformer" not in params_to_set:
# TODO: CI TEST
raise_error(
"The 'generate_target' step should have a "
"transformer parameter."
)
step = params_to_set["transformer"]
if not isinstance(step, PipelineCreator):
raise_error(
"The transformer parameter in the generate_target "
"step should be a PipelineCreator.")
elif len(params_to_set) > 0:
step.set_params(**params_to_set) # type: ignore

Expand Down
137 changes: 122 additions & 15 deletions julearn/pipeline/tests/test_pipeline_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@

import pandas as pd
import pytest
from numpy.testing import assert_array_equal
from sklearn.decomposition import PCA
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

Check failure on line 15 in julearn/pipeline/tests/test_pipeline_creator.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F401)

julearn/pipeline/tests/test_pipeline_creator.py:15:54: F401 `sklearn.ensemble.RandomForestRegressor` imported but unused
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.svm import SVC
Expand Down Expand Up @@ -45,7 +48,8 @@ def test_construction_working(
preprocess = preprocess if isinstance(preprocess, list) else [preprocess]
for step in preprocess:
creator.add(step, apply_to="categorical")
creator.add(model)
if problem_type in ["classification", "regression"]:
creator.add(model)
X_types = {"categorical": ["A"]}
pipeline = creator.to_pipeline(X_types=X_types)

Expand All @@ -60,16 +64,19 @@ def test_construction_working(
)

# check model step
model_name, model = pipeline.steps[-1]
assert isinstance(model, WrapModel)
assert isinstance(
model.model,
get_model(
model_name,
problem_type=problem_type,
).__class__,
)
assert len(preprocess) + 2 == len(pipeline.steps)
if problem_type in ["classification", "regression"]:
model_name, model = pipeline.steps[-1]
assert isinstance(model, WrapModel)
assert isinstance(
model.model,
get_model(
model_name,
problem_type=problem_type,
).__class__,
)
assert len(preprocess) + 2 == len(pipeline.steps)
else:
assert len(preprocess) + 1 == len(pipeline.steps)


def test_fit_and_transform_no_error(
Expand Down Expand Up @@ -98,10 +105,14 @@ def test_fit_and_transform_no_error(
creator = PipelineCreator.from_list(
preprocess, model_params={}, problem_type=problem_type
)
creator.add(model)
if problem_type in ["classification", "regression"]:
creator.add(model)
pipeline = creator.to_pipeline({})
pipeline.fit(X_iris, y_iris)
pipeline[:-1].transform(X_iris)
if problem_type in ["classification", "regression"]:
pipeline[:-1].transform(X_iris)
else:
pipeline.transform(X_iris)


def _hyperparam_tuning_base_test(
Expand Down Expand Up @@ -138,6 +149,8 @@ def _hyperparam_tuning_base_test(
convention.
"""
if problem_type == "transformer":
pytest.skip("Transformers can't be tuned")
if isinstance(preprocess, str):
preprocess = [preprocess]

Expand Down Expand Up @@ -197,7 +210,8 @@ def test_hyperparameter_tuning(
"""

if problem_type == "transformer":
pytest.skip("Transformers can't be tuned")
pipeline, param_grid = _hyperparam_tuning_base_test(
X_types_iris,
model,
Expand Down Expand Up @@ -245,6 +259,8 @@ def test_hyperparameter_tuning_bayes(
The parameters for the search.
"""
if problem_type == "transformer":
pytest.skip("Transformers can't be tuned")
BayesSearchCV = pytest.importorskip("skopt.BayesSearchCV")

pipeline, param_grid = _hyperparam_tuning_base_test(
Expand Down Expand Up @@ -292,6 +308,8 @@ def test_hyperparameter_tuning_optuna(
# OptunaSearchCV = optuna_integration.OptunaSearchCV
from julearn.external.optuna_searchcv import OptunaSearchCV

if problem_type == "transformer":
pytest.skip("Transformers can't be tuned")
pipeline, param_grid = _hyperparam_tuning_base_test(
X_types_iris,
model,
Expand Down Expand Up @@ -363,6 +381,8 @@ def test_hyperparameter_tuning_distributions(
The parameters for the search.
"""
if problem_type == "transformer":
pytest.skip("Transformers can't be tuned")
kind = "grid"
if search_params is not None:
kind = search_params.get("kind", "grid")
Expand Down Expand Up @@ -411,6 +431,8 @@ def test_hyperparameter_tuning_distributions_bayes(
The parameters for the search.
"""
if problem_type == "transformer":
pytest.skip("Transformers can't be tuned")
BayesSearchCV = pytest.importorskip("skopt.BayesSearchCV")

pipeline, param_grid = _hyperparam_tuning_base_test(
Expand Down Expand Up @@ -858,3 +880,88 @@ def test_PipelineCreator_set_hyperparameter() -> None:
model3 = creator3.to_pipeline()

assert model3.steps[-1][1].get_params()["strategy"] == "uniform"


def test_PipelineCreator_generated_target(
    X_iris: pd.DataFrame,  # noqa: N803
) -> None:
    """Check a generated-target pipeline against plain scikit-learn."""
    petal_cols = ["petal_length", "petal_width"]
    sepal_cols = ["sepal_length", "sepal_width"]

    # Transformer pipeline restricted to the petal features: run a PCA
    # and keep only the column named "pca__pca0".
    target_creator = PipelineCreator(
        problem_type="transformer", apply_to="petal"
    )
    target_creator.add("pca", n_components=2, random_state=42)
    target_creator.add("pick_columns", keep="pca__pca0")

    # Regression pipeline whose target is produced by the transformer
    # above, while the model itself only sees the sepal features.
    creator = PipelineCreator(problem_type="regression", apply_to="*")
    creator.add(
        "generate_target", apply_to="petal", transformer=target_creator
    )
    creator.add("linreg", apply_to="sepal")

    pipeline = creator.to_pipeline(
        {"sepal": list(sepal_cols), "petal": list(petal_cols)}
    )
    assert len(pipeline.steps) == 2
    first_name, second_name = (entry[0] for entry in pipeline.steps)
    assert first_name == "set_column_types"
    assert second_name == "linreg_target_generate"

    # The y passed to fit is a constant placeholder series.
    placeholder_y = pd.Series([0] * len(X_iris))
    pipeline.fit(X_iris.copy(), placeholder_y)

    # In-sample predictions from the julearn pipeline
    ju_pred = pipeline.predict(X_iris.copy())

    # Reference computation done by hand with scikit-learn
    petal_vals = X_iris[petal_cols].values
    sepal_vals = X_iris[sepal_cols].values

    reference_pca = PCA(n_components=2, random_state=42)
    y_gen = reference_pca.fit(petal_vals).transform(petal_vals)[:, 0]

    reference_model = LinearRegression()
    reference_model.fit(sepal_vals, y_gen)
    sk_pred = reference_model.predict(sepal_vals)

    assert_array_equal(ju_pred, sk_pred)


def test_PipelineCreator_generated_target_errors() -> None:
    """Test errors with the generated target.

    Every creator is built outside of the ``pytest.raises`` block, so that
    only the ``add`` call under test can satisfy the expected error and a
    ValueError raised while constructing the creator cannot make a check
    pass by accident.
    """
    transformer_creator = PipelineCreator(
        problem_type="transformer", apply_to="petal"
    )
    transformer_creator.add("pca")

    # The step name "generate_target" is reserved
    creator = PipelineCreator(problem_type="regression", apply_to="*")
    with pytest.raises(ValueError, match="reserved for the target"):
        creator.add("pca", name="generate_target")

    # The generate_target step needs a transformer parameter
    creator = PipelineCreator(problem_type="regression", apply_to="*")
    with pytest.raises(ValueError, match="have a transformer parameter"):
        creator.add("generate_target", apply_to="petal")

    # ... and that transformer must be a PipelineCreator
    creator = PipelineCreator(problem_type="regression", apply_to="*")
    with pytest.raises(ValueError, match="should be a PipelineCreator"):
        creator.add("generate_target", apply_to="petal", transformer="pca")

    # The generate_target step cannot apply to the wildcard type
    creator = PipelineCreator(problem_type="regression", apply_to="*")
    with pytest.raises(ValueError, match="all types"):
        creator.add(
            "generate_target", apply_to="*", transformer=transformer_creator
        )

    # Once a target generator is present, later steps must set apply_to
    # explicitly, without the wildcard and without the generator's types.
    creator = PipelineCreator(problem_type="regression", apply_to="*")
    creator.add(
        "generate_target", apply_to="petal", transformer=transformer_creator
    )
    with pytest.raises(ValueError, match="explicitly set the apply_to"):
        creator.add("linreg")
    with pytest.raises(ValueError, match="exclude the types"):
        creator.add("linreg", apply_to=["sepal", "petal"])
    with pytest.raises(ValueError, match="wildcard"):
        creator.add("linreg", apply_to="*")

    # A step restricted to other types is accepted without error
    creator.add("linreg", apply_to=["sepal"])
17 changes: 10 additions & 7 deletions julearn/transformers/dataframe/pick_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def __init__(
)

def _fit(
self, X: pd.DataFrame, y: Optional[DataLike] = None # noqa: N803
self,
X: pd.DataFrame,

Check failure on line 54 in julearn/transformers/dataframe/pick_columns.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (N803)

julearn/transformers/dataframe/pick_columns.py:54:9: N803 Argument name `X` should be lowercase
y: Optional[DataLike] = None, # noqa: N803

Check failure on line 55 in julearn/transformers/dataframe/pick_columns.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (RUF100)

julearn/transformers/dataframe/pick_columns.py:55:40: RUF100 Unused `noqa` directive (unused: `N803`)
) -> "PickColumns":
"""Fit the transformer.
Expand Down Expand Up @@ -82,9 +84,8 @@ def _fit(
self.keep_columns_ = []
self.support_mask_ = self.support_mask_.values
return self
return self

def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803
def transform(self, X: pd.DataFrame) -> Union[pd.DataFrame, pd.Series]: # noqa: N803
"""Pick the columns.
Parameters
Expand All @@ -99,7 +100,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803
"""
logger.debug(f"Picking columns: {self.keep_columns_}")
return X[self.keep_columns_]
if len(self.keep_columns_) == 1:
out = X[self.keep_columns_[0]]
else:
out = X[self.keep_columns_]
return out

def get_support(
self, indices: bool = False
Expand All @@ -118,9 +123,7 @@ def get_support(
"""
if indices:
return np.arange(len(self.support_mask_))[
self.support_mask_
] # type: ignore
return np.arange(len(self.support_mask_))[self.support_mask_] # type: ignore
else:
return self.support_mask_ # type: ignore

Expand Down
8 changes: 7 additions & 1 deletion julearn/transformers/target/ju_generated_target_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,13 @@ def generate_target(
"""
logger.debug("Generating target")
gen_y = self.transformer.transform(X) # type: ignore
logger.debug(f"Target generated: {gen_y.columns}") # type: ignore

# If the generated target has a single column, convert it to a series
if gen_y.shape[1] == 1:
gen_y = gen_y.iloc[:, 0]
logger.debug(f"Target generated: {gen_y.name}")
else:
logger.debug(f"Target generated: {gen_y.columns}")
return gen_y

@property
Expand Down
Loading

0 comments on commit 82aa46b

Please sign in to comment.