From 2e68936f5b12d26bdc367c8662032610247c33e9 Mon Sep 17 00:00:00 2001 From: VandvC <69578966+VandvC@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:36:45 +0100 Subject: [PATCH 1/7] :wrench: pre commit --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a0fb7d..a91edc5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,10 +43,10 @@ repos: - id: flake8 additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ] -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.11 - hooks: - - id: ruff +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.11 + hooks: + - id: ruff - repo: https://github.com/pre-commit/mirrors-isort rev: v5.10.1 From f679c52e7f94e5294e22991cbdf84ecfe753de19 Mon Sep 17 00:00:00 2001 From: VandvC <69578966+VandvC@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:37:19 +0100 Subject: [PATCH 2/7] :memo: fix contributing doc --- CONTRIBUTING.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1c99a1..c56d97d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,18 +28,15 @@ Next, the Melusine team, or the community, will give you a feedback on whether y ## Fork to code in your personal Melusine repo -The first step is to get our MAIF repository on your personal GitHub repositories. To do so, use the "Fork" button. +The first step is to get our MAIF repository on your personal GitHub repositories. To do so, use the "Fork" button on Github landing page of melusine project. -fork this repository ## Clone your forked repository -clone your forked repository - Click on the "Code" button to copy the url of your repository, and next, you can paste this url to clone your forked repository. ``` -git clone https://github.com/YOUR_GITHUB_PROFILE/melusine.git +git clone https://github.com//melusine.git ``` ## Make sure that your repository is up-to-date From 9f3400b4f3f27e11e4a7b68f27d4e9904a3652a0 Mon Sep 17 00:00:00 2001 From: VandvC <69578966+VandvC@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:45:09 +0100 Subject: [PATCH 3/7] :goal_net: catch errors for pipeline --- melusine/pipeline.py | 53 ++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/melusine/pipeline.py b/melusine/pipeline.py index bead973..3a3ed10 100644 --- a/melusine/pipeline.py +++ b/melusine/pipeline.py @@ -8,6 +8,7 @@ import copy import importlib +import warnings from typing import Iterable, TypeVar from sklearn.pipeline import Pipeline @@ -35,7 +36,7 @@ class MelusinePipeline(Pipeline): """ OBJ_NAME: str = "name" - OBJ_KEY: str = "config_key" + OBJ_CONFIG_KEY: str = "config_key" OBJ_PARAMS: str = "parameters" STEPS_KEY: str = "steps" OBJ_CLASS: str = "class_name" @@ -194,10 +195,10 @@ def from_config( # Get config dict if config_key and not config_dict: raw_config_dict = config[config_key] - config_dict = cls.parse_pipeline_config(raw_config_dict) + config_dict = cls.parse_pipeline_config(config_dict=raw_config_dict) elif config_dict and not config_key: - config_dict = cls.parse_pipeline_config(config_dict) + config_dict = cls.parse_pipeline_config(config_dict=config_dict) else: raise ValueError("You should specify one and only one of 'config_key' and 'config_dict'") @@ -218,10 +219,33 @@ def from_config( # Step arguments obj_params = obj_meta[cls.OBJ_PARAMS] - if issubclass(obj_class, IoMixin): + # Verbose option + suppress_warnings: Any = kwargs.pop("suppress_warnings", False) + + try: obj = obj_class.from_config(config_dict=obj_params) - else: - raise TypeError(f"Object {obj_class} does not inherit from the SaverMixin class") # pragma: no cover + if not issubclass(obj_class, IoMixin) and not suppress_warnings: + type_warn_msg: str = f""" + It seems you are not using a melusine object in your melusine pipeline, but object '{obj_class}' (class {type(obj_class)}) for step '{step_name}'. + The expected behavior is not guaranteed and can break in future version of melusine. + + Recommended usage: + - Exclusive usage of melusine class Pipeline with melusine objects (MelusineTransformer, MelusineDetector...) + + To suppress this warning, instanciate melusine Pipeline with suppress_warnings argument at True (MelusinePipeline.from_config(..., suppress_warnings=True)). + + Visit Melusine Open-Source project: https://github.com/MAIF/melusine and the documentation for more information. + """ + warnings.warn(message=type_warn_msg, category=DeprecationWarning, stacklevel=2) + except AttributeError as err: + if not hasattr(obj_class, "from_config"): + raise AttributeError( + f"Object '{obj_class}' does not implement 'from_config' method, maybe consider to inherit it from the Melusine IoMixin class or use a MelusineTransformer class." + ) from None + else: + raise AttributeError(f"Error in loading class: '{obj_class}'\\n{str(err)}").with_traceback( + err.__traceback__ + ) from err # Add step to pipeline steps.append((step_name, obj)) @@ -251,17 +275,17 @@ def validate_step_config(cls, step: dict[str, Any]) -> dict[str, Any]: f"Pipeline step conf should have a {cls.OBJ_MODULE} key and a {cls.OBJ_CLASS} key." ) - if step.get(cls.OBJ_KEY): + if step.get(cls.OBJ_CONFIG_KEY): return { cls.OBJ_CLASS: step[cls.OBJ_CLASS], cls.OBJ_MODULE: step[cls.OBJ_MODULE], - cls.OBJ_KEY: step[cls.OBJ_KEY], + cls.OBJ_CONFIG_KEY: step[cls.OBJ_CONFIG_KEY], } if not step.get(cls.OBJ_NAME) or not step.get(cls.OBJ_PARAMS): raise PipelineConfigurationError( - f"Pipeline step conf should have a {cls.OBJ_NAME} key and a {cls.OBJ_KEY} key " - f"(unless a {cls.OBJ_KEY} is specified)." + f"Pipeline step conf should have a {cls.OBJ_NAME} key and a {cls.OBJ_CONFIG_KEY} key " + f"(unless a {cls.OBJ_CONFIG_KEY} is specified)." ) if not isinstance(step[cls.OBJ_PARAMS], dict): @@ -325,11 +349,12 @@ def parse_pipeline_config(cls, config_dict: dict[str, Any]) -> dict[str, Any]: steps = [] for step in config_dict[cls.STEPS_KEY]: # Step defined from the config - config_key = step.get(cls.OBJ_KEY) + config_key = step.get(cls.OBJ_CONFIG_KEY) + config_name = step.get(cls.OBJ_NAME) if config_key: - # Use config key as step name - step[cls.OBJ_NAME] = config_key - _ = step.pop(cls.OBJ_KEY) + # Use config key as step name if no name is provided in config + step[cls.OBJ_NAME] = config_name or config_key + _ = step.pop(cls.OBJ_CONFIG_KEY) # Update step parameters step[cls.OBJ_PARAMS] = config[config_key] From 537a5254696312be855ccc2ef5d59d74239f0c1b Mon Sep 17 00:00:00 2001 From: VandvC <69578966+VandvC@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:54:46 +0100 Subject: [PATCH 4/7] :bookmark: 3.1.0 -> 3.1.1 --- melusine/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/melusine/__init__.py b/melusine/__init__.py index e33be5c..49745e1 100644 --- a/melusine/__init__.py +++ b/melusine/__init__.py @@ -8,7 +8,7 @@ __all__ = ["config"] -VERSION = (3, 1, 0) +VERSION = (3, 1, 1) __version__ = ".".join(map(str, VERSION)) # ------------------------------- # From a26af959f2db07fbfc4edb2a23090aae0b08467f Mon Sep 17 00:00:00 2001 From: VandvC <69578966+VandvC@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:55:40 +0100 Subject: [PATCH 5/7] :white_check_mark: test new feature --- tests/pipeline/test_pipeline_basic.py | 129 +++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/test_pipeline_basic.py b/tests/pipeline/test_pipeline_basic.py index d2b2f25..0a7f669 100644 --- a/tests/pipeline/test_pipeline_basic.py +++ b/tests/pipeline/test_pipeline_basic.py @@ -1,6 +1,8 @@ """ Example script to fit a minimal preprocessing pipeline """ +from contextlib import nullcontext as does_not_raise +from unittest import mock import pandas as pd import pytest @@ -34,7 +36,7 @@ def test_pipeline_basic(dataframe_basic): @pytest.mark.usefixtures("use_test_config") def test_pipeline_from_config(dataframe_basic): """ - Train a pipeline using transformers defined in a pipeline config file. + Train a pipeline using scikit-learn transformers defined in a pipeline config file. """ # Input data df = dataframe_basic.copy() @@ -79,7 +81,7 @@ def test_pipeline_from_config(dataframe_basic): @pytest.mark.usefixtures("use_test_config") def test_pipeline_from_dict(dataframe_basic): """ - Train a pipeline using transformers defined in a pipeline config file. + Train a pipeline using scikit-learn transformers defined in a pipeline config file. """ # Input data df = dataframe_basic.copy() @@ -125,7 +127,7 @@ def test_pipeline_from_dict(dataframe_basic): @pytest.mark.usefixtures("use_test_config") def test_missing_config_key(): """ - Train a pipeline using transformers defined in a pipeline config file. + Train a pipeline using scikit-learn transformers defined in a pipeline config file. """ # Set config keys normalizer_name = "normalizer" @@ -159,7 +161,7 @@ def test_missing_config_key(): @pytest.mark.usefixtures("use_test_config") def test_invalid_config_key(): """ - Train a pipeline using transformers defined in a pipeline config file. + Train a pipeline using scikit-learn transformers defined in a pipeline config file. """ incorrect_config_key = "INCORRECT_CONFIG_KEY" @@ -263,7 +265,7 @@ def test_invalid_config_key(): ) def test_pipeline_config_error(pipeline_conf): """ - Train a pipeline using transformers defined in a pipeline config file. + Train a pipeline using scikit-learn transformers defined in a pipeline config file. """ # Create pipeline from a json config file (using config key "my_pipeline") with pytest.raises(PipelineConfigurationError): @@ -313,3 +315,120 @@ def test_missing_input_field(dataframe_basic): # Fit the pipeline and transform the data with pytest.raises(ValueError, match=rf"(?s){tokenizer_name}.*{missing_field_name}"): pipe.transform(df) + + +@pytest.mark.usefixtures("use_test_config") +def test_pipeline_from_config_with_error(): + """ + Instanciate a Melusine Pipeline not using scikit-learn transformers defined in a pipeline config file (raising errors). + """ + # Set config keys + normalizer_key = "test_normalizer" + pipeline_key = "test_pipeline" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processors", + "config_key": normalizer_key, + }, + ] + } + + test_conf_dict = config.dict() + test_conf_dict[pipeline_key] = conf_pipeline_basic + config.reset(config_dict=test_conf_dict) + + with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, pytest.raises( + AttributeError, match=r"Object '.+' does not implement 'from_config'.+to inherit it from the Melusine IoMixin.+" + ): + + class WrongNormalizerWithoutMethodFromConfig: + def __init__(self, form, input_columns, lowercase, output_columns) -> None: + """Mock class without from_config method so we raise error.""" + return None + + mock_import_class.return_value = WrongNormalizerWithoutMethodFromConfig + + # Create pipeline from a json config file + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True) + + with ( + mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, + pytest.raises( + AttributeError, + ) as e, + ): + + class WrongNormalizerWithMethodFromConfig: + def __init__(self) -> None: + return None + + @classmethod + def from_config(cls, config_dict: dict): + """Mock method from_config so we have a valid 'duck typed' class but with attribute error.""" + + return cls.class_constant_not_defined_raising_attribute_error + + mock_import_class.return_value = WrongNormalizerWithMethodFromConfig + + # Create pipeline from a json config file + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True) + + assert "does not implement 'from_config'" not in str(e) + + +@pytest.mark.usefixtures("use_test_config") +def test_pipeline_from_config_with_warning(recwarn): + """ + Instanciate a Melusine Pipeline not using scikit-learn transformers defined in a pipeline config file (raising warnings). + """ + # Set config keys + normalizer_key = "test_normalizer" + pipeline_key = "test_pipeline" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processors", + "config_key": normalizer_key, + }, + ] + } + + test_conf_dict = config.dict() + test_conf_dict[pipeline_key] = conf_pipeline_basic + config.reset(config_dict=test_conf_dict) + + with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class: + + class WrongNormalizerWithMethodFromConfig: + def __init__(self) -> None: + return None + + @classmethod + def from_config(cls, config_dict: dict): + """Mock method from_config so we have a valid 'duck typed' class but with invalid functional behavior.""" + return cls + + mock_import_class.return_value = WrongNormalizerWithMethodFromConfig + + with does_not_raise(): + # Create pipeline from a json config file and suppress warning + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=True) + + assert len(recwarn) == 0 + + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=False) + + assert len(recwarn) == 1 + + with pytest.warns( + DeprecationWarning, match=r".+It seems you are not using a melusine object in your melusine pipeline.+" + ): + # Create pipeline from a json config file + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=False) From 3d06dd5633ab1c445e22621751eb9167651ec306 Mon Sep 17 00:00:00 2001 From: VandvC <69578966+VandvC@users.noreply.github.com> Date: Tue, 7 Jan 2025 17:09:01 +0100 Subject: [PATCH 6/7] :memo: remove dead link --- CONTRIBUTING.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c56d97d..f28e174 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -134,9 +134,7 @@ Your branch is now available on your remote forked repository, with your changes A pull request allows you to ask the Melusine team to review your changes, and merge your changes into the master branch of the official repository. -To create one, on the top of your forked repository, you will find a button "Compare & pull request" - -pull request +To create one, on the top of your forked repository, you will find a button "Compare & pull request". As you can see, you can select on the right side which branch of your forked repository you want to associate to the pull request. @@ -147,12 +145,7 @@ On the left side, you will find the official Melusine repository. Due to increas - Head repository: your-github-username/melusine - Head branch: your-contribution-branch -clone your forked repository - Once you have selected the right branch, let's create the pull request with the green button "Create pull request". - -clone your forked repository - In the description, a template is initialized with all information you have to give about what you are doing on what your PR is doing. Please follow this to write your PR content. From cc7fb7d94990a7188117e8230aa1e0ee3d345d58 Mon Sep 17 00:00:00 2001 From: VandvC <69578966+VandvC@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:01:06 +0100 Subject: [PATCH 7/7] :white_check_mark: correct test py3.8 --- tests/pipeline/test_pipeline_basic.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/pipeline/test_pipeline_basic.py b/tests/pipeline/test_pipeline_basic.py index 0a7f669..73db444 100644 --- a/tests/pipeline/test_pipeline_basic.py +++ b/tests/pipeline/test_pipeline_basic.py @@ -355,12 +355,9 @@ def __init__(self, form, input_columns, lowercase, output_columns) -> None: # Create pipeline from a json config file _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True) - with ( - mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, - pytest.raises( - AttributeError, - ) as e, - ): + with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, pytest.raises( + AttributeError + ) as e: class WrongNormalizerWithMethodFromConfig: def __init__(self) -> None: