From 22d06eb5cb6b47917b6519439a3bd7637a2b65a7 Mon Sep 17 00:00:00 2001
From: James Duncan
Date: Fri, 9 Feb 2024 15:24:58 -0800
Subject: [PATCH] Move to hatchling build system (#55)

* Bumps minimum Python version to 3.9 and minimum Pandas version to 2.0.0.
* Incorporates linting via ruff, black, and isort.
* Expands the python-package GitHub workflow to test more OSes and Python versions.
* Fixes failing tests and adds new tests.
---
 .github/workflows/python-package.yml |  101 +-
 .gitignore                           |   53 +
 Makefile                             |   22 +
 README.md                            |   12 +-
 docs/troubleshooting.md              |    7 -
 hatch.toml                           |   62 +
 pyproject.toml                       |   56 +-
 requirements.txt                     |    9 +
 setup.py                             |   53 -
 tests/test_basic.py                  |   65 +-
 tests/test_helpers.py                |  745 +++++--
 tests/test_pipelines.py              |  393 ++--
 tests/test_utils.py                  | 3093 +++++++++++++++++---------
 vflow/__init__.py                    |   62 +-
 vflow/helpers.py                     |  152 +-
 vflow/pipeline.py                    |   25 +-
 vflow/subkey.py                      |   26 +-
 vflow/utils.py                       |  140 +-
 vflow/vfunc.py                       |   62 +-
 vflow/vset.py                        |  117 +-
 20 files changed, 3557 insertions(+), 1698 deletions(-)
 create mode 100644 Makefile
 delete mode 100644 docs/troubleshooting.md
 create mode 100644 hatch.toml
 create mode 100644 requirements.txt
 delete mode 100644 setup.py

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 71713ba..d62d4ae 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -1,35 +1,96 @@
-# This workflow will install Python dependencies, run tests
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-
+# based on https://github.com/pypa/hatch/blob/master/.github/workflows/test.yml
 name: tests
 
-on: [ push ]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  STABLE_PYTHON_VERSION: '3.11'
+  PYTHONUNBUFFERED: "1"
+  FORCE_COLOR: "1"
 
 jobs:
   build:
-
-    runs-on: ubuntu-latest
+    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
+    runs-on: ${{ matrix.os }}
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.7.15, 3.10.8]
+        python-version: ["3.9", "3.10", "3.11"]
+        os: [ubuntu-latest, windows-latest, macos-latest]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
+
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install .[dev] --no-cache-dir
+
+    - name: Ensure latest pip
+      run: python -m pip install --upgrade pip
+
+    - name: Install Hatch
+      run: pip install hatch
+
+    - name: Install vflow
+      run: python -m pip install -e .
+ + - name: Check styles + run: hatch run style:check + - name: Test with pytest - run: | - pytest --cov=./ --cov-report=xml - - name: Lint with pylint - run: | - pylint vflow *.py --rcfile=.pylintrc - - name: "Upload coverage to Codecov" - uses: codecov/codecov-action@v2 + run: hatch run full + + - name: Disambiguate coverage filename + run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}" + + - name: Upload coverage data + uses: actions/upload-artifact@v3 + with: + name: coverage-data + path: .coverage.* + + coverage: + name: Report coverage + runs-on: ubuntu-latest + needs: + - build + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ env.STABLE_PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: ${{ env.STABLE_PYTHON_VERSION }} + + - name: Install Hatch + run: pip install hatch + + - name: Download coverage data + uses: actions/download-artifact@v3 + with: + name: coverage-data + + - name: Combine coverage data + run: hatch run coverage:combine + + - name: Export coverage reports + run: hatch run coverage:report-xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 with: fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true diff --git a/.gitignore b/.gitignore index e47fdb2..e554f41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +# -*- mode: gitignore; -*- + **mlruns **.ipynb_checkpoints **cache* @@ -48,3 +50,54 @@ notebooks/data/* .hypothesis **coverage* codecov + +# -- Emacs +# https://github.com/github/gitignore/blob/main/Global/Emacs.gitignore + +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4f5b0b2 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +VERSION ?= $(shell git rev-parse --short HEAD) +CONDA_ENV_NAME ?= vflow +HATCH_ENV_NAME ?= test + +.PHONY: build_conda_env build_ipykernel test_% run_tests fix_styles + +build_conda_env: + conda create -n $(CONDA_ENV_NAME) -y python==3.10 pip + conda run -n $(CONDA_ENV_NAME) --no-capture-output pip install -r requirements.txt + conda run -n $(CONDA_ENV_NAME) --no-capture-output pip install . ipykernel + +build_ipykernel: + conda run -n $(CONDA_ENV_NAME) python -m ipykernel install --user --name $(CONDA_ENV_NAME) --display-name "Python [conda:$(CONDA_ENV_NAME)]" + +test_%: + hatch -v run dev $(PYTEST_ARGS) tests/test_$*.py + +run_tests: + hatch -v run cov + +fix_styles: + hatch -v run style:fmt diff --git a/README.md b/README.md index 78d011e..22b5df0 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,17 @@ See the [docs](https://yu-group.github.io/veridical-flow/) for reference on the ## Installation -Install with `pip install vflow` (see [here](https://github.com/Yu-Group/veridical-flow/blob/master/docs/troubleshooting.md) for help). For dev version (unstable), clone the repo and run `python setup.py develop` from the repo directory. 
+
+### Stable version
+
+```bash
+pip install vflow
+```
+
+### Development version (unstable)
+
+```bash
+pip install vflow@git+https://github.com/Yu-Group/veridical-flow
+```
 
 # References
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
deleted file mode 100644
index f553b2b..0000000
--- a/docs/troubleshooting.md
+++ /dev/null
@@ -1,7 +0,0 @@
-In case you run into issues with installation, here are some things that could help:
-
-If you don't have permissions to install on your machine, use the --user flag:
-
-`pip install git+https://github.com/Yu-group/pcs-pipline --user`
-
-To develop locally, run `python3 setup.py develop`
\ No newline at end of file
diff --git a/hatch.toml b/hatch.toml
new file mode 100644
index 0000000..ddcf5bb
--- /dev/null
+++ b/hatch.toml
@@ -0,0 +1,62 @@
+# based on https://github.com/pypa/hatch/blob/master/hatch.toml
+
+[envs.default]
+dependencies = [
+    "coverage[toml]",
+    "pytest-cov",
+    "pytest-randomly",
+    "pytest-rerunfailures",
+    "pytest-xdist",
+]
+
+[envs.default.scripts]
+# --cov must not come before an argument in order to use the sources defined by config
+_cov = "pytest --cov --cov-report=term-missing --cov-config=pyproject.toml"
+dev = "pytest -p no:randomly --no-cov {args:tests}"
+cov = "_cov -p no:randomly {args:tests}"
+full = "_cov -n auto --reruns 5 --reruns-delay 3 -r aR {args:tests}"
+
+[envs.dev]
+template = "default"
+dependencies = [
+    "jupyterlab",
+    "torch>=1.0.0",
+    "torchvision",
+    "tqdm",
+    "scikit-learn>=0.23.0",
+]
+
+[envs.dev.env-vars]
+PIP_INDEX_URL = "https://download.pytorch.org/whl/cpu"
+PIP_EXTRA_INDEX_URL = "https://pypi.org/simple/"
+
+[envs.style]
+detached = true
+dependencies = [
+    "ruff",
+    "black",
+    "isort",
+]
+
+[envs.style.scripts]
+check = [
+    "ruff vflow tests",
+    "black --check --diff vflow tests",
+    "isort --check --diff --profile black vflow tests",
+]
+fmt = [
+    "isort --profile black ./vflow ./tests",
+    "black ./vflow ./tests",
+    "check",
+]
+
+[envs.coverage]
+detached = true
+dependencies = [
+    "coverage[toml]",
+    "lxml",
+]
+
+[envs.coverage.scripts]
+combine = "coverage combine {args}"
+report-xml = "coverage xml -i"
diff --git a/pyproject.toml b/pyproject.toml
index fed528d..85a8ba3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,55 @@
 [build-system]
-requires = ["setuptools"]
-build-backend = "setuptools.build_meta"
+requires = ["hatchling", "hatch-requirements-txt"]
+build-backend = "hatchling.build"
+
+[project]
+name = "vflow"
+version = "0.1.2"
+authors = [
+    { name="Chandan Singh", email="chandan_singh@berkeley.edu" },
+    { name="James Duncan", email="jpduncan@berkeley.edu" },
+    { name="Abhineet Agarwal", email="aa3797@berkeley.edu" },
+    { name="Rush Kapoor", email="rush.kapoor@berkeley.edu" },
+]
+maintainers = [
+    { name="James Duncan", email="jpduncan@berkeley.edu" },
+]
+description = "A framework for doing stability analysis with PCS."
+readme = "README.md" +requires-python = ">=3.9" +classifiers = [ + "Intended Audience :: Science/Research", + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Operating System :: OS Independent", +] +license = {text = "MIT"} +dynamic = ["dependencies"] + +[project.urls] +Homepage = "https://vflow.csinva.io/" +Issues = "https://github.com/Yu-Group/veridical-flow/issues" + +[project.optional-dependencies] +gpu = ["torch"] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.metadata.hooks.requirements_txt] +files = ["requirements.txt"] + +[tool.hatch.build.targets.sdist] +exclude = [ + "/.github", + "/notebooks", +] + +[tool.hatch.build.targets.wheel] +packages = ["vflow"] + +[tool.coverage.run] +branch = true +source_pkgs = ["vflow", "tests"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a2f6b90 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +numpy +scipy +matplotlib +networkx +pandas>=2.0.0 +joblib +pytest +ray +mlflow diff --git a/setup.py b/setup.py deleted file mode 100644 index eb60502..0000000 --- a/setup.py +++ /dev/null @@ -1,53 +0,0 @@ -from os import path - -import setuptools - -path_to_repo = path.abspath(path.dirname(__file__)) -with open(path.join(path_to_repo, 'README.md'), encoding='utf-8') as f: - long_description = f.read() - -setuptools.setup( - name="vflow", - version="0.1.2", - author="Yu Group", - author_email="chandan_singh@berkeley.edu", - description="A framework for doing stability analysis with PCS.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/Yu-Group/pcs-pipeline", - packages=setuptools.find_packages(), - install_requires=[ - 'numpy', - 'scipy', - 'matplotlib', - 'networkx', - 'pandas', - 'joblib', - 'pytest', - 'ray', - 'mlflow', - ], - extras_require={ - 'dev': [ - 'pytest', - 'pytest-cov', - 'pylint==2.12.2', - 'tqdm', - 'scikit-learn >=0.23.0', # 0.23+ only works on py3.6+) - ], - 'notebooks': [ - 'tqdm', - 'jupyter', - 'jupyterlab', - 'scikit-learn >=0.23.0', # 0.23+ only works on py3.6+) - 'torch >= 1.0.0', - 'torchvision', - ], - }, - python_requires='>=3.6', - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], -) diff --git a/tests/test_basic.py b/tests/test_basic.py index 004240f..0eafbe5 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,13 +1,13 @@ import pytest import vflow -from vflow.utils import to_tuple, to_list +from vflow.utils import to_list, to_tuple class TestBasic: def setup_method(self): self.pipeline = vflow.PCSPipeline() - self.vfunc_set = vflow.Vset(name='s', vfuncs={}) + self.vfunc_set = vflow.Vset(name="s", vfuncs={}) self.vfunc = vflow.Vfunc() def test_class_initializations(self): @@ -16,29 +16,58 @@ def test_class_initializations(self): assert self.vfunc is not None def test_iteration(self): - """Tests that iterating over pipeline is same as iterating over its steps - """ + """Tests that iterating over pipeline is same as iterating over its steps""" self.pipeline.steps = [0, 1, 2] assert self.pipeline.steps[0] == 0 - assert self.pipeline[0] == 0, 'accessing pipeline steps' + assert self.pipeline[0] == 0, "accessing pipeline steps" for i, x in enumerate(self.pipeline): - assert x == i, 'iterating over pipeline steps' - assert self.pipeline[1:] == [1, 2], 'slicing 
pipeline' + assert x == i, "iterating over pipeline steps" + assert self.pipeline[1:] == [1, 2], "slicing pipeline" def test_list_packing(self): - """Test that packing / unpacking lists works appropriately - """ + """Test that packing / unpacking lists works appropriately""" start = [[0, 10], [1, 11], [2, 12]] X, y = to_tuple(start) packed = to_list((X, y)) - assert start == packed, 'unpacking/packing works' - + assert start == packed, "unpacking/packing works" + def test_to_list(self): - assert to_list((['x1', 'x2', 'x3'], ['y1', 'y2', 'y3'])) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] - assert to_list((['x1'], ['y1'])) == [['x1', 'y1']] - assert to_list((['x1', 'x2', 'x3'],)) == [['x1'], ['x2'], ['x3']] - assert to_list(('x1', )) == [['x1']] - assert to_list(('x1', 'y1')) == [['x1', 'y1']] - assert to_list(('x1', 'x2', 'x3', 'y1', 'y2', 'y3')) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] + assert to_list((["x1", "x2", "x3"], ["y1", "y2", "y3"])) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] + assert to_list((["x1"], ["y1"])) == [["x1", "y1"]] + assert to_list((["x1", "x2", "x3"],)) == [["x1"], ["x2"], ["x3"]] + assert to_list(("x1",)) == [["x1"]] + assert to_list(("x1", "y1")) == [["x1", "y1"]] + assert to_list(("x1", "x2", "x3", "y1", "y2", "y3")) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] with pytest.raises(ValueError): - to_list(('x1', 'x2', 'x3', 'y1', 'y2')) + to_list(("x1", "x2", "x3", "y1", "y2")) + + def test_build_graph(self): + v0 = vflow.Vset("v0", [lambda x: x + 1], ["add1"]) + v1 = vflow.Vset("v1", [lambda x: 2 * x], ["mult2"]) + v2 = vflow.Vset("v2", [lambda x: x % 3], ["mod3"]) + + x = vflow.init_args([1.5], ["x"])[0] + x0 = v0.fit_transform(x) + x1 = v1.fit_transform(x0) + x2 = v2.fit_transform(x1) + + graph = vflow.build_graph(x2) + assert graph.is_directed() + assert graph.size() == 4 # edges + assert graph.order() == 5 # nodes: init + 3 Vsets + End + in_degrees = dict(graph.in_degree).values() + assert max(in_degrees) == 1 + assert sum(in_degrees) == 4 + edges = list(graph.edges) + assert ("init", v0) in edges + assert (v0, v1) in edges + assert (v1, v2) in edges + assert (v2, "End") in edges diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 06b9e8e..2004f16 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,292 +1,591 @@ +import numpy as np from numpy.testing import assert_equal +from sklearn.datasets import make_classification +from sklearn.metrics import accuracy_score, balanced_accuracy_score +from sklearn.model_selection import train_test_split +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils import resample -from vflow.helpers import * +from vflow.helpers import ( + build_vset, + cum_acc_by_uncertainty, + filter_vset_by_metric, + init_args, +) +from vflow.subkey import Subkey +from vflow.utils import dict_to_df +from vflow.vset import Vset -class TestHelpers: +class TestHelpers: def test_build_vset(self): - - def my_func(param1: str, param2: str, param3: str='a'): + def my_func(param1: str, param2: str, param3: str = "a"): return (param1, param2, param3) - def my_func2(param1: str, param2: str, param3: str='b'): - return (param1 + '1', param2 + '2', param3) + def my_func2(param1: str, param2: str, param3: str = "b"): + return (param1 + "1", param2 + "2", param3) - param_dict1 = { 'param1': ['hello', 'foo'], 'param2': ['world', 'bar'] } - param_dict2 = { 'param1': ['hello'], 'param2': ['world', 'there']} + param_dict1 = {"param1": ["hello", "foo"], "param2": ["world", "bar"]} + 
param_dict2 = {"param1": ["hello"], "param2": ["world", "there"]} # my_func without param_dict - vset = build_vset("vset", my_func, param1='hello', param2='world', param3='b') - assert len(vset) == 1, \ - 'build_vset with my_func fails' + vset = build_vset("vset", my_func, param1="hello", param2="world", param3="b") + assert len(vset) == 1, "build_vset with my_func fails" d_key = [key[0] for key in list(vset.vfuncs.keys())][0] - assert d_key.value == 'vset_0', \ - 'build_vset with my_func fails' + assert d_key.value == "vset_0", "build_vset with my_func fails" d_keyword = [val.vfunc.keywords for val in list(vset.vfuncs.values())][0] - assert d_keyword == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func fails' + assert d_keyword == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func fails" + assert next(iter(vset.vfuncs.values())).transform() == ("hello", "world", "b") # my_func without param_dict, reps - vset = build_vset("vset", my_func, reps=2, param1='hello', param2='world', param3='b') - assert len(vset) == 2, \ - 'build_vset with my_func + reps fails' + vset = build_vset( + "vset", my_func, reps=2, param1="hello", param2="world", param3="b" + ) + assert len(vset) == 2, "build_vset with my_func + reps fails" d_keys = [key[0].value[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0] == 'rep=0', \ - 'build_vset with my_func + reps fails' - assert d_keys[1] == 'rep=1', \ - 'build_vset with my_func + reps fails' + assert d_keys[0] == "rep=0", "build_vset with my_func + reps fails" + assert d_keys[1] == "rep=1", "build_vset with my_func + reps fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords[0] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + reps fails' - assert d_keywords[1] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + reps fails' + assert d_keywords[0] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + reps fails" + assert d_keywords[1] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + reps fails" # my_func with param_dict1 - vset = build_vset("vset", my_func, param_dict1, param3='b') - assert len(vset) == 4, \ - 'build_vset with my_func + param_dict1 fails' + vset = build_vset("vset", my_func, param_dict1, param3="b") + assert len(vset) == 4, "build_vset with my_func + param_dict1 fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('func=my_func', 'param1=hello', 'param2=world'), \ - 'build_vset with my_func + param_dict1 fails' - assert d_keys[1].value == ('func=my_func', 'param1=hello', 'param2=bar'), \ - 'build_vset with my_func + param_dict1 fails' - assert d_keys[2].value == ('func=my_func', 'param1=foo', 'param2=world'), \ - 'build_vset with my_func + param_dict1 fails' - assert d_keys[3].value == ('func=my_func', 'param1=foo', 'param2=bar'), \ - 'build_vset with my_func + param_dict1 fails' + assert d_keys[0].value == ( + "func=my_func", + "param1=hello", + "param2=world", + ), "build_vset with my_func + param_dict1 fails" + assert d_keys[1].value == ( + "func=my_func", + "param1=hello", + "param2=bar", + ), "build_vset with my_func + param_dict1 fails" + assert d_keys[2].value == ( + "func=my_func", + "param1=foo", + "param2=world", + ), "build_vset with my_func + param_dict1 fails" + assert d_keys[3].value == ( + "func=my_func", + 
"param1=foo", + "param2=bar", + ), "build_vset with my_func + param_dict1 fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords[0] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' - assert d_keywords[1] == {'param1': 'hello', 'param2': 'bar', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' - assert d_keywords[2] == {'param1': 'foo', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' - assert d_keywords[3] == {'param1': 'foo', 'param2': 'bar', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' + assert d_keywords[0] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" + assert d_keywords[1] == { + "param1": "hello", + "param2": "bar", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" + assert d_keywords[2] == { + "param1": "foo", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" + assert d_keywords[3] == { + "param1": "foo", + "param2": "bar", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" # my_func with param_dict2, reps - vset = build_vset("vset", my_func, param_dict2, reps=2, lazy=True, param3='b') - assert vset._lazy, \ - 'build_vset with my_func + param_dict2 + reps fails' - assert len(vset) == 4, \ - 'build_vset with my_func + param_dict2 + reps fails' + vset = build_vset("vset", my_func, param_dict2, reps=2, lazy=True, param3="b") + assert vset._lazy, "build_vset with my_func + param_dict2 + reps fails" + assert len(vset) == 4, "build_vset with my_func + param_dict2 + reps fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('rep=0', 'func=my_func', 'param1=hello', 'param2=world'), \ - 'build_vset with my_func + param_dict2 + reps fails' - assert d_keys[1].value == ('rep=1', 'func=my_func', 'param1=hello', 'param2=world'), \ - 'build_vset with my_func + param_dict2 + reps fails' - assert d_keys[2].value == ('rep=0', 'func=my_func', 'param1=hello', 'param2=there'), \ - 'build_vset with my_func + param_dict2 + reps fails' - assert d_keys[3].value == ('rep=1', 'func=my_func', 'param1=hello', 'param2=there'), \ - 'build_vset with my_func + param_dict2 + reps fails' + assert d_keys[0].value == ( + "rep=0", + "func=my_func", + "param1=hello", + "param2=world", + ), "build_vset with my_func + param_dict2 + reps fails" + assert d_keys[1].value == ( + "rep=1", + "func=my_func", + "param1=hello", + "param2=world", + ), "build_vset with my_func + param_dict2 + reps fails" + assert d_keys[2].value == ( + "rep=0", + "func=my_func", + "param1=hello", + "param2=there", + ), "build_vset with my_func + param_dict2 + reps fails" + assert d_keys[3].value == ( + "rep=1", + "func=my_func", + "param1=hello", + "param2=there", + ), "build_vset with my_func + param_dict2 + reps fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords[0] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 fails' - assert d_keywords[1] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 fails' - assert d_keywords[2] == {'param1': 'hello', 'param2': 'there', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 fails' - assert d_keywords[3] == {'param1': 'hello', 'param2': 'there', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 
fails' + assert d_keywords[0] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" + assert d_keywords[1] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" + assert d_keywords[2] == { + "param1": "hello", + "param2": "there", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" + assert d_keywords[3] == { + "param1": "hello", + "param2": "there", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" # 1 func with list of param_dicts - vset = build_vset("vset", my_func, [param_dict1, param_dict2], param3='b') - assert len(vset) == 5, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + vset = build_vset("vset", my_func, [param_dict1, param_dict2], param3="b") + assert ( + len(vset) == 5 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" d_keys = [key[0].value for key in list(vset.vfuncs.keys())] - assert ('func=my_func', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=there') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert ( + "func=my_func", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=there", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert {'param1': 'hello', 'param2': 'world', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'hello', 'param2': 'bar', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'foo', 'param2': 'world', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'foo', 'param2': 'bar', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'hello', 'param2': 'there', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert { + "param1": "hello", + "param2": "world", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert { + "param1": "hello", + "param2": "bar", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert { + "param1": "foo", + "param2": "world", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, 
param_dict2] fails" + assert { + "param1": "foo", + "param2": "bar", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert { + "param1": "hello", + "param2": "there", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" # list of funcs with 1 param_dict - vset = build_vset("vset", [my_func, my_func2], param_dict1, param3='b') - assert len(vset) == 8, \ - 'build_vset with [my_func, my_func2] + param_dict1 fails' + vset = build_vset("vset", [my_func, my_func2], param_dict1, param3="b") + assert len(vset) == 8, "build_vset with [my_func, my_func2] + param_dict1 fails" d_keys = [key[0].value for key in list(vset.vfuncs.keys())] - assert ('func=my_func', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert ( + "func=my_func", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords.count({'param1': 'hello', 'param2': 'world', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert d_keywords.count({'param1': 'hello', 'param2': 'bar', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert d_keywords.count({'param1': 'foo', 'param2': 'world', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert d_keywords.count({'param1': 'foo', 'param2': 'bar', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + 
[param_dict1, param_dict2] fails' + assert ( + d_keywords.count({"param1": "hello", "param2": "world", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + d_keywords.count({"param1": "hello", "param2": "bar", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + d_keywords.count({"param1": "foo", "param2": "world", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + d_keywords.count({"param1": "foo", "param2": "bar", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + + for key, vfunc in vset.vfuncs.items(): + subkey = key[0] + assert isinstance(subkey, Subkey) + assert len(subkey.value) == 3 + assert all([isinstance(x, str) for x in subkey.value]) + func_name = subkey.value[0][5:] + assert func_name in ["my_func", "my_func2"] + assert subkey.value[1][:6] == "param1" + assert subkey.value[2][:6] == "param2" + param1 = subkey.value[1][7:] + param2 = subkey.value[2][7:] + if func_name == "my_func": + expected_vfunc_output = (param1, param2, "b") + else: + expected_vfunc_output = (param1 + "1", param2 + "2", "b") + assert vfunc.transform() == expected_vfunc_output - # list of funcs with list of param_dicts + # list of funcs with list of param_dicts vset = build_vset("vset", [my_func, my_func2], [param_dict1, param_dict2]) - assert len(vset) == 6, \ - 'build_vset with [my_func, my_func2] + [param_dict1, param_dict2] fails' + assert ( + len(vset) == 6 + ), "build_vset with [my_func, my_func2] + [param_dict1, param_dict2] fails" d_keys = [key[0].value for key in list(vset.vfuncs.keys())] - assert ('func=my_func', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=there') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert ( + "func=my_func", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=there", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" class my_class: - def __init__(self, param1, param2, param3: str='a'): + def __init__(self, param1, param2, param3: str = "a"): self.param1 = param1 self.param2 = param2 self.param3 = param3 - def fit(self, arg1: str): + def 
fit(self, arg1: str = "default"): self.arg1 = arg1 + return self # my_class without param_dict - vset = build_vset("vset", my_class, param1='hello', param2='world', param3='b') - assert len(vset) == 1, \ - 'build_vset with my_class fails' + vset = build_vset("vset", my_class, param1="hello", param2="world", param3="b") + assert len(vset) == 1, "build_vset with my_class fails" d_key = [key[0] for key in list(vset.vfuncs.keys())][0] - assert d_key.value == 'vset_0', \ - 'build_vset with my_class fails' + assert d_key.value == "vset_0", "build_vset with my_class fails" d_val = [val.vfunc for val in list(vset.vfuncs.values())][0] - assert isinstance(d_val, my_class), \ - 'build_vset with my_class fails' - assert (d_val.param1, d_val.param2, d_val.param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class fails' + assert isinstance(d_val, my_class), "build_vset with my_class fails" + assert (d_val.param1, d_val.param2, d_val.param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class fails" # my_class without param_dict, reps - vset = build_vset("vset", my_class, reps=2, param1='hello', param2='world', param3='b') - assert len(vset) == 2, \ - 'build_vset with my_class + reps fails' + vset = build_vset( + "vset", my_class, reps=2, param1="hello", param2="world", param3="b" + ) + vset.fit() + objs = list(vset.fitted_vfuncs.values()) + assert len(objs) == 3 + assert all([isinstance(x, my_class) for x in objs[:-1]]) + assert isinstance(objs[-1], tuple) + assert isinstance(objs[-1][0], Vset) + + assert len(vset) == 2, "build_vset with my_class + reps fails" d_keys = [key[0].value[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0] == 'rep=0', \ - 'build_vset with my_class + reps fails' - assert d_keys[1] == 'rep=1', \ - 'build_vset with my_class + reps fails' + assert d_keys[0] == "rep=0", "build_vset with my_class + reps fails" + assert d_keys[1] == "rep=1", "build_vset with my_class + reps fails" d_vals = [val.vfunc for val in list(vset.vfuncs.values())] - assert isinstance(d_vals[0], my_class), \ - 'build_vset with my_class + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + reps fails' - assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + reps fails' - assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + reps fails' + assert isinstance(d_vals[0], my_class), "build_vset with my_class + reps fails" + assert isinstance(d_vals[1], my_class), "build_vset with my_class + reps fails" + assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + reps fails" + assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + reps fails" # my_class with param_dict1 - vset = build_vset("vset", my_class, param_dict1, param3='b') - assert len(vset) == 4, \ - 'build_vset with my_class + param_dict1 fails' + vset = build_vset("vset", my_class, param_dict1, param3="b") + assert len(vset) == 4, "build_vset with my_class + param_dict1 fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('func=my_class', 'param1=hello', 'param2=world'), \ - 'build_vset with my_class + param_dict1 fails' - assert d_keys[1].value == ('func=my_class', 'param1=hello', 'param2=bar'), \ - 'build_vset with my_class + param_dict1 fails' - assert d_keys[2].value == 
('func=my_class', 'param1=foo', 'param2=world'), \ - 'build_vset with my_class + param_dict1 fails' - assert d_keys[3].value == ('func=my_class', 'param1=foo', 'param2=bar'), \ - 'build_vset with my_class + param_dict1 fails' + assert d_keys[0].value == ( + "func=my_class", + "param1=hello", + "param2=world", + ), "build_vset with my_class + param_dict1 fails" + assert d_keys[1].value == ( + "func=my_class", + "param1=hello", + "param2=bar", + ), "build_vset with my_class + param_dict1 fails" + assert d_keys[2].value == ( + "func=my_class", + "param1=foo", + "param2=world", + ), "build_vset with my_class + param_dict1 fails" + assert d_keys[3].value == ( + "func=my_class", + "param1=foo", + "param2=bar", + ), "build_vset with my_class + param_dict1 fails" d_vals = [val.vfunc for val in list(vset.vfuncs.values())] - assert isinstance(d_vals[0], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ('hello', 'bar', 'b'), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ('foo', 'world', 'b'), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ('foo', 'bar', 'b'), \ - 'build_vset with my_class + param_dict1 fails' + assert isinstance( + d_vals[0], my_class + ), "build_vset with my_class + param_dict1 fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict1 fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict1 fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ( + "hello", + "bar", + "b", + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ( + "foo", + "world", + "b", + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ( + "foo", + "bar", + "b", + ), "build_vset with my_class + param_dict1 fails" # my_class with param_dict2, reps - vset = build_vset("vset", my_class, param_dict2, reps=2, lazy=True, param3='b') - assert vset._lazy, \ - 'build_vset with my_class + param_dict2 + reps fails' - assert len(vset) == 4, \ - 'build_vset with my_class + param_dict2 + reps fails' + vset = build_vset("vset", my_class, param_dict2, reps=2, lazy=True, param3="b") + assert vset._lazy, "build_vset with my_class + param_dict2 + reps fails" + assert len(vset) == 4, "build_vset with my_class + param_dict2 + reps fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('rep=0', 'func=my_class', 'param1=hello', 'param2=world'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert d_keys[1].value == ('rep=1', 'func=my_class', 'param1=hello', 'param2=world'), \ - 'build_vset 
with my_class + param_dict2 + reps fails' - assert d_keys[2].value == ('rep=0', 'func=my_class', 'param1=hello', 'param2=there'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert d_keys[3].value == ('rep=1', 'func=my_class', 'param1=hello', 'param2=there'), \ - 'build_vset with my_class + param_dict2 + reps fails' + assert d_keys[0].value == ( + "rep=0", + "func=my_class", + "param1=hello", + "param2=world", + ), "build_vset with my_class + param_dict2 + reps fails" + assert d_keys[1].value == ( + "rep=1", + "func=my_class", + "param1=hello", + "param2=world", + ), "build_vset with my_class + param_dict2 + reps fails" + assert d_keys[2].value == ( + "rep=0", + "func=my_class", + "param1=hello", + "param2=there", + ), "build_vset with my_class + param_dict2 + reps fails" + assert d_keys[3].value == ( + "rep=1", + "func=my_class", + "param1=hello", + "param2=there", + ), "build_vset with my_class + param_dict2 + reps fails" d_vals = [val.vfunc for val in list(vset.vfuncs.values())] - assert isinstance(d_vals[0], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ('hello', 'there', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ('hello', 'there', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - + assert isinstance( + d_vals[0], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ( + "hello", + "there", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ( + "hello", + "there", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" def test_cum_acc_by_uncertainty(self): - mean_dict = {'group_0': np.array([[0.2, 0.8], [0.25, 0.75], [0.1, 0.9]]), - 'group_1': np.array([[0.4, 0.6], [0.5, 0.5], [0.45, 0.55]])} - std_dict = {'group_0': np.array([[0.003, 0.003], [0.146, 0.146], [0.0023, 0.0023]]), - 'group_1': np.array([[0.0054, 0.0054], [0.2344, 0.2344], [0.5166, 0.5166]])} + mean_dict = { + "group_0": np.array([[0.2, 0.8], [0.25, 0.75], [0.1, 0.9]]), + "group_1": np.array([[0.4, 0.6], [0.5, 0.5], [0.45, 0.55]]), + } + std_dict = 
{ + "group_0": np.array([[0.003, 0.003], [0.146, 0.146], [0.0023, 0.0023]]), + "group_1": np.array([[0.0054, 0.0054], [0.2344, 0.2344], [0.5166, 0.5166]]), + } true_labels = [0, 1, 1] - true_labels_dict = {'y': [0, 1, 1]} + true_labels_dict = {"y": [0, 1, 1]} u0, c0, idx0 = cum_acc_by_uncertainty(mean_dict, std_dict, true_labels) u1, c1, idx1 = cum_acc_by_uncertainty(mean_dict, std_dict, true_labels_dict) assert_equal(u0, u1) assert_equal(c0, c1) assert_equal(idx0, idx1) assert u0.shape == c0.shape == (2, 3) - assert_equal(u0[0], sorted(x[1] for x in std_dict['group_0'])) - assert_equal(u0[1], sorted(x[1] for x in std_dict['group_1'])) - assert_equal(c0[0], [1, 1/2, 2/3]) - assert_equal(c0[1], [0, 0, 1/3]) + assert_equal(u0[0], sorted(x[1] for x in std_dict["group_0"])) + assert_equal(u0[1], sorted(x[1] for x in std_dict["group_1"])) + assert_equal(c0[0], [1, 1 / 2, 2 / 3]) + assert_equal(c0[1], [0, 0, 1 / 3]) assert_equal(idx0[0], [2, 0, 1]) assert_equal(idx0[1], [0, 1, 2]) + + def test_filter_vset_by_metric(self): + X, y = make_classification(n_samples=100, n_features=5) + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=42 + ) # ex. with another split? + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # optionally provide names for each of these + + # subsample data + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) + X_trains, y_trains = subsampling_set(X_train, y_train) + + # fit models + dt_set = build_vset( + name="DT", + func=DecisionTreeClassifier, + param_dict={"criterion": ["gini", "entropy", "log_loss"]}, + ) + dt_set.fit(X_trains, y_trains) + preds_test = dt_set.predict(X_test) + + # get metrics + hard_metrics_set = Vset( + name="hard_metrics", + vfuncs=[accuracy_score, balanced_accuracy_score], + vfunc_keys=["Acc", "Bal_Acc"], + ) + + hard_metrics = hard_metrics_set.evaluate(preds_test, y_test) + df = dict_to_df(hard_metrics) + + filtered_dt_set = filter_vset_by_metric( + metric_dict=hard_metrics, + vset=dt_set, + n_keep=1, + filter_on=["Bal_Acc"], + group=False, + ) + + df_bal_acc = df[df["hard_metrics"] == "Bal_Acc"] + top_DT = df_bal_acc[df_bal_acc["out"] == df_bal_acc["out"].max()]["DT"].iloc[0] + subkey = next(iter(filtered_dt_set.vfuncs.keys()))[0].value + assert top_DT == subkey + + filtered_dt_set = filter_vset_by_metric( + metric_dict=hard_metrics, + vset=dt_set, + n_keep=1, + filter_on=["Acc"], + group=True, + ) + + df_acc = df[df["hard_metrics"] == "Acc"] + df_acc_mean = df_acc.groupby("DT").mean(numeric_only=True) + top_DT = df_acc_mean[df_acc_mean["out"] == df_acc_mean["out"].max()].index[0] + subkey = next(iter(filtered_dt_set.vfuncs.keys()))[0].value + assert top_DT == subkey diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 3d3205c..2ddeb87 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -1,108 +1,137 @@ -import time import os +import sys +import time from functools import partial from shutil import rmtree import numpy as np import pandas as pd +import pytest import ray -import sklearn from numpy.testing import assert_equal -from sklearn.datasets import make_classification +from sklearn.datasets import fetch_california_housing, make_classification from sklearn.ensemble import RandomForestRegressor from sklearn.inspection import permutation_importance from sklearn.linear_model import LogisticRegression from sklearn.metrics import 
accuracy_score, balanced_accuracy_score, r2_score from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import resample -from vflow import Vset, init_args, build_vset # must install vflow first (pip install vflow) +from vflow import Vset, build_vset, dict_to_df, init_args from vflow.subkey import Subkey as sm from vflow.vset import PREV_KEY class TestPipelines: - def setup_method(self): pass def test_subsampling_fitting_metrics_pipeline(self): - """Simplest synthetic pipeline - """ + """Simplest synthetic pipeline""" # initialize data np.random.seed(13) - X, y = sklearn.datasets.make_classification(n_samples=50, n_features=5) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) # ex. with another split? - X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), - names=['X_train', 'X_test', 'y_train', - 'y_test']) # optionally provide names for each of these + X, y = make_classification(n_samples=50, n_features=5) + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=42 + ) # ex. with another split? + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # optionally provide names for each of these # subsample data - subsampling_set = build_vset('subsampling', sklearn.utils.resample, - param_dict={'random_state': list(range(3))}, - n_samples=20) + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) X_trains, y_trains = subsampling_set(X_train, y_train) # fit models - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(max_iter=1000, tol=0.1), - DecisionTreeClassifier()], - vfunc_keys=["LR", "DT"]) + modeling_set = Vset( + name="modeling", + vfuncs=[ + LogisticRegression(max_iter=1000, tol=0.1), + DecisionTreeClassifier(), + ], + vfunc_keys=["LR", "DT"], + ) modeling_set.fit(X_trains, y_trains) preds_test = modeling_set.predict(X_test) # get metrics - hard_metrics_set = Vset(name='hard_metrics', - vfuncs=[accuracy_score, balanced_accuracy_score], - vfunc_keys=["Acc", "Bal_Acc"]) + hard_metrics_set = Vset( + name="hard_metrics", + vfuncs=[accuracy_score, balanced_accuracy_score], + vfunc_keys=["Acc", "Bal_Acc"], + ) hard_metrics = hard_metrics_set.evaluate(preds_test, y_test) # asserts - k1 = (sm('X_test', 'init'), sm('X_train', 'init'), - sm(('func=resample', 'random_state=0'), 'subsampling'), - sm('y_train', 'init'), sm('LR', 'modeling'), - sm('y_test', 'init'), sm('Acc', 'hard_metrics')) - - assert k1 in hard_metrics, 'hard metrics should have ' + str(k1) + ' as key' - assert hard_metrics[k1] > 0.9 # 0.9090909090909091 + k1 = ( + sm("X_test", "init"), + sm("X_train", "init"), + sm(("func=resample", "random_state=0"), "subsampling"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("y_test", "init"), + sm("Acc", "hard_metrics"), + ) + + assert k1 in hard_metrics, "hard metrics should have " + str(k1) + " as key" + assert isinstance(hard_metrics[k1], float) assert PREV_KEY in hard_metrics assert len(hard_metrics.keys()) == 13 def test_feat_engineering(self): - """Feature engineering pipeline - """ + """Feature engineering pipeline""" # get data as df np.random.seed(13) - data = sklearn.datasets.fetch_california_housing() - df = pd.DataFrame.from_dict(data['data']) - df.columns = data['feature_names'] - y = data['target'] - X_train, X_test, y_train, y_test = 
init_args(train_test_split(df, y, random_state=123), - names=['X_train', 'X_test', 'y_train', 'y_test']) + data = fetch_california_housing() + df = pd.DataFrame.from_dict(data["data"]) + df.columns = data["feature_names"] + y = data["target"] + X_train, X_test, y_train, y_test = init_args( + train_test_split(df, y, random_state=123), + names=["X_train", "X_test", "y_train", "y_test"], + ) # feature extraction - extracts two different sets of features from the same data def extract_feats(df: pd.DataFrame, feat_names=None): - """extract specific columns from dataframe - """ + """extract specific columns from dataframe""" if feat_names is None: - feat_names = ['HouseAge', 'AveBedrms', 'Population'] + feat_names = ["HouseAge", "AveBedrms", "Population"] return df[feat_names] - feat_extraction_funcs = [partial(extract_feats, feat_names=['HouseAge', 'AveBedrms', 'Population']), - partial(extract_feats, feat_names=['HouseAge', 'AveBedrms', 'Population', 'MedInc', 'AveOccup']), - ] - feat_extraction = Vset(name='feat_extraction', - vfuncs=feat_extraction_funcs, - output_matching=True) + feat_extraction_funcs = [ + partial(extract_feats, feat_names=["HouseAge", "AveBedrms", "Population"]), + partial( + extract_feats, + feat_names=[ + "HouseAge", + "AveBedrms", + "Population", + "MedInc", + "AveOccup", + ], + ), + ] + feat_extraction = Vset( + name="feat_extraction", vfuncs=feat_extraction_funcs, output_matching=True + ) X_feats_train = feat_extraction(X_train) - modeling_set = Vset(name='modeling', - vfuncs=[DecisionTreeRegressor(), RandomForestRegressor()], - vfunc_keys=["DT", "RF"]) + modeling_set = Vset( + name="modeling", + vfuncs=[DecisionTreeRegressor(), RandomForestRegressor()], + vfunc_keys=["DT", "RF"], + ) # how can we properly pass a y here so that it will fit properly? # this runs, but modeling_set.fitted_vfuncs is empty @@ -112,77 +141,111 @@ def extract_feats(df: pd.DataFrame, feat_names=None): preds_all = modeling_set.predict(X_feats_train) # get metrics - hard_metrics_set = Vset(name='hard_metrics', - vfuncs=[r2_score], - vfunc_keys=["r2"]) + hard_metrics_set = Vset( + name="hard_metrics", vfuncs=[r2_score], vfunc_keys=["r2"] + ) hard_metrics = hard_metrics_set.evaluate(preds_all, y_train) # asserts - k1 = (sm('X_train', 'init'), sm('feat_extraction_0', 'feat_extraction', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('DT', 'modeling'), sm('y_train', 'init'), sm('r2', 'hard_metrics')) - assert k1 in hard_metrics, 'hard metrics should have ' + str(k1) + ' as key' - assert hard_metrics[k1] > 0.99 # 0.9997246132375425 + k1 = ( + sm("X_train", "init"), + sm("feat_extraction_0", "feat_extraction", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("y_train", "init"), + sm("r2", "hard_metrics"), + ) + assert k1 in hard_metrics, "hard metrics should have " + str(k1) + " as key" + assert hard_metrics[k1] > 0.99 # 0.9997246132375425 assert PREV_KEY in hard_metrics assert len(hard_metrics.keys()) == 5 def test_feature_importance(self): - """Simplest synthetic pipeline for feature importance - """ + """Simplest synthetic pipeline for feature importance""" # initialize data np.random.seed(13) - X, y = sklearn.datasets.make_classification(n_samples=50, n_features=5) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) # ex. with another split? 
- X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), - names=['X_train', 'X_test', 'y_train', - 'y_test']) # optionally provide names for each of these + X, y = make_classification(n_samples=50, n_features=5) + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=42 + ) # ex. with another split? + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # optionally provide names for each of these # subsample data - subsampling_set = build_vset('subsampling', sklearn.utils.resample, - param_dict={'random_state': list(range(3))}, - n_samples=20) + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) X_trains, y_trains = subsampling_set(X_train, y_train) # fit models - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(max_iter=1000, tol=0.1), - DecisionTreeClassifier()], - vfunc_keys=["LR", "DT"]) + modeling_set = Vset( + name="modeling", + vfuncs=[ + LogisticRegression(max_iter=1000, tol=0.1), + DecisionTreeClassifier(), + ], + vfunc_keys=["LR", "DT"], + ) modeling_set.fit(X_trains, y_trains) - preds_test = modeling_set.predict(X_test) # get metrics - feature_importance_set = Vset(name='feature_importance', vfuncs=[permutation_importance], - vfunc_keys=["permutation_importance"]) - importances = feature_importance_set.evaluate(modeling_set.fitted_vfuncs, X_test, y_test) + feature_importance_set = Vset( + name="feature_importance", + vfuncs=[permutation_importance], + vfunc_keys=["permutation_importance"], + ) + importances = feature_importance_set.evaluate( + modeling_set.fitted_vfuncs, X_test, y_test + ) # asserts - k1 = (sm('X_train', 'init'), sm(('func=resample', 'random_state=0'), 'subsampling'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init'), sm('permutation_importance', 'feature_importance')) - assert k1 in importances, 'hard metrics should have ' + str(k1) + ' as key' + k1 = ( + sm("X_train", "init"), + sm(("func=resample", "random_state=0"), "subsampling"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + sm("permutation_importance", "feature_importance"), + ) + assert k1 in importances, "hard metrics should have " + str(k1) + " as key" assert PREV_KEY in importances assert len(importances.keys()) == 7 def test_repeated_subsampling(self): np.random.seed(13) - X, y = sklearn.datasets.make_classification(n_samples=50, n_features=5) + X, y = make_classification(n_samples=50, n_features=5) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), - names=['X_train', 'X_test', 'y_train', 'y_test']) + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # subsample data - subsampling_set = build_vset('subsampling', sklearn.utils.resample, - param_dict={'random_state': list(range(3))}, - n_samples=20) + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) X_trains, y_trains = subsampling_set(X_train, y_train) X_tests, y_tests = subsampling_set(X_test, y_test) - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(max_iter=1000, tol=0.1), - DecisionTreeClassifier()], - vfunc_keys=["LR", "DT"]) + modeling_set = 
Vset( + name="modeling", + vfuncs=[ + LogisticRegression(max_iter=1000, tol=0.1), + DecisionTreeClassifier(), + ], + vfunc_keys=["LR", "DT"], + ) modeling_set.fit(X_trains, y_trains) preds_test = modeling_set.predict(X_tests) @@ -193,41 +256,41 @@ def test_repeated_subsampling(self): assert len(preds_test.keys()) == 19 def test_lazy_eval(self): - def f(arg_name: str = '', i: int = 0): - return arg_name, f'f_iter={i}' + def f(arg_name: str = "", i: int = 0): + return arg_name, f"f_iter={i}" f_vfuncs = [partial(f, i=i) for i in range(3)] - f_arg = init_args(('f_arg',), names=['f_init'])[0] + f_arg = init_args(("f_arg",), names=["f_init"])[0] - f_set = Vset('f', vfuncs=f_vfuncs) - f_lazy_set = Vset('f', vfuncs=f_vfuncs, lazy=True) + f_set = Vset("f", vfuncs=f_vfuncs) + f_lazy_set = Vset("f", vfuncs=f_vfuncs, lazy=True) f_res = f_set(f_arg) f_lazy_res = f_lazy_set(f_arg) assert_equal(f_res.keys(), f_lazy_res.keys()) - def g(tup, arg_name: str = '', i: int = 0): - return tup, arg_name, f'g_iter={i}' + def g(tup, arg_name: str = "", i: int = 0): + return tup, arg_name, f"g_iter={i}" g_vfuncs = [partial(g, i=i) for i in range(2)] - g_arg = init_args(('g_arg',), names=['g_init'])[0] + g_arg = init_args(("g_arg",), names=["g_init"])[0] - g_set = Vset('g', vfuncs=g_vfuncs) - g_lazy_set = Vset('g', vfuncs=g_vfuncs, lazy=True) + g_set = Vset("g", vfuncs=g_vfuncs) + g_lazy_set = Vset("g", vfuncs=g_vfuncs, lazy=True) g_res = g_set(f_res, g_arg, n_out=1) g_lazy_res = g_lazy_set(f_lazy_res, g_arg, n_out=1) assert_equal(g_res.keys(), g_lazy_res.keys()) - def h(tup, arg_name: str = '', i: int = 0): - return tup, arg_name, f'h_iter={i}' + def h(tup, arg_name: str = "", i: int = 0): + return tup, arg_name, f"h_iter={i}" h_vfuncs = [partial(h, i=i) for i in range(2)] - h_arg = init_args(('h_arg',), names=['h_init'])[0] + h_arg = init_args(("h_arg",), names=["h_init"])[0] - h_set = Vset('h', vfuncs=h_vfuncs) + h_set = Vset("h", vfuncs=h_vfuncs) h_res = h_set(g_res, h_arg, n_out=1) h_lazy_res = h_set(g_lazy_res, h_arg, n_out=1) @@ -248,12 +311,14 @@ def test_caching(self): try: np.random.seed(13) X, _ = make_classification(n_samples=50, n_features=5) - X = init_args([X], names=['X'])[0] + X = init_args([X], names=["X"])[0] subsampling_funcs = [partial(costly_compute, row_index=np.arange(25))] - uncached_set = Vset(name='subsampling', vfuncs=subsampling_funcs) - cached_set = Vset(name='subsampling', vfuncs=subsampling_funcs, cache_dir='./') + uncached_set = Vset(name="subsampling", vfuncs=subsampling_funcs) + cached_set = Vset( + name="subsampling", vfuncs=subsampling_funcs, cache_dir="./" + ) # this always takes about 1 seconds begin = time.time() @@ -265,47 +330,68 @@ def test_caching(self): cached_set.fit(X) assert time.time() - begin >= 1 - assert_equal(uncached_set.fitted_vfuncs.keys(), cached_set.fitted_vfuncs.keys()) + assert_equal( + uncached_set.fitted_vfuncs.keys(), cached_set.fitted_vfuncs.keys() + ) # this should be very fast because it's using the already cached results - cached_set2 = Vset(name='subsampling', vfuncs=subsampling_funcs, cache_dir='./') + cached_set2 = Vset( + name="subsampling", vfuncs=subsampling_funcs, cache_dir="./" + ) begin = time.time() cached_set2.fit(X) assert time.time() - begin < 1 - assert_equal(uncached_set.fitted_vfuncs.keys(), cached_set2.fitted_vfuncs.keys()) + assert_equal( + uncached_set.fitted_vfuncs.keys(), cached_set2.fitted_vfuncs.keys() + ) finally: # clean up - rmtree('./joblib') + rmtree("./joblib") + @pytest.mark.skipif(sys.platform == "win32", reason="Does 
not work on Windows.") def test_mlflow_tracking(self, tmp_path): try: - runs_path = os.path.join(tmp_path, 'mlruns') + runs_path = os.path.join(tmp_path, "mlruns") np.random.seed(13) X, y = make_classification(n_samples=50, n_features=5) - X_train, X_test, y_train, y_test = init_args(train_test_split(X, y, random_state=42), - names=['X_train', 'X_test', 'y_train', 'y_test']) + X_train, X_test, y_train, y_test = init_args( + train_test_split(X, y, random_state=42), + names=["X_train", "X_test", "y_train", "y_test"], + ) # fit models - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(C=1, max_iter=1000, tol=0.1)], - vfunc_keys=["LR"]) + modeling_set = Vset( + name="modeling", + vfuncs=[LogisticRegression(C=1, max_iter=1000, tol=0.1)], + vfunc_keys=["LR"], + ) _ = modeling_set.fit(X_train, y_train) preds_test = modeling_set.predict(X_test) - hard_metrics_set = Vset(name='hard_metrics', - vfuncs=[accuracy_score, balanced_accuracy_score], - vfunc_keys=["Acc", "Bal_Acc"], - tracking_dir=runs_path) - hard_metrics = hard_metrics_set.evaluate(y_test, preds_test) + hard_metrics_set = Vset( + name="hard_metrics", + vfuncs=[accuracy_score, balanced_accuracy_score], + vfunc_keys=["Acc", "Bal_Acc"], + tracking_dir=runs_path, + ) + df = dict_to_df(hard_metrics_set.evaluate(preds_test, y_test)) runs_path = os.path.join(runs_path, hard_metrics_set._exp_id) assert os.path.isdir(runs_path) assert len(os.listdir(runs_path)) == 2 - runs_path = os.path.join(runs_path, [d for d in os.listdir(runs_path) if d != 'meta.yaml'][0]) - runs_path = os.path.join(runs_path, 'metrics') - with open(os.path.join(runs_path, 'Acc')) as acc: - assert len(acc.read().split(" ")) == 3 - with open(os.path.join(runs_path, 'Bal_Acc')) as bal_acc: - assert len(bal_acc.read().split(" ")) == 3 + runs_path = os.path.join( + runs_path, [d for d in os.listdir(runs_path) if d != "meta.yaml"][0] + ) + runs_path = os.path.join(runs_path, "metrics") + with open(os.path.join(runs_path, "Acc")) as acc: + acc_split = acc.read().split(" ") + assert len(acc_split) == 3 + acc_from_df = df["out"][df["hard_metrics"] == "Acc"] + assert np.isclose(float(acc_split[1]), acc_from_df) + with open(os.path.join(runs_path, "Bal_Acc")) as bal_acc: + bal_acc_split = bal_acc.read().split(" ") + assert len(bal_acc_split) == 3 + bal_acc_from_df = df["out"][df["hard_metrics"] == "Bal_Acc"] + assert np.isclose(float(bal_acc_split[1]), bal_acc_from_df) finally: # clean up rmtree(tmp_path) @@ -320,19 +406,25 @@ def fun1(a, b=1): def fun2(a, b=1): return a * b - data_param_dict = {'n': [1, 2, 3]} - data_vset = build_vset('data', gen_data, param_dict=data_param_dict, reps=5, lazy=True) + data_param_dict = {"n": [1, 2, 3]} + data_vset = build_vset( + "data", gen_data, param_dict=data_param_dict, reps=5, lazy=True + ) assert len(data_vset.vfuncs) == 15 - fun_param_dict = {'b': [1, 2, 3]} - fun1_vset = build_vset('fun1', fun1, param_dict=fun_param_dict, lazy=True) - fun1_vset_async = build_vset('fun1', fun1, param_dict=fun_param_dict, lazy=True, is_async=True) - fun2_vset = build_vset('fun2', fun2, param_dict=fun_param_dict) - fun2_vset_async = build_vset('fun2', fun2, param_dict=fun_param_dict, is_async=True) + fun_param_dict = {"b": [1, 2, 3]} + fun1_vset = build_vset("fun1", fun1, param_dict=fun_param_dict, lazy=True) + fun1_vset_async = build_vset( + "fun1", fun1, param_dict=fun_param_dict, lazy=True, is_async=True + ) + fun2_vset = build_vset("fun2", fun2, param_dict=fun_param_dict) + fun2_vset_async = build_vset( + "fun2", fun2, 
param_dict=fun_param_dict, is_async=True + ) np.random.seed(13) - ray.init(local_mode=True) + ray.init(num_cpus=1, ignore_reinit_error=True) data = data_vset() @@ -363,26 +455,30 @@ class learner: def fit(self, a): self.a = a return self + def transform(self, b): return self.a + b + def predict(self, x): - return self.a*x + return self.a * x + def predict_proba(self, x): - y = np.exp(-self.a*x) + y = np.exp(-self.a * x) return 1 / (1 + y) vset = Vset("learner", [learner()], is_async=True, lazy=True) - vset.fit(*init_args([.4])) + vset.fit(*init_args([0.4])) data = init_args([np.array([1, 2, 3])])[0] transformed = vset.transform(data) preds = vset.predict(transformed) preds_proba = vset.predict_proba(transformed) assert_equal(list(transformed.values())[0](), [1.4, 2.4, 3.4]) - assert_equal(list(preds.values())[0](), np.array([1.4, 2.4, 3.4])*.4) - assert_equal(list(preds_proba.values())[0](), - 1 / (1 + np.exp(-np.array([1.4, 2.4, 3.4])*.4))) - + assert_equal(list(preds.values())[0](), np.array([1.4, 2.4, 3.4]) * 0.4) + assert_equal( + list(preds_proba.values())[0](), + 1 / (1 + np.exp(-np.array([1.4, 2.4, 3.4]) * 0.4)), + ) def test_lazy_async_two_step(self): def add_a(arr, a=0.4): @@ -392,10 +488,12 @@ class learner: def fit(self, a): self.mean = sum(a) / len(a) return self + def predict(self, x): - return self.mean*x + return self.mean * x + def predict_proba(self, x): - y = np.exp(-self.mean*x) + y = np.exp(-self.mean * x) return 1 / (1 + y) add_a_vset = Vset("add_a", [add_a], lazy=True) @@ -407,8 +505,11 @@ def predict_proba(self, x): preds_proba = vset.predict_proba(transformed) assert_equal(list(transformed.values())[0].value, [1.4, 2.4, 3.4]) - assert_equal(list(preds.values())[0], np.array([1.4*2, 2.4*2, 3.4*2])) - assert_equal(list(preds_proba.values())[0], 1 / (1 + np.exp(-np.array([1.4*2, 2.4*2, 3.4*2])))) + assert_equal(list(preds.values())[0], np.array([1.4 * 2, 2.4 * 2, 3.4 * 2])) + assert_equal( + list(preds_proba.values())[0], + 1 / (1 + np.exp(-np.array([1.4 * 2, 2.4 * 2, 3.4 * 2]))), + ) def costly_compute(data, row_index=0): diff --git a/tests/test_utils.py b/tests/test_utils.py index c2716ce..9a98b19 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,928 +1,1870 @@ +import numpy as np import pandas as pd import pytest from numpy.testing import assert_equal -from vflow.utils import * from vflow.subkey import Subkey as sm +from vflow.utils import ( + PREV_KEY, + apply_vfuncs, + combine_dicts, + dict_to_df, + perturbation_stats, + to_list, +) @pytest.mark.parametrize( - 'in_dicts,out_dict', + "in_dicts,out_dict", [ # first or second dict has only one key ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): 'RF_fitted', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling')): 'LR_fitted', - }, - {(sm('X_test', 'init'),): 'X_test_data'} - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted', 'X_test_data'), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted', 'X_test_data') - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): "RF_fitted", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + ): "LR_fitted", + }, + {(sm("X_test", "init"),): "X_test_data"}, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted", 
"X_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted", "X_test_data"), + }, ), ( - # in_dicts - [ - { - PREV_KEY: ('prev_0', 'prev_1',), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted', 'X_test_data'], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted', 'X_test_data'] - }, - { - (sm('y_test', 'init'),): 'y_test_data', (sm('y_test', 'init'),): 'y_test_data', - PREV_KEY: ('prev_2',), - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted', 'X_test_data'], 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted', 'X_test_data'], 'y_test_data' - ) - } + PREV_KEY: ( + "prev_0", + "prev_1", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted", "X_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted", "X_test_data"], + }, + { + (sm("y_test", "init"),): "y_test_data", + PREV_KEY: ("prev_2",), + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted", "X_test_data"], "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted", "X_test_data"], "y_test_data"), + }, ), ( - # in_dicts - [ - {(sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('RF', 'modeling')): 'RF_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('RF', 'modeling')): 'RF_fitted_1', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('LR', 'modeling')): 'LR_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('LR', 'modeling')): 'LR_fitted_1'}, - {(sm('X_test', 'init'),): 'X_test_data'}, - {(sm('y_test', 'init'),): 'y_test_data'} - - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_fitted_0', 'X_test_data', 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_fitted_1', 'X_test_data', 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('LR', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'LR_fitted_0', 'X_test_data', 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('LR', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'LR_fitted_1', 'X_test_data', 'y_test_data' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "subsample"), + sm("RF", "modeling"), + ): "RF_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("RF", "modeling"), + ): "RF_fitted_1", + ( + sm("X_train", "init"), + sm("y_train", "init"), + 
sm("subsampling_0", "subsample"), + sm("LR", "modeling"), + ): "LR_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("LR", "modeling"), + ): "LR_fitted_1", + }, + {(sm("X_test", "init"),): "X_test_data"}, + {(sm("y_test", "init"),): "y_test_data"}, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "subsample"), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_fitted_0", "X_test_data", "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_fitted_1", "X_test_data", "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "subsample"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("LR_fitted_0", "X_test_data", "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("LR_fitted_1", "X_test_data", "y_test_data"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('RF', 'modeling')): 'RF_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('RF', 'modeling')): 'RF_fitted_1', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('LR', 'modeling')): 'LR_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('LR', 'modeling')): 'LR_fitted_1', - }, - { - (sm('X_train', 'init'), sm('subsampling_0', 'origin_0', True)): 'X_train_data_0', - (sm('X_train', 'init'), sm('subsampling_1', 'origin_0', True)): 'X_train_data_1', - }, - { - (sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True)): 'y_train_data_0', - (sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True)): 'y_train_data_1', - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('RF', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'RF_fitted_0', 'X_train_data_0', 'y_train_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('RF', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'RF_fitted_1', 'X_train_data_1', 'y_train_data_1' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('LR', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'LR_fitted_0', 'X_train_data_0', 'y_train_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('LR', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'LR_fitted_1', 'X_train_data_1', 'y_train_data_1' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("RF", "modeling"), + ): "RF_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("RF", "modeling"), + ): "RF_fitted_1", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("LR", "modeling"), + ): "LR_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("LR", "modeling"), + ): "LR_fitted_1", + 
}, + { + ( + sm("X_train", "init"), + sm("subsampling_0", "origin_0", True), + ): "X_train_data_0", + ( + sm("X_train", "init"), + sm("subsampling_1", "origin_0", True), + ): "X_train_data_1", + }, + { + ( + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + ): "y_train_data_0", + ( + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + ): "y_train_data_1", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("RF", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("RF_fitted_0", "X_train_data_0", "y_train_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("RF", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("RF_fitted_1", "X_train_data_1", "y_train_data_1"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("LR", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("LR_fitted_0", "X_train_data_0", "y_train_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("LR", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("LR_fitted_1", "X_train_data_1", "y_train_data_1"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_11', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_11' - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_00', - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_01', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_10', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_11' - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 
'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_11', 'X_test_data_11' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_11', 'X_test_data_11' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_11", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "X_test_data_00", + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_01", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "X_test_data_10", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_11", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", 
"modeling"), + sm("X_test", "init"), + ): ("RF_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_11", "X_test_data_11"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_11", "X_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_11', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_11' - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_00', - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_01', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_10', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_11' - } - ], - # out_dict + # in_dicts + [ { - 
(sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_11', 'X_test_data_11' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_11', 'X_test_data_11' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_11", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "X_test_data_00", + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_01", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): 
"X_test_data_10", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_11", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_11", "X_test_data_11"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_11", "X_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_00', 'X_test_data_00' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_01', 'X_test_data_01' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_10', 'X_test_data_10' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_11', 'X_test_data_11' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_00', 'X_test_data_00' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_01', 'X_test_data_01' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_10', 'X_test_data_10' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), 
sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_11', 'X_test_data_11' - ], - }, - { - (sm('y_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'y_test_data_11' - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_00', 'X_test_data_00'], 'y_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_01', 'X_test_data_01'], 'y_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_10', 'X_test_data_10'], 'y_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_11', 'X_test_data_11'], 'y_test_data_11' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_00', 'X_test_data_00'], 'y_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_01', 'X_test_data_01'], 'y_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_10', 'X_test_data_10'], 'y_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_11', 'X_test_data_11'], 'y_test_data_11' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_00", "X_test_data_00"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_01", "X_test_data_01"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_10", "X_test_data_10"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + 
sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_11", "X_test_data_11"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_00", "X_test_data_00"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_01", "X_test_data_01"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_10", "X_test_data_10"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_11", "X_test_data_11"], + }, + { + ( + sm("y_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "y_test_data_11", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_00", "X_test_data_00"], "y_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_01", "X_test_data_01"], "y_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_10", "X_test_data_10"], "y_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_11", "X_test_data_11"], "y_test_data_11"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_00", "X_test_data_00"], "y_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_01", "X_test_data_01"], "y_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_10", "X_test_data_10"], "y_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", 
True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_11", "X_test_data_11"], "y_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_00', 'X_test_data_00'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_01', 'X_test_data_01'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_10', 'X_test_data_10'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_11', 'X_test_data_11'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_00', 'X_test_data_00'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_01', 'X_test_data_01'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_10', 'X_test_data_10'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_11', 'X_test_data_11'], 'y_test_data' - ], - }, - { - (sm('LR', 'm_origin', True), sm('acc', 'metrics')): 'LR_acc_func', - (sm('LR', 'm_origin', True), sm('bal_acc', 'metrics')): 'LR_bal_acc_func', - (sm('RF', 'm_origin', True), sm('acc', 'metrics')): 'RF_acc_func', - (sm('RF', 'm_origin', True), sm('feat_imp', 'metrics')): 'RF_feat_imp_func' - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_00', 'X_test_data_00'], 'y_test_data'], 'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_00', 'X_test_data_00'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_01', 'X_test_data_01'], 'y_test_data'], 
'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_01', 'X_test_data_01'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_10', 'X_test_data_10'], 'y_test_data'], 'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_10', 'X_test_data_10'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_11', 'X_test_data_11'], 'y_test_data'], 'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_11', 'X_test_data_11'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_00', 'X_test_data_00'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_00', 'X_test_data_00'], 'y_test_data'], 'LR_bal_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_01', 'X_test_data_01'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_01', 'X_test_data_01'], 'y_test_data'], 'LR_bal_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_10', 'X_test_data_10'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_10', 'X_test_data_10'], 'y_test_data'], 'LR_bal_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 
'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_11', 'X_test_data_11'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_11', 'X_test_data_11'], 'y_test_data'], 'LR_bal_acc_func' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_00", "X_test_data_00"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_01", "X_test_data_01"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_10", "X_test_data_10"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_11", "X_test_data_11"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_00", "X_test_data_00"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_01", "X_test_data_01"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_10", "X_test_data_10"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_11", "X_test_data_11"], "y_test_data"], + }, + { + (sm("LR", "m_origin", True), sm("acc", "metrics")): "LR_acc_func", + ( + sm("LR", "m_origin", True), + sm("bal_acc", "metrics"), + ): "LR_bal_acc_func", + (sm("RF", "m_origin", True), sm("acc", "metrics")): "RF_acc_func", + ( + sm("RF", "m_origin", True), + sm("feat_imp", "metrics"), + ): "RF_feat_imp_func", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_00", "X_test_data_00"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_00", 
"X_test_data_00"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_01", "X_test_data_01"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_01", "X_test_data_01"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_10", "X_test_data_10"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_10", "X_test_data_10"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_11", "X_test_data_11"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_11", "X_test_data_11"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_00", "X_test_data_00"], "y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_00", "X_test_data_00"], "y_test_data"], + "LR_bal_acc_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_01", "X_test_data_01"], "y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_01", "X_test_data_01"], "y_test_data"], + "LR_bal_acc_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_10", "X_test_data_10"], 
"y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_10", "X_test_data_10"], "y_test_data"], + "LR_bal_acc_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_11", "X_test_data_11"], "y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_11", "X_test_data_11"], "y_test_data"], + "LR_bal_acc_func", + ), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_00', 'X_test_data_0', 'y_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_01', 'X_test_data_1', 'y_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_10', 'X_test_data_0', 'y_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_11', 'X_test_data_1', 'y_test_data_11' - ) + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + 
sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_00", "X_test_data_0", "y_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_01", "X_test_data_1", "y_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_10", "X_test_data_0", "y_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_11", "X_test_data_1", "y_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - 
(sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_00', 'y_test_data_00', 'X_test_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_01', 'y_test_data_01', 'X_test_data_1' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_10', 'y_test_data_10', 'X_test_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_11', 'y_test_data_11', 'X_test_data_1' - ) + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_00", "y_test_data_00", "X_test_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_01", "y_test_data_01", "X_test_data_1"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_10", "y_test_data_10", "X_test_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_11", 
"y_test_data_11", "X_test_data_1"), + }, ), ( - # in_dicts - [ - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - } - ], - # out_dict + # in_dicts + [ { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_0', 'RF_00', 'y_test_data_00' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_1', 'RF_01', 'y_test_data_01' - ), - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_0', 'RF_10', 'y_test_data_10' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_1', 'RF_11', 'y_test_data_11' - ) + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): 
"y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", }, + ], + # out_dict + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_0", "RF_00", "y_test_data_00"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_1", "RF_01", "y_test_data_01"), + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_0", "RF_10", "y_test_data_10"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_1", "RF_11", "y_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - ], - # out_dict + # in_dicts + [ { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('y_test', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_0', 'y_test_data_00', 'RF_00' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('y_test', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_1', 'y_test_data_01', 'RF_01' - ), - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('y_test', 'init'), - sm('voxel_extract_1', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_0', 'y_test_data_10', 'RF_10' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('y_test', 'init'), - 
sm('voxel_extract_1', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_1', 'y_test_data_11', 'RF_11' - ) + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + ], + # out_dict + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_0", "y_test_data_00", "RF_00"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_1", "y_test_data_01", "RF_01"), + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_0", "y_test_data_10", "RF_10"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_1", "y_test_data_11", "RF_11"), + }, ), ( - # in_dicts - [ - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), 
sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - ], - # out_dict + # in_dicts + [ { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_00', 'RF_00', 'X_test_data_0' - ), - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_01', 'RF_01', 'X_test_data_1' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_10', 'RF_10', 'X_test_data_0' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_11', 'RF_11', 'X_test_data_1' - ) + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", }, + ], + # out_dict + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_00", "RF_00", "X_test_data_0"), + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_01", "RF_01", "X_test_data_1"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), 
+ sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_10", "RF_10", "X_test_data_0"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_11", "RF_11", "X_test_data_1"), + }, ), ( - # in_dicts - [ - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - ], - # out_dict + # in_dicts + [ { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_00', 'X_test_data_0', 'RF_00' - ), - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_01', 'X_test_data_1', 'RF_01' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_10', 'X_test_data_0', 'RF_10' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_11', 'X_test_data_1', 'RF_11' - ) + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", 
"v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", }, + ], + # out_dict + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_00", "X_test_data_0", "RF_00"), + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_01", "X_test_data_1", "RF_01"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_10", "X_test_data_0", "RF_10"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_11", "X_test_data_1", "RF_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True)): 'X_test_data_00', - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True)): 'X_test_data_01', - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True)): 'X_test_data_10', - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True)): 'X_test_data_11', - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_000', - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 'modeling')): 'RF_001', - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_010', - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 
'modeling')): 'RF_011', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_100', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 'modeling')): 'RF_101', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_110', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 'modeling')): 'RF_111', - }, - ], - # out_dict + # in_dicts + [ { - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_00', 'y_test_data_00', 'RF_000'), - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_00', 'y_test_data_10', 'RF_001'), - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_01', 'y_test_data_01', 'RF_010'), - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_01', 'y_test_data_11', 'RF_011'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_10', 'y_test_data_00', 'RF_100'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_10', 'y_test_data_10', 'RF_101'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_11', 'y_test_data_01', 'RF_110'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_11', 'y_test_data_11', 'RF_111'), - - } + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_00", + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + ): 
"X_test_data_01", + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_10", + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_11", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_000", + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_001", + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_010", + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_011", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_100", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_101", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_110", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_111", + }, + ], + # out_dict + { + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_00", "y_test_data_00", "RF_000"), + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_00", "y_test_data_10", "RF_001"), + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_01", "y_test_data_01", 
"RF_010"), + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_01", "y_test_data_11", "RF_011"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_10", "y_test_data_00", "RF_100"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_10", "y_test_data_10", "RF_101"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_11", "y_test_data_01", "RF_110"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_11", "y_test_data_11", "RF_111"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('standardize_0', 's_origin', True)): 'X_train_0', - (sm('X_train', 'init'), sm('standardize_1', 's_origin', True)): 'X_train_1', - }, - {(sm('y_train', 'init'),): 'y_train_data'} - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('standardize_0', 's_origin', True), sm('y_train', 'init')): ( - 'X_train_0', 'y_train_data'), - (sm('X_train', 'init'), sm('standardize_1', 's_origin', True), sm('y_train', 'init')): ( - 'X_train_1', 'y_train_data') - } + ( + sm("X_train", "init"), + sm("standardize_0", "s_origin", True), + ): "X_train_0", + ( + sm("X_train", "init"), + sm("standardize_1", "s_origin", True), + ): "X_train_1", + }, + {(sm("y_train", "init"),): "y_train_data"}, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("standardize_0", "s_origin", True), + sm("y_train", "init"), + ): ("X_train_0", "y_train_data"), + ( + sm("X_train", "init"), + sm("standardize_1", "s_origin", True), + sm("y_train", "init"), + ): ("X_train_1", "y_train_data"), + }, ), - ] + ], ) class TestCombineDicts: - def test_combine_dicts(self, in_dicts, out_dict): result_dict = combine_dicts(*in_dicts) print(result_dict) @@ -930,173 +1872,326 @@ def test_combine_dicts(self, in_dicts, out_dict): @pytest.mark.parametrize( - 'in_dicts,out_dict', + "in_dicts,out_dict", [ ( - # in_dicts - [ - # modules - { - (sm('module_0', 'm_origin'),): lambda x, y: x + y, - (sm('module_1', 'm_origin'),): lambda x, y: x * y, - }, - # data_dict - {(sm('data', 'init'),): [2, 3]} - ], - # out_dict + # in_dicts + [ + # modules { - (sm('data', 'init'), sm('module_0', 'm_origin')): 5, - (sm('data', 'init'), sm('module_1', 'm_origin')): 6 - } + (sm("module_0", "m_origin"),): lambda x, y: x + y, + (sm("module_1", "m_origin"),): lambda x, y: x * y, + }, + # data_dict + {(sm("data", "init"),): [2, 3]}, + ], + # out_dict + { + (sm("data", "init"), sm("module_0", "m_origin")): 5, + (sm("data", "init"), sm("module_1", "m_origin")): 6, + }, ), ( - # 
in_dicts - [ - # modules - { - (sm('group_0', 'g_origin', True), sm('module_0', 'm_origin'),): lambda x, y: x + y, - (sm('group_1', 'g_origin', True), sm('module_1', 'm_origin'),): lambda x, y: x * y, - }, - # data_dict - { - (sm('data', 'init'), sm('group_0', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - (sm('data', 'init'), sm('group_1', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - } - ], - # out_dict + # in_dicts + [ + # modules { - (sm('data', 'init'), sm('group_0', 'g_origin', True), sm('module_0', 'm_origin')): np.array( - [5, 7, 9]), - (sm('data', 'init'), sm('group_1', 'g_origin', True), sm('module_1', 'm_origin')): np.array( - [4, 10, 18]), - } + ( + sm("group_0", "g_origin", True), + sm("module_0", "m_origin"), + ): lambda x, y: x + + y, + ( + sm("group_1", "g_origin", True), + sm("module_1", "m_origin"), + ): lambda x, y: x + * y, + }, + # data_dict + { + (sm("data", "init"), sm("group_0", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], + (sm("data", "init"), sm("group_1", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], + }, + ], + # out_dict + { + ( + sm("data", "init"), + sm("group_0", "g_origin", True), + sm("module_0", "m_origin"), + ): np.array([5, 7, 9]), + ( + sm("data", "init"), + sm("group_1", "g_origin", True), + sm("module_1", "m_origin"), + ): np.array([4, 10, 18]), + }, ), ( - # in_dicts - [ - # modules - { - (sm('data', 'init'), sm('group_0', 'g_origin', True), sm('module_0', 'm_origin'),): lambda x, - y: x + y, - (sm('data', 'init'), sm('group_1', 'g_origin', True), sm('module_1', 'm_origin'),): lambda x, - y: x * y, - }, - # data_dict - { - (sm('data', 'init2'), sm('group_0', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - (sm('data', 'init2'), sm('group_1', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - } - ], - # out_dict + # in_dicts + [ + # modules + { + ( + sm("data", "init"), + sm("group_0", "g_origin", True), + sm("module_0", "m_origin"), + ): lambda x, y: x + + y, + ( + sm("data", "init"), + sm("group_1", "g_origin", True), + sm("module_1", "m_origin"), + ): lambda x, y: x + * y, + }, + # data_dict { - (sm('data', 'init2'), sm('group_0', 'g_origin', True), sm('data', 'init'), - sm('module_0', 'm_origin')): np.array([5, 7, 9]), - (sm('data', 'init2'), sm('group_1', 'g_origin', True), sm('data', 'init'), - sm('module_1', 'm_origin')): np.array([4, 10, 18]), + (sm("data", "init2"), sm("group_0", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], + (sm("data", "init2"), sm("group_1", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], }, + ], + # out_dict + { + ( + sm("data", "init2"), + sm("group_0", "g_origin", True), + sm("data", "init"), + sm("module_0", "m_origin"), + ): np.array([5, 7, 9]), + ( + sm("data", "init2"), + sm("group_1", "g_origin", True), + sm("data", "init"), + sm("module_1", "m_origin"), + ): np.array([4, 10, 18]), + }, ), - ] + ], ) class TestApplyVfuncs: - def test_apply_vfuncs(self, in_dicts, out_dict): result_dict = apply_vfuncs(*in_dicts) assert_equal(result_dict, out_dict) -class TestUtils: +class TestUtils: def test_to_list(self): - assert to_list((['x1', 'x2', 'x3'], ['y1', 'y2', 'y3'])) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] - assert to_list((['x1'], ['y1'])) == [['x1', 'y1']] - assert to_list((['x1', 'x2', 'x3'],)) == [['x1'], ['x2'], ['x3']] - assert to_list(('x1',)) == [['x1']] - assert to_list(('x1', 'y1')) == [['x1', 'y1']] - assert 
to_list(('x1', 'x2', 'x3', 'y1', 'y2', 'y3')) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] + assert to_list((["x1", "x2", "x3"], ["y1", "y2", "y3"])) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] + assert to_list((["x1"], ["y1"])) == [["x1", "y1"]] + assert to_list((["x1", "x2", "x3"],)) == [["x1"], ["x2"], ["x3"]] + assert to_list(("x1",)) == [["x1"]] + assert to_list(("x1", "y1")) == [["x1", "y1"]] + assert to_list(("x1", "x2", "x3", "y1", "y2", "y3")) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] with pytest.raises(ValueError): - to_list(('x1', 'x2', 'x3', 'y1', 'y2')) + to_list(("x1", "x2", "x3", "y1", "y2")) def test_dict_to_df(self): - in_dict_1 = {(sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('acc', 'metrics')): 0.9, - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('acc', 'metrics')): 0.95} - out_df_1 = pd.DataFrame(data={'init-feat_extract': ['X_train', 'X_train'], - 'feat_extract': ['feat_extract_0', 'feat_extract_1'], - 'init-modeling': ['y_train', 'y_train'], - 'modeling': ['DT', 'DT'], - 'metrics': ['acc', 'acc'], - 'out': [0.9, 0.95]}) - in_dict_2 = {(sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-3'), 'modeling'), sm('s_0', 'stability')): 0.333, - (sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-5'), 'modeling'), sm('s_0', 'stability')): 0.452} - out_df_2 = pd.DataFrame(data={'init-sample': ['X_train', 'X_train'], - 'sample': ['sample_0', 'sample_0'], - 'init-modeling': ['y_train', 'y_train'], - 'k-modeling': ['10', '10'], - 'e-modeling': ['1e-3', '1e-5'], - 'stability': ['s_0', 's_0'], - 'out': [0.333, 0.452]}) - in_dict_3 = {(sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-3'), 'modeling'), sm('s_0', 'stability')): [0.333, 0.222], - (sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-5'), 'modeling'), sm('s_0', 'stability')): [0.452, 0.322]} - out_df_3 = pd.DataFrame(data={'init-sample': ['X_train', 'X_train'], - 'sample': ['sample_0', 'sample_0'], - 'init-modeling': ['y_train', 'y_train'], - 'modeling': [('k=10', 'e=1e-3'), ('k=10', 'e=1e-5')], - 'stability': ['s_0', 's_0'], - 'out': [[0.333, 0.222], [0.452, 0.322]], - 'out-0': [0.333, 0.452], - 'out-1': [0.222, 0.322]}) - in_dict_4 = {(sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-3'), 'modeling'), sm('s_0', 'stability')): {'k1': 0.333, 'k2': 0.222}, - (sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-5'), 'modeling'), sm('s_0', 'stability')): {'k1': 0.452, 'k2': 0.322}} - out_df_4 = pd.DataFrame(data={'init-sample': ['X_train', 'X_train'], - 'sample': ['sample_0', 'sample_0'], - 'init-modeling': ['y_train', 'y_train'], - 'modeling': [('k=10', 'e=1e-3'), ('k=10', 'e=1e-5')], - 'stability': ['s_0', 's_0'], - 'out': [{'k1': 0.333, 'k2': 0.222}, {'k1': 0.452, 'k2': 0.322}], - 'out-k1': [0.333, 0.452], - 'out-k2': [0.222, 0.322]}) + in_dict_1 = { + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("acc", "metrics"), + ): 0.9, + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("acc", "metrics"), + ): 0.95, + } + out_df_1 = pd.DataFrame( + data={ + "init-feat_extract": 
["X_train", "X_train"], + "feat_extract": ["feat_extract_0", "feat_extract_1"], + "init-modeling": ["y_train", "y_train"], + "modeling": ["DT", "DT"], + "metrics": ["acc", "acc"], + "out": [0.9, 0.95], + } + ) + in_dict_2 = { + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-3"), "modeling"), + sm("s_0", "stability"), + ): 0.333, + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-5"), "modeling"), + sm("s_0", "stability"), + ): 0.452, + } + out_df_2 = pd.DataFrame( + data={ + "init-sample": ["X_train", "X_train"], + "sample": ["sample_0", "sample_0"], + "init-modeling": ["y_train", "y_train"], + "k-modeling": ["10", "10"], + "e-modeling": ["1e-3", "1e-5"], + "stability": ["s_0", "s_0"], + "out": [0.333, 0.452], + } + ) + in_dict_3 = { + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-3"), "modeling"), + sm("s_0", "stability"), + ): [0.333, 0.222], + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-5"), "modeling"), + sm("s_0", "stability"), + ): [0.452, 0.322], + } + out_df_3 = pd.DataFrame( + data={ + "init-sample": ["X_train", "X_train"], + "sample": ["sample_0", "sample_0"], + "init-modeling": ["y_train", "y_train"], + "modeling": [("k=10", "e=1e-3"), ("k=10", "e=1e-5")], + "stability": ["s_0", "s_0"], + "out": [[0.333, 0.222], [0.452, 0.322]], + "out-0": [0.333, 0.452], + "out-1": [0.222, 0.322], + } + ) + in_dict_4 = { + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-3"), "modeling"), + sm("s_0", "stability"), + ): {"k1": 0.333, "k2": 0.222}, + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-5"), "modeling"), + sm("s_0", "stability"), + ): {"k1": 0.452, "k2": 0.322}, + } + out_df_4 = pd.DataFrame( + data={ + "init-sample": ["X_train", "X_train"], + "sample": ["sample_0", "sample_0"], + "init-modeling": ["y_train", "y_train"], + "modeling": [("k=10", "e=1e-3"), ("k=10", "e=1e-5")], + "stability": ["s_0", "s_0"], + "out": [{"k1": 0.333, "k2": 0.222}, {"k1": 0.452, "k2": 0.322}], + "out-k1": [0.333, 0.452], + "out-k2": [0.222, 0.322], + } + ) assert dict_to_df(in_dict_1).equals(out_df_1) - assert dict_to_df(in_dict_2, param_key='modeling').equals(out_df_2) - assert dict_to_df(in_dict_3, param_key='out').equals(out_df_3) - assert dict_to_df(in_dict_4, param_key='out').equals(out_df_4) + assert dict_to_df(in_dict_2, param_key="modeling").equals(out_df_2) + assert dict_to_df(in_dict_3, param_key="out").equals(out_df_3) + assert dict_to_df(in_dict_4, param_key="out").equals(out_df_4) def test_perturbation_stats(self): - in_dict = {(sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): 0.455, - (sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): 0.522, - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): 0.76, - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): 0.95} + in_dict = { + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): 0.455, + ( + 
sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): 0.522, + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): 0.76, + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): 0.95, + } df = dict_to_df(in_dict) - stats = perturbation_stats(df, 'feat_extract') - cols = ['feat_extract', 'count', 'mean', 'std'] + stats = perturbation_stats(df, "feat_extract") + cols = ["feat_extract", "count", "mean", "std"] assert all(c in cols for c in stats.columns) - assert round(stats.loc[0]['mean'], 4) == 0.4885 - assert round(stats.loc[1]['std'], 6) == 0.134350 + assert round(stats.loc[0]["mean"], 4) == 0.4885 + assert round(stats.loc[1]["std"], 6) == 0.134350 - in_dict = {(sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): [0.6, 0.3, 0.4], - (sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): [0.94, 0.33, 0.24], - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): [0.26, 0.31, 0.47], - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): [0.76, 0.883, 0.354]} + in_dict = { + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): [0.6, 0.3, 0.4], + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): [0.94, 0.33, 0.24], + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): [0.26, 0.31, 0.47], + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): [0.76, 0.883, 0.354], + } df = dict_to_df(in_dict) - stats = perturbation_stats(df, 'feat_extract', prefix='o', split=True) + stats = perturbation_stats(df, "feat_extract", prefix="o", split=True) assert len(stats.columns) == 8 - assert stats.columns[1] == 'o-count' - assert stats.columns[-1] == 'o2-std' - assert stats.loc[1]['o2-std'] == 0.0820243866176395 + assert stats.columns[1] == "o-count" + assert stats.columns[-1] == "o2-std" + assert stats.loc[1]["o2-std"] == 0.0820243866176395 diff --git a/vflow/__init__.py b/vflow/__init__.py index 6ad3911..58822ee 100644 --- a/vflow/__init__.py +++ b/vflow/__init__.py @@ -1,8 +1,60 @@ """ .. 
include:: ../README.md """ -from .vfunc import * -from .vset import * -from .pipeline import * -from .utils import * -from .helpers import * + +from .helpers import ( + build_vset, + cum_acc_by_uncertainty, + filter_vset_by_metric, + init_args, +) +from .pipeline import PCSPipeline, build_graph +from .subkey import Subkey +from .utils import ( + apply_vfuncs, + base_dict, + combine_dicts, + combine_keys, + dict_data, + dict_keys, + dict_to_df, + init_step, + perturbation_stats, + sep_dicts, + to_list, + to_tuple, +) +from .vfunc import AsyncVfunc, Vfunc, VfuncPromise +from .vset import Vset + +__all__ = [ + # vflow.helpers + "init_args", + "build_vset", + "filter_vset_by_metric", + "cum_acc_by_uncertainty", + # vflow.pipeline + "PCSPipeline", + "build_graph", + # vflow.subkey + "Subkey", + # vflow.utils + "apply_vfuncs", + "base_dict", + "combine_dicts", + "combine_keys", + "dict_data", + "dict_keys", + "dict_to_df", + "init_step", + "perturbation_stats", + "sep_dicts", + "to_list", + "to_tuple", + # vflow.vfunc + "Vfunc", + "AsyncVfunc", + "VfuncPromise", + # vflow.vset + "Vset", +] diff --git a/vflow/helpers.py b/vflow/helpers.py index b8c1e70..9364c37 100644 --- a/vflow/helpers.py +++ b/vflow/helpers.py @@ -1,5 +1,6 @@ """User-facing helper functions included at import vflow """ + from functools import partial from itertools import product from typing import Union @@ -7,9 +8,9 @@ import mlflow import numpy as np -from vflow.utils import dict_to_df, dict_keys, dict_data +from vflow.utils import dict_data, dict_keys, dict_to_df from vflow.vfunc import Vfunc -from vflow.vset import Vset, Subkey, PREV_KEY, FILTER_PREV_KEY +from vflow.vset import FILTER_PREV_KEY, PREV_KEY, Subkey, Vset def init_args(args_tuple: Union[tuple, list], names=None): @@ -21,28 +22,40 @@ def init_args(args_tuple: Union[tuple, list], names=None): given names for each of the arguments in the tuple """ if names is None: - names = ['start'] * len(args_tuple) + names = ["start"] * len(args_tuple) else: - assert len(names) == len(args_tuple), 'names should be same length as args_tuple' + assert len(names) == len( + args_tuple + ), "names should be same length as args_tuple" output_dicts = [] for i, _ in enumerate(args_tuple): - output_dicts.append({ - (Subkey(names[i], 'init'),): args_tuple[i], - PREV_KEY: ('init',), - }) + output_dicts.append( + { + (Subkey(names[i], "init"),): args_tuple[i], + PREV_KEY: ("init",), + } + ) return output_dicts -def build_vset(name: str, func, param_dict=None, reps: int = 1, - is_async: bool = False, output_matching: bool = False, - lazy: bool = False, cache_dir: str = None, - tracking_dir: str = None, **kwargs) -> Vset: +def build_vset( + name: str, + func, + param_dict=None, + reps: int = 1, + is_async: bool = False, + output_matching: bool = False, + lazy: bool = False, + cache_dir: str = None, + tracking_dir: str = None, + **kwargs, +) -> Vset: """Builds a new Vset by currying or instantiating callable `func` with all combinations of parameters in `param_dict` and optional additional `**kwargs`. - If `func` and `param_dict` are lists, then the ith entry of `func` will be - curried with ith entry of `param_dict`. If only one of `func` or `param_dict` + If `func` and `param_dict` are lists, then the ith entry of `func` will be + curried with ith entry of `param_dict`. If only one of `func` or `param_dict` is a list, the same `func`/`param_dict` will be curried for all entries in the - list. Vfuncs are named with `param_dict` items as tuples of + list. 
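For instance (an illustrative sketch with hypothetical callables `f` and `g`, not taken from the patch), `build_vset("models", [f, g], [{"p": [1, 2]}, {"q": [0.1]}])` would curry `f` with `p=1` and with `p=2` and `g` with `q=0.1`, collecting all three resulting Vfuncs into a single Vset.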
Vfuncs are named with `param_dict` items as tuples of str("param_name=param_val"). Parameters @@ -53,7 +66,7 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, A callable to use as the base for Vfuncs in the output Vset. Can also be a class object, in which case the class is immediately instantiated with the parameter combinations from `param_dict`. Can also be a list of - callables, where the ith entry corresponds to `param_dict` or the ith + callables, where the ith entry corresponds to `param_dict` or the ith entry of `param_dict` (if `param_dict` is a list). param_dict : dict[str, list] or list[dict[str, list]], optional A dict with string keys corresponding to argument names of `func` and @@ -88,8 +101,9 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, pd_list = [] if isinstance(func, list): if isinstance(param_dict, list): - assert len(param_dict) == len(func), \ - 'list of param_dicts must be same length as list of funcs' + assert len(param_dict) == len( + func + ), "list of param_dicts must be same length as list of funcs" f_list.extend(func) pd_list.extend(param_dict) else: @@ -101,29 +115,29 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, else: f_list.append(func) pd_list.append(param_dict) - + vfuncs = [] vkeys = [] for f, pd in zip(f_list, pd_list): if pd is None: pd = {} - assert callable(f), 'func must be callable' - + assert callable(f), "func must be callable" + kwargs_tuples = product(*list(pd.values())) for tup in kwargs_tuples: kwargs_dict = {} - vkey_tup = (f'func={f.__name__}', ) + vkey_tup = (f"func={f.__name__}",) for param_name, param_val in zip(list(pd.keys()), tup): kwargs_dict[param_name] = param_val - vkey_tup += (f'{param_name}={param_val}', ) + vkey_tup += (f"{param_name}={param_val}",) # add additional fixed kwargs to kwargs_dict for k, v in kwargs.items(): kwargs_dict[k] = v for i in range(reps): # add vfunc key to vkeys if reps > 1: - vkeys.append((f'rep={i}', ) + vkey_tup) + vkeys.append((f"rep={i}",) + vkey_tup) else: vkeys.append(vkey_tup) # check if func is a class @@ -132,18 +146,33 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, vfuncs.append(Vfunc(vfunc=f(**kwargs_dict), name=str(vkey_tup))) else: # use partial to wrap func - vfuncs.append(Vfunc(vfunc=partial(f, **kwargs_dict), name=str(vkey_tup))) + vfuncs.append( + Vfunc(vfunc=partial(f, **kwargs_dict), name=str(vkey_tup)) + ) if all(pd is None for pd in pd_list) and reps == 1: vkeys = None - - return Vset(name, vfuncs, is_async=is_async, vfunc_keys=vkeys, - output_matching=output_matching, lazy=lazy, - cache_dir=cache_dir, tracking_dir=tracking_dir) + + return Vset( + name, + vfuncs, + is_async=is_async, + vfunc_keys=vkeys, + output_matching=output_matching, + lazy=lazy, + cache_dir=cache_dir, + tracking_dir=tracking_dir, + ) -def filter_vset_by_metric(metric_dict: dict, vset: Vset, *vsets: Vset, n_keep: int = 1, - bigger_is_better: bool = True, filter_on=None, - group: bool = False) -> Union[Vset, list]: +def filter_vset_by_metric( + metric_dict: dict, + vset: Vset, + *vsets: Vset, + n_keep: int = 1, + bigger_is_better: bool = True, + filter_on=None, + group: bool = False, +) -> Union[Vset, list]: """Returns a new Vset by filtering `vset.vfuncs` based on values in filter_dict. 
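For example (an illustrative sketch, not from the patch: `modeling_set` is a hypothetical Vset of candidate models and `metric_dict` the output of a metric Vset applied downstream of it), `filtered = filter_vset_by_metric(metric_dict, modeling_set, n_keep=2)` returns a new Vset named `"filtered_" + modeling_set.name` containing only the two vfuncs with the best metric values (largest by default; smallest when `bigger_is_better=False`).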
Parameters @@ -178,28 +207,45 @@ def filter_vset_by_metric(metric_dict: dict, vset: Vset, *vsets: Vset, n_keep: i vset_names = [] for vset_i in vsets: if vset_i.name not in df.columns: - raise ValueError((f'{vset_i.name} should be one ' - 'of the columns of dict_to_df(metric_dict)')) + raise ValueError( + ( + f"{vset_i.name} should be one " + "of the columns of dict_to_df(metric_dict)" + ) + ) vset_names.append(vset_i.name) if len(filter_on) > 0: filter_col = list(metric_dict.keys())[0][-1].origin df = df[df[filter_col].isin(filter_on)] if group: - df = df.groupby(by=vset_names, as_index=False).mean() + df = df.groupby(by=vset_names, as_index=False).mean(numeric_only=True) if bigger_is_better: - df = df.sort_values(by='out', ascending=False) + df = df.sort_values(by="out", ascending=False) else: - df = df.sort_values(by='out') + df = df.sort_values(by="out") df = df.iloc[0:n_keep] for i, vset_i in enumerate(vsets): vfuncs = vset_i.vfuncs vfunc_filter = [str(name) for name in df[vset_i.name].to_numpy()] new_vfuncs = {k: v for k, v in vfuncs.items() if str(v.name) in vfunc_filter} tracking_dir = None if vset_i._mlflow is None else mlflow.get_tracking_uri() - new_vset = Vset('filtered_' + vset_i.name, new_vfuncs, is_async=vset_i._async, - output_matching=vset_i._output_matching, lazy=vset_i._lazy, - cache_dir=vset_i._cache_dir, tracking_dir=tracking_dir) - setattr(new_vset, FILTER_PREV_KEY, (metric_dict[PREV_KEY], vset_i,)) + new_vset = Vset( + "filtered_" + vset_i.name, + new_vfuncs, + is_async=vset_i._async, + output_matching=vset_i._output_matching, + lazy=vset_i._lazy, + cache_dir=vset_i._cache_dir, + tracking_dir=tracking_dir, + ) + setattr( + new_vset, + FILTER_PREV_KEY, + ( + metric_dict[PREV_KEY], + vset_i, + ), + ) setattr(new_vset, PREV_KEY, getattr(new_vset, FILTER_PREV_KEY)) vsets[i] = new_vset if len(vsets) == 1: @@ -221,20 +267,26 @@ def cum_acc_by_uncertainty(mean_preds, std_preds, true_labels): TODO: generalize to multi-class classification """ - assert dict_keys(mean_preds) == dict_keys(std_preds), \ - "mean_preds and std_preds must share the same keys" + assert dict_keys(mean_preds) == dict_keys( + std_preds + ), "mean_preds and std_preds must share the same keys" # match predictions on keys - paired_preds = [[d[k] for d in (mean_preds, std_preds)] for k in dict_keys(mean_preds)] - mean_preds, std_preds = (np.array(p)[:,:,1] for p in zip(*paired_preds)) + paired_preds = [ + [d[k] for d in (mean_preds, std_preds)] for k in dict_keys(mean_preds) + ] + mean_preds, std_preds = (np.array(p)[:, :, 1] for p in zip(*paired_preds)) if isinstance(true_labels, dict): true_labels = dict_data(true_labels) - assert len(true_labels) == 1, 'true_labels should have a single 1D vector entry' + assert len(true_labels) == 1, "true_labels should have a single 1D vector entry" true_labels = true_labels[0] n_obs = len(mean_preds[0]) - assert len(true_labels) == n_obs, \ - f'true_labels has {len(true_labels)} obs. but should have same as predictions ({n_obs})' + assert ( + len(true_labels) == n_obs + ), f"true_labels has {len(true_labels)} obs. 
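# ---------------------------------------------------------------------------
# A self-contained toy run of filter_vset_by_metric, assuming scikit-learn;
# the names below are illustrative, not from this patch. The metric dict
# produced by evaluate() is filtered down to the single best accuracy,
# yielding a smaller Vset.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from vflow import Vset, build_vset, filter_vset_by_metric, init_args

X, y = np.random.randn(50, 3), np.random.randint(0, 2, 50)
X_dict, y_dict = init_args((X, y), names=["X", "y"])
models = build_vset("models", LogisticRegression, param_dict={"C": [0.1, 1.0]})
models.fit(X_dict, y_dict)
hard_metrics = Vset("hard_metrics", [accuracy_score], ["acc"])
acc_dict = hard_metrics.evaluate(models.predict(X_dict), y_dict)
best_models = filter_vset_by_metric(acc_dict, models, n_keep=1)
print(len(best_models))  # 1
# ---------------------------------------------------------------------------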
but should have same as predictions ({n_obs})" sorted_idx = np.argsort(std_preds, axis=1) - correct_labels = np.take_along_axis(np.around(mean_preds) - true_labels == 0, sorted_idx, 1) + correct_labels = np.take_along_axis( + np.around(mean_preds) - true_labels == 0, sorted_idx, 1 + ) uncertainty = np.take_along_axis(std_preds, sorted_idx, 1) - cum_acc = np.cumsum(correct_labels, axis=1) / range(1, n_obs+1) + cum_acc = np.cumsum(correct_labels, axis=1) / range(1, n_obs + 1) return uncertainty, cum_acc, sorted_idx diff --git a/vflow/pipeline.py b/vflow/pipeline.py index 352fddf..04d6632 100644 --- a/vflow/pipeline.py +++ b/vflow/pipeline.py @@ -1,5 +1,6 @@ """Class that stores the entire pipeline of steps in a data-science workflow """ + import itertools import joblib @@ -27,21 +28,19 @@ def __init__(self, steps=None, cache_dir=None): self.memory = joblib.Memory(location=cache_dir) def run(self, *args, **kwargs): - """Runs the pipeline - """ + """Runs the pipeline""" run_step_cached = self.memory.cache(_run_step) for i, step in enumerate(self.steps): try: step_name = step.name except AttributeError: - step_name = f'Step {i}' + step_name = f"Step {i}" print(step_name) _, fitted_step = run_step_cached(step, *args, **kwargs) self.steps[i] = fitted_step def __getitem__(self, i): - """Accesses ith step of pipeline - """ + """Accesses ith step of pipeline""" return self.steps[i] def __len__(self): @@ -51,13 +50,13 @@ def generate_names(self, as_pandas=True): name_lists = [] if as_pandas: for step in self.steps: - name_lists.append([f'{i}_{str(mod)[:8]}' - for i, mod in enumerate(step)]) + name_lists.append([f"{i}_{str(mod)[:8]}" for i, mod in enumerate(step)]) indexes = list(itertools.product(*name_lists)) return pd.DataFrame(indexes, columns=[step.name for step in self.steps]) for step in self.steps: - name_lists.append([f'{step.name}_{i}_{str(mod)[:8]}' - for i, mod in enumerate(step)]) + name_lists.append( + [f"{step.name}_{i}_{str(mod)[:8]}" for i, mod in enumerate(step)] + ) return list(itertools.product(*name_lists)) @@ -85,7 +84,7 @@ def unnest_node(node): unnested_node: str, Vset, or None """ node_type = type(node) - if node_type is str or 'Vset' in str(node_type): + if node_type is str or "Vset" in str(node_type): return node if node_type is tuple: return unnest_node(node[0]) @@ -109,7 +108,7 @@ def build_graph_recur(node, G): # initial case: starting at dict if isinstance(node, dict): - s_node = 'End' + s_node = "End" nodes_prev = node[PREV_KEY] G.add_edge(nodes_prev[0], s_node) for node_prev in nodes_prev[1:]: @@ -118,7 +117,7 @@ def build_graph_recur(node, G): return G # main case: at a vfuncset - if 'Vset' in str(type(node)): + if "Vset" in str(type(node)): if hasattr(node, PREV_KEY): nodes_prev = getattr(node, PREV_KEY) for node_prev in nodes_prev: @@ -140,7 +139,7 @@ def build_graph_recur(node, G): G = nx.DiGraph() G = build_graph_recur(node, G) if draw: - nx.draw(G, with_labels=True, node_color='#CCCCCC') + nx.draw(G, with_labels=True, node_color="#CCCCCC") return G diff --git a/vflow/subkey.py b/vflow/subkey.py index bc91a49..9467d35 100644 --- a/vflow/subkey.py +++ b/vflow/subkey.py @@ -1,7 +1,8 @@ """Defines a parameter from some origin Vset """ -class Subkey: + +class Subkey: def __init__(self, value, origin: str, output_matching: bool = False): """ Parameters @@ -21,16 +22,16 @@ def __init__(self, value, origin: str, output_matching: bool = False): self.sep_dicts_id = None def is_matching(self): - """Checks if subkey should be matched in other Vsets - """ + """Checks if 
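# ---------------------------------------------------------------------------
# A sketch of the graph utilities above: every output dict carries __prev__
# pointers, so build_graph can reconstruct the pipeline DAG. Reuses the toy
# acc_dict from the filtering sketch; networkx is required (and matplotlib
# if draw=True).
from vflow import build_graph

G = build_graph(acc_dict, draw=False)
print(G.number_of_nodes(), G.number_of_edges())
# ---------------------------------------------------------------------------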
subkey should be matched in other Vsets""" return self.output_matching or self.sep_dicts_id is not None def matches_sep_dict_id(self, other: object): - """Helper to match Subkey by _sep_dict_id - """ + """Helper to match Subkey by _sep_dict_id""" if isinstance(other, self.__class__): - return self.sep_dicts_id is not None \ - and self.sep_dicts_id == other.sep_dicts_id + return ( + self.sep_dicts_id is not None + and self.sep_dicts_id == other.sep_dicts_id + ) return False def matches(self, other: object): @@ -44,8 +45,9 @@ def matches(self, other: object): # value and origins match cond1 = self.value == other.value and self.origin == other.origin # sep_dicts_id matches - cond2 = self.sep_dicts_id == other.sep_dicts_id or \ - (self.output_matching and other.output_matching) + cond2 = self.sep_dicts_id == other.sep_dicts_id or ( + self.output_matching and other.output_matching + ) return cond0 and cond1 and cond2 return False @@ -68,8 +70,7 @@ def mismatches(self, other: object): return True def __eq__(self, other: object): - """Mainly used for testing purposes. - """ + """Mainly used for testing purposes.""" if isinstance(other, self.__class__): # value and origins match return self.value == other.value and self.origin == other.origin @@ -79,6 +80,5 @@ def __repr__(self): return str(self.value) def __hash__(self): - """Mainly used for testing purposes. - """ + """Mainly used for testing purposes.""" return hash(self.value) ^ hash(self.origin) ^ hash(self.output_matching) diff --git a/vflow/utils.py b/vflow/utils.py index 606b68c..3c425cd 100644 --- a/vflow/utils.py +++ b/vflow/utils.py @@ -1,27 +1,19 @@ """Useful functions for converting between different types (dicts, lists, tuples, etc.) """ + from copy import deepcopy from typing import Union from uuid import uuid4 import numpy as np import pandas as pd - import ray from ray.remote_function import RemoteFunction as RayRemoteFun from vflow.subkey import Subkey from vflow.vfunc import VfuncPromise - -PREV_KEY = '__prev__' - -def s(x): - """Gets shape of a list/tuple/ndarray - """ - if type(x) in [list, tuple]: - return len(x) - return x.shape +PREV_KEY = "__prev__" def init_step(idx, cols): @@ -36,26 +28,23 @@ def init_step(idx, cols): List of column names. """ for i in range(idx, len(cols)): - if cols[i] != 'init': - return 'init-' + cols[i] + if cols[i] != "init": + return "init-" + cols[i] return None def base_dict(d: dict): - """Remove PREV_KEY from dict d if present - """ - return {k:v for k,v in d.items() if k != PREV_KEY} + """Remove PREV_KEY from dict d if present""" + return {k: v for k, v in d.items() if k != PREV_KEY} def dict_data(d: dict): - """Returns a list containing all data in dict d - """ + """Returns a list containing all data in dict d""" return list(base_dict(d).values()) def dict_keys(d: dict): - """Returns a list containing all keys in dict d - """ + """Returns a list containing all keys in dict d""" return list(base_dict(d).keys()) @@ -129,10 +118,12 @@ def to_list(tup: tuple): if n_tup == 1: return [list(tup)] if n_tup % 2 != 0: - raise ValueError('Don\'t know how to handle uneven number of args ' - 'without a list. Please wrap your args in a list.') + raise ValueError( + "Don't know how to handle uneven number of args " + "without a list. Please wrap your args in a list." 
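# ---------------------------------------------------------------------------
# A small illustration of the Subkey semantics defined above (toy values):
# equality needs a matching value and origin, while is_matching() only turns
# on via output_matching or a sep_dicts_id stamped by sep_dicts.
from vflow.subkey import Subkey

a = Subkey("rf", origin="modeling")
b = Subkey("rf", origin="modeling")
print(a == b)           # True: same value and origin
print(a.is_matching())  # False: no output_matching, no sep_dicts_id
# ---------------------------------------------------------------------------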
+        )
     # assume first half of args is input and second half is outcome
-    return [list(el) for el in zip(tup[:(n_tup // 2)], tup[(n_tup // 2):])]
+    return [list(el) for el in zip(tup[: (n_tup // 2)], tup[(n_tup // 2) :])]
     if n_tup == 1:
         return [[x] for x in tup[0]]
     n_mods = len(tup[0])
@@ -170,7 +161,7 @@ def sep_dicts(d: dict, n_out: int = 1, keys=None):
     if keys is None:
         keys = []
     if len(keys) > 0 and len(keys) != n_out:
-        raise ValueError(f'keys should be empty or have length n_out={n_out}')
+        raise ValueError(f"keys should be empty or have length n_out={n_out}")
     # empty dict -- return empty dict
     if n_out <= 1:
         return d
@@ -184,7 +175,9 @@ def sep_dicts(d: dict, n_out: int = 1, keys=None):
             if len(keys) == 0:
                 new_key = (key[i],) + key[n_out:]
             else:
-                new_sub = Subkey(value=keys[i], origin=key[-1].origin + '-' + str(i))
+                new_sub = Subkey(
+                    value=keys[i], origin=key[-1].origin + "-" + str(i)
+                )
                 new_key = (new_sub,) + key
             new_key[-1].sep_dicts_id = sep_dicts_id
             if isinstance(value, VfuncPromise):
@@ -197,6 +190,7 @@ def sep_dicts(d: dict, n_out: int = 1, keys=None):
 
     return sep_dicts_list
 
+
 def dict_to_df(d: dict, param_key=None):
     """Converts a dictionary with tuple keys
     into a pandas DataFrame, optionally separating
@@ -219,31 +213,47 @@ def dict_to_df(d: dict, param_key=None):
     if len(d_copy.keys()) > 0:
         key_list = list(d.keys())
         subkey_list = key_list[0] if key_list[0] != PREV_KEY else key_list[1]
-        cols = [sk.origin for sk in subkey_list] + ['out']
+        cols = [sk.origin for sk in subkey_list] + ["out"]
         # set each init col to init-{next_vfunc_set}
-        cols = [c if c != 'init' else init_step(idx, cols) for idx, c in enumerate(cols)]
-        df.set_axis(cols, axis=1, inplace=True)
+        cols = [
+            c if c != "init" else init_step(idx, cols) for idx, c in enumerate(cols)
+        ]
+        df = df.set_axis(cols, axis=1)
         if param_key:
-            param_keys = df[param_key].tolist()  # pylint: disable=unsubscriptable-object
-            if param_key == 'out' and hasattr(param_keys[0], '__iter__'):
+            param_keys = df[
+                param_key
+            ].tolist()  # pylint: disable=unsubscriptable-object
+            if param_key == "out" and hasattr(param_keys[0], "__iter__"):
                 param_df = pd.DataFrame(param_keys)
-                param_df.columns = [f'{param_key}-{col}' for col in param_df.columns]
+                param_df.columns = [f"{param_key}-{col}" for col in param_df.columns]
                 df = df.join(param_df)
             else:
                 param_loc = df.columns.get_loc(param_key)
-                param_key_cols = [f"{p.split('=')[0]}-{param_key}" for p in param_keys[0]]
-                param_keys = [[s.split('=')[1] for s in t] for t in param_keys]
+                param_key_cols = [
+                    f"{p.split('=')[0]}-{param_key}" for p in param_keys[0]
+                ]
+                param_keys = [[s.split("=")[1] for s in t] for t in param_keys]
                 df = df.join(pd.DataFrame(param_keys)).drop(columns=param_key)
-                new_cols = df.columns[:len(cols)-1].tolist() + param_key_cols
-                df.set_axis(new_cols, axis=1, inplace=True)
+                new_cols = df.columns[: len(cols) - 1].tolist() + param_key_cols
+                df = df.set_axis(new_cols, axis=1)
                 new_idx = list(range(len(new_cols)))
-                new_idx = new_idx[:param_loc] + new_idx[len(cols)-1:] + new_idx[param_loc:len(cols)-1]
+                new_idx = (
+                    new_idx[:param_loc]
+                    + new_idx[len(cols) - 1 :]
+                    + new_idx[param_loc : len(cols) - 1]
+                )
                 df = df.iloc[:, new_idx]
     return df
 
 
-def perturbation_stats(data: Union[pd.DataFrame, dict], *group_by: str, wrt: str = 'out',
-                       func=None, prefix: str = None, split: bool = False):
+def perturbation_stats(
+    data: Union[pd.DataFrame, dict],
+    *group_by: str,
+    wrt: str = "out",
+    func=None,
+    prefix: str = None,
+    split: bool = False,
+):
     """Compute statistics for `wrt` in
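# ---------------------------------------------------------------------------
# A sketch of dict_to_df on the toy metric dict from the filtering sketch:
# each Subkey origin becomes a column and the dict values land in an 'out'
# column, with 'init' origins renamed via init_step. The column names shown
# are only indicative; they depend on the pipeline that produced the dict.
from vflow import dict_to_df

df = dict_to_df(acc_dict)
print(df.columns.tolist())  # e.g. [..., 'models', 'hard_metrics', 'out']
# ---------------------------------------------------------------------------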
`data`, conditional on `group_by` Parameters @@ -275,7 +285,7 @@ def perturbation_stats(data: Union[pd.DataFrame, dict], *group_by: str, wrt: str A DataFrame with summary statistics on `wrt`. """ if func is None: - func = ['count', 'mean', 'std'] + func = ["count", "mean", "std"] if prefix is None: prefix = wrt if isinstance(data, dict): @@ -287,36 +297,45 @@ def perturbation_stats(data: Union[pd.DataFrame, dict], *group_by: str, wrt: str gb = df.groupby(group_by)[wrt] else: gb = df.groupby(lambda x: True)[wrt] - if (isinstance(func, list) and 'mean' in func or 'std' in func) and \ - (type(df[wrt].iloc[0]) in [list, np.ndarray]): - wrt_arrays = [np.stack(d.tolist()) for d in (gb.get_group(grp) for grp in gb.groups)] + if (isinstance(func, list) and "mean" in func or "std" in func) and ( + type(df[wrt].iloc[0]) in [list, np.ndarray] + ): + wrt_arrays = [ + np.stack(d.tolist()) for d in (gb.get_group(grp) for grp in gb.groups) + ] n_cols = wrt_arrays[0].shape[1] - df_out = pd.DataFrame(gb.agg('count')) - df_out.columns = [f'{prefix}-count'] - if 'mean' in func: + df_out = pd.DataFrame(gb.agg("count")) + df_out.columns = [f"{prefix}-count"] + if "mean" in func: if split: col_means = [arr.mean(axis=0) for arr in wrt_arrays] - wrt_means = pd.DataFrame(col_means, - columns=[f'{prefix}{i}-mean' for i in range(n_cols)], - index=gb.groups.keys()) + wrt_means = pd.DataFrame( + col_means, + columns=[f"{prefix}{i}-mean" for i in range(n_cols)], + index=gb.groups.keys(), + ) else: - col_means = [{f'{prefix}-mean': arr.mean(axis=0)} for arr in wrt_arrays] + col_means = [{f"{prefix}-mean": arr.mean(axis=0)} for arr in wrt_arrays] wrt_means = pd.DataFrame(col_means, index=gb.groups.keys()) wrt_means.index.names = df_out.index.names df_out = df_out.join(wrt_means) - if 'std' in func: + if "std" in func: if split: col_stds = [arr.std(axis=0, ddof=1) for arr in wrt_arrays] - wrt_stds = pd.DataFrame(col_stds, - columns=[f'{prefix}{i}-std' for i in range(n_cols)], - index=gb.groups.keys()) + wrt_stds = pd.DataFrame( + col_stds, + columns=[f"{prefix}{i}-std" for i in range(n_cols)], + index=gb.groups.keys(), + ) else: - col_stds = [{f'{prefix}-std': arr.std(axis=0, ddof=1)} for arr in wrt_arrays] + col_stds = [ + {f"{prefix}-std": arr.std(axis=0, ddof=1)} for arr in wrt_arrays + ] wrt_stds = pd.DataFrame(col_stds, index=gb.groups.keys()) wrt_stds.index.names = df_out.index.names df_out = df_out.join(wrt_stds) - if 'count' not in func: - df_out = df_out.drop(f'{prefix}-count') + if "count" not in func: + df_out = df_out.drop(f"{prefix}-count") else: df_out = gb.agg(func) df_out = df_out.reindex(sorted(df_out.columns), axis=1) @@ -367,7 +386,9 @@ def combine_keys(left_key, right_key): return () if len(matched_subkeys) > 0: # always filter on right key - filtered_key = tuple(subkey for subkey in right_key if subkey not in matched_subkeys) + filtered_key = tuple( + subkey for subkey in right_key if subkey not in matched_subkeys + ) combined_key = left_key + filtered_key return combined_key return left_key + right_key @@ -404,7 +425,6 @@ def combine_dicts(*args: dict, base_case=True): if n_args == 2: for k0 in args[0]: for k1 in args[1]: - if PREV_KEY in (k0, k1): continue @@ -421,7 +441,7 @@ def combine_dicts(*args: dict, base_case=True): return combine_dicts(combine_dicts(args[0], args[1]), *args[2:], base_case=False) -def apply_vfuncs(vfuncs: dict, data_dict: dict, lazy: bool=False): +def apply_vfuncs(vfuncs: dict, data_dict: dict, lazy: bool = False): """Apply a dictionary of functions `vfuncs` to each item of 
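# ---------------------------------------------------------------------------
# A self-contained toy for perturbation_stats above (made-up data): group
# rows by a column and summarize 'out' with the default count/mean/std.
import pandas as pd
from vflow import perturbation_stats

toy = pd.DataFrame({"model": ["rf", "rf", "lr", "lr"], "out": [0.8, 0.9, 0.6, 0.7]})
print(perturbation_stats(toy, "model"))  # one summary row per model
# ---------------------------------------------------------------------------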
`data_dict`, optionally returning a dictionary of `vflow.vfunc.VfuncPromise` objects if `lazy` is True @@ -469,7 +489,9 @@ def apply_vfuncs(vfuncs: dict, data_dict: dict, lazy: bool=False): for i, data in enumerate(data_list): if isinstance(data, VfuncPromise): data_list[i] = data() - if isinstance(func, RayRemoteFun) and not isinstance(data_list[i], ray.ObjectRef): + if isinstance(func, RayRemoteFun) and not isinstance( + data_list[i], ray.ObjectRef + ): # send data to Ray's remote object store data_list[i] = ray.put(data_list[i]) elif isinstance(data_list[i], ray.ObjectRef): diff --git a/vflow/vfunc.py b/vflow/vfunc.py index 507b26a..42950fc 100644 --- a/vflow/vfunc.py +++ b/vflow/vfunc.py @@ -10,29 +10,27 @@ class Vfunc: If none of these is supported, it need only be a function """ - def __init__(self, name: str = '', vfunc=lambda x: x): - assert hasattr(vfunc, 'fit') or callable(vfunc), \ - 'vfunc must be an object with a fit method or a callable' + def __init__(self, name: str = "", vfunc=lambda x: x): + assert hasattr(vfunc, "fit") or callable( + vfunc + ), "vfunc must be an object with a fit method or a callable" self.name = name self.vfunc = vfunc def fit(self, *args, **kwargs): - """This function fits params for this vfunc - """ - if hasattr(self.vfunc, 'fit'): + """This function fits params for this vfunc""" + if hasattr(self.vfunc, "fit"): return self.vfunc.fit(*args, **kwargs) return self.vfunc(*args, **kwargs) def transform(self, *args, **kwargs): - """This function transforms its input in some way - """ - if hasattr(self.vfunc, 'transform'): + """This function transforms its input in some way""" + if hasattr(self.vfunc, "transform"): return self.vfunc.transform(*args, **kwargs) return self.vfunc(*args, **kwargs) def __call__(self, *args, **kwargs): - """This should decide what to call - """ + """This should decide what to call""" return self.fit(*args, **kwargs) @@ -42,41 +40,37 @@ def _remote_fun(vfunc, *args, **kwargs): class AsyncVfunc: - """An asynchronous version of the Vfunc class. - """ + """An asynchronous version of the Vfunc class.""" - def __init__(self, name: str = '', vfunc=lambda x: x): + def __init__(self, name: str = "", vfunc=lambda x: x): self.name = name if isinstance(vfunc, Vfunc): self.vfunc = vfunc.vfunc else: - assert hasattr(vfunc, 'fit') or callable(vfunc), \ - 'vfunc must be an object with a fit method or a callable' + assert hasattr(vfunc, "fit") or callable( + vfunc + ), "vfunc must be an object with a fit method or a callable" self.vfunc = vfunc def fit(self, *args, **kwargs): - """This function fits params for this vfunc - """ - if hasattr(self.vfunc, 'fit'): + """This function fits params for this vfunc""" + if hasattr(self.vfunc, "fit"): return _remote_fun.remote(self.vfunc.fit, *args, **kwargs) return _remote_fun.remote(self.vfunc, *args, **kwargs) def transform(self, *args, **kwargs): - """This function transforms its input in some way - """ - if hasattr(self.vfunc, 'transform'): + """This function transforms its input in some way""" + if hasattr(self.vfunc, "transform"): return _remote_fun.remote(self.vfunc.transform, *args, **kwargs) return _remote_fun.remote(self.vfunc, *args, **kwargs) def __call__(self, *args, **kwargs): - """This should decide what to call - """ + """This should decide what to call""" return self.fit(*args, **kwargs) class VfuncPromise: - """A Vfunc promise used for lazy evaluation. 
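# ---------------------------------------------------------------------------
# The Vfunc wrapper above duck-types fit/transform: when the wrapped object
# has no such attribute, it falls back to calling the object itself. A toy:
from vflow.vfunc import Vfunc

double = Vfunc(name="double", vfunc=lambda x: 2 * x)
print(double.fit(21))        # no .fit on a lambda -> calls it: 42
print(double.transform(21))  # no .transform either -> calls it: 42
# ---------------------------------------------------------------------------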
- """ + """A Vfunc promise used for lazy evaluation.""" def __init__(self, vfunc: callable, *args): self.vfunc = vfunc @@ -85,8 +79,7 @@ def __init__(self, vfunc: callable, *args): self.value = None def __call__(self): - """This should decide what to call - """ + """This should decide what to call""" if self.called: return self.value tmp_args = [] @@ -106,21 +99,18 @@ def _get_value(self): return self.value def transform(self, *args): - """This function transforms its input in some way - """ + """This function transforms its input in some way""" return self._get_value().transform(*args) def predict(self, *args): - """This function calls predict on its inputs - """ + """This function calls predict on its inputs""" return self._get_value().predict(*args) def predict_proba(self, *args): - """This function calls predict_proba on its inputs - """ + """This function calls predict_proba on its inputs""" return self._get_value().predict_proba(*args) def __repr__(self): if self.called: - return f'Fulfilled VfuncPromise({self.value})' - return f'Unfulfilled VfuncPromise(func={self.vfunc}, args={self.args})' + return f"Fulfilled VfuncPromise({self.value})" + return f"Unfulfilled VfuncPromise(func={self.vfunc}, args={self.args})" diff --git a/vflow/vset.py b/vflow/vset.py index bf3ed91..7e8884e 100644 --- a/vflow/vset.py +++ b/vflow/vset.py @@ -1,29 +1,40 @@ """Set of vfuncs to be parallelized over in a pipeline. Function arguments are each a list """ + from copy import deepcopy -import numpy as np import joblib +import numpy as np import ray - from mlflow.tracking import MlflowClient from vflow.subkey import Subkey -from vflow.utils import apply_vfuncs, combine_dicts, dict_to_df, perturbation_stats, sep_dicts, \ - PREV_KEY -from vflow.vfunc import Vfunc, AsyncVfunc - +from vflow.utils import ( + PREV_KEY, + apply_vfuncs, + combine_dicts, + dict_to_df, + perturbation_stats, + sep_dicts, +) +from vflow.vfunc import AsyncVfunc, Vfunc -FILTER_PREV_KEY = '__filter_prev__' +FILTER_PREV_KEY = "__filter_prev__" class Vset: - - def __init__(self, name: str, vfuncs, vfunc_keys: list = None, - is_async: bool = False, output_matching: bool = False, - lazy: bool = False, cache_dir: str = None, - tracking_dir: str = None): + def __init__( + self, + name: str, + vfuncs, + vfunc_keys: list = None, + is_async: bool = False, + output_matching: bool = False, + lazy: bool = False, + cache_dir: str = None, + tracking_dir: str = None, + ): """ Parameters ---------- @@ -73,13 +84,18 @@ def __init__(self, name: str, vfuncs, vfunc_keys: list = None, self.vfuncs = vfuncs elif isinstance(vfuncs, list): if vfunc_keys is not None: - assert isinstance(vfunc_keys, list), 'vfuncs passed as list but vfunc_keys is not a list' + assert isinstance( + vfunc_keys, list + ), "vfuncs passed as list but vfunc_keys is not a list" assert len(vfuncs) == len( - vfunc_keys), 'vfuncs list and vfunc_keys list do not have the same length' + vfunc_keys + ), "vfuncs list and vfunc_keys list do not have the same length" # TODO: how best to handle tuple subkeys? 
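# ---------------------------------------------------------------------------
# VfuncPromise above defers a call until first use and caches the result,
# which is what lazy Vsets pass around. A toy promise:
from vflow.vfunc import VfuncPromise

p = VfuncPromise(sum, [1, 2, 3])
print(p)    # Unfulfilled VfuncPromise(func=<built-in function sum>, args=([1, 2, 3],))
print(p())  # 6: evaluated on first call
print(p)    # Fulfilled VfuncPromise(6)
# ---------------------------------------------------------------------------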
vfunc_keys = [(self.__create_subkey(k),) for k in vfunc_keys] else: - vfunc_keys = [(self.__create_subkey(f'{name}_{i}'),) for i in range(len(vfuncs))] + vfunc_keys = [ + (self.__create_subkey(f"{name}_{i}"),) for i in range(len(vfuncs)) + ] # convert vfunc keys to singleton tuples self.vfuncs = dict(zip(vfunc_keys, vfuncs)) # if needed, wrap the vfuncs in the Vfunc or AsyncVfunc class @@ -135,9 +151,7 @@ def _apply_func(self, *args, out_dict: dict = None): continue origins = np.array([subk.origin for subk in k]) # ignore init origins and the last origin (this Vset) - param_idx = [ - i for i in range(len(k[:-1])) if origins[i] != 'init' - ] + param_idx = [i for i in range(len(k[:-1])) if origins[i] != "init"] # get or create mlflow run run_dict_key = tuple(subk.value for subk in k[:-1]) if run_dict_key in run_dict: @@ -154,15 +168,12 @@ def _apply_func(self, *args, out_dict: dict = None): if np.sum(origins == param_name) > 1: occurence = np.sum(origins[:idx] == param_name) param_name = param_name + str(occurence) - self._mlflow.log_param( - run_id, param_name, subkey.value - ) + self._mlflow.log_param(run_id, param_name, subkey.value) self._mlflow.log_metric(run_id, k[-1].value, v) return out_dict def fit(self, *args): - """Fits to args using `_apply_func` - """ + """Fits to args using `_apply_func`""" out_dict = {} for k, v in self.vfuncs.items(): out_dict[k] = v.fit @@ -175,43 +186,45 @@ def fit(self, *args): return self def fit_transform(self, *args): - """Fits to args and transforms only the first arg. - """ + """Fits to args and transforms only the first arg.""" return self.fit(*args).transform(args[0]) def transform(self, *args): - """Transforms args using `_apply_func` - """ + """Transforms args using `_apply_func`""" if not self._fitted: - raise AttributeError('Please fit the Vset object before calling the transform method.') + raise AttributeError( + "Please fit the Vset object before calling the transform method." + ) out_dict = {} for k, v in self.fitted_vfuncs.items(): - if hasattr(v, 'transform'): + if hasattr(v, "transform"): out_dict[k] = v.transform return self._apply_func(*args, out_dict=out_dict) - def predict(self, *args, with_uncertainty: bool=False, group_by: list=None): - """Predicts args using `_apply_func` - """ + def predict(self, *args, with_uncertainty: bool = False, group_by: list = None): + """Predicts args using `_apply_func`""" if not self._fitted: - raise AttributeError('Please fit the Vset object before calling predict.') + raise AttributeError("Please fit the Vset object before calling predict.") pred_dict = {} for k, v in self.fitted_vfuncs.items(): - if hasattr(v, 'predict'): + if hasattr(v, "predict"): pred_dict[k] = v.predict preds = self._apply_func(*args, out_dict=pred_dict) if with_uncertainty: return prediction_uncertainty(preds, group_by) return preds - def predict_proba(self, *args, with_uncertainty: bool=False, group_by: list=None): - """Calls predict_proba on args using `_apply_func` - """ + def predict_proba( + self, *args, with_uncertainty: bool = False, group_by: list = None + ): + """Calls predict_proba on args using `_apply_func`""" if not self._fitted: - raise AttributeError('Please fit the Vset object before calling predict_proba.') + raise AttributeError( + "Please fit the Vset object before calling predict_proba." 
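# ---------------------------------------------------------------------------
# A toy pass through the Vset lifecycle above, assuming scikit-learn and the
# X_dict from the filtering sketch: fit stores fitted vfuncs, and transform
# only applies the ones that expose a .transform method.
from sklearn.preprocessing import StandardScaler
from vflow import build_vset

scaling = build_vset("scaling", StandardScaler)
scaling.fit(X_dict)
X_scaled = scaling.transform(X_dict)
# ---------------------------------------------------------------------------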
+ ) pred_dict = {} for k, v in self.fitted_vfuncs.items(): - if hasattr(v, 'predict_proba'): + if hasattr(v, "predict_proba"): pred_dict[k] = v.predict_proba preds = self._apply_func(*args, out_dict=pred_dict) if with_uncertainty: @@ -219,8 +232,7 @@ def predict_proba(self, *args, with_uncertainty: bool=False, group_by: list=None return preds def evaluate(self, *args): - """Combines dicts before calling `_apply_func` - """ + """Combines dicts before calling `_apply_func`""" return self._apply_func(*args) def __call__(self, *args, n_out: int = None, keys=None, **kwargs): @@ -245,20 +257,17 @@ def __call__(self, *args, n_out: int = None, keys=None, **kwargs): return out_dicts def __getitem__(self, i): - """Accesses ith item in the vfunc set - """ + """Accesses ith item in the vfunc set""" return self.vfuncs[i] def __contains__(self, key): - """Returns true if vfuncs is a dict and key is one of its keys - """ + """Returns true if vfuncs is a dict and key is one of its keys""" if isinstance(self.vfuncs, dict): return key in self.vfuncs.keys() return False def keys(self): - """Returns Vset vfunc keys - """ + """Returns Vset vfunc keys""" if isinstance(self.vfuncs, dict): return self.vfuncs.keys() return {}.keys() @@ -267,7 +276,7 @@ def __len__(self): return len(self.vfuncs) def __str__(self): - return 'Vset(' + self.name + ')' + return "Vset(" + self.name + ")" def __create_subkey(self, value): """Helper function to construct `Subkey` with @@ -298,7 +307,9 @@ def _apply_func_cached(out_dict: dict, is_async: bool, lazy: bool, *args): """ for in_dict in args: if not isinstance(in_dict, dict): - raise Exception('Run init_args on data before using it when calling a Vset!') + raise Exception( + "Run init_args on data before using it when calling a Vset!" + ) data_dict = combine_dicts(*args) out_dict = apply_vfuncs(out_dict, data_dict, lazy) @@ -313,7 +324,7 @@ def _apply_func_cached(out_dict: dict, is_async: bool, lazy: bool, *args): return out_dict -def prediction_uncertainty(preds, group_by: list=None): +def prediction_uncertainty(preds, group_by: list = None): """Returns the mean and std predictions conditional on group_by Params @@ -331,15 +342,15 @@ def prediction_uncertainty(preds, group_by: list=None): if group_by is None: # just average over all predictions preds_stats = perturbation_stats(preds_df) - group_by = ['index'] + group_by = ["index"] else: preds_stats = perturbation_stats(preds_df, *group_by) origins = preds_stats[group_by].columns keys = preds_stats[group_by].to_numpy() # wrap subkey values in Subkey keys = [tuple(Subkey(sk, origins[idx]) for idx, sk in enumerate(x)) for x in keys] - mean_dict = dict(zip(keys, preds_stats['out-mean'])) - std_dict = dict(zip(keys, preds_stats['out-std'])) + mean_dict = dict(zip(keys, preds_stats["out-mean"])) + std_dict = dict(zip(keys, preds_stats["out-std"])) # add PREV_KEY to out dicts mean_dict[PREV_KEY] = preds[PREV_KEY] std_dict[PREV_KEY] = preds[PREV_KEY]
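# ---------------------------------------------------------------------------
# A hedged sketch of predict(with_uncertainty=True), reusing the toy X_dict
# and y_dict from the filtering sketch and assuming scikit-learn: reps=3 in
# build_vset creates replicate models, and prediction_uncertainty above
# condenses their predictions into mean/std summary dicts.
from sklearn.ensemble import RandomForestClassifier
from vflow import build_vset

boot = build_vset("boot_rf", RandomForestClassifier, reps=3, n_estimators=25)
boot.fit(X_dict, y_dict)
uncertainty = boot.predict(X_dict, with_uncertainty=True)
# ---------------------------------------------------------------------------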