From 22d06eb5cb6b47917b6519439a3bd7637a2b65a7 Mon Sep 17 00:00:00 2001
From: James Duncan
Date: Fri, 9 Feb 2024 15:24:58 -0800
Subject: [PATCH] Move to hatchling build system (#55)

* Bumps minimum Python version to 3.9 and minimum Pandas version to 2.0.0.
* Incorporates linting via ruff, black, and isort.
* Expands the python-package GitHub workflow to test more OSes and Python versions.
* Fixes failing tests and adds new tests.
---
 .github/workflows/python-package.yml |  101 +-
 .gitignore                           |   53 +
 Makefile                             |   22 +
 README.md                            |   12 +-
 docs/troubleshooting.md              |    7 -
 hatch.toml                           |   62 +
 pyproject.toml                       |   56 +-
 requirements.txt                     |    9 +
 setup.py                             |   53 -
 tests/test_basic.py                  |   65 +-
 tests/test_helpers.py                |  745 +++++--
 tests/test_pipelines.py              |  393 ++--
 tests/test_utils.py                  | 3093 +++++++++++++++++---------
 vflow/__init__.py                    |   62 +-
 vflow/helpers.py                     |  152 +-
 vflow/pipeline.py                    |   25 +-
 vflow/subkey.py                      |   26 +-
 vflow/utils.py                       |  140 +-
 vflow/vfunc.py                       |   62 +-
 vflow/vset.py                        |  117 +-
 20 files changed, 3557 insertions(+), 1698 deletions(-)
 create mode 100644 Makefile
 delete mode 100644 docs/troubleshooting.md
 create mode 100644 hatch.toml
 create mode 100644 requirements.txt
 delete mode 100644 setup.py

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 71713ba..d62d4ae 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -1,35 +1,96 @@
-# This workflow will install Python dependencies, run tests
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-
+# based on https://github.com/pypa/hatch/blob/master/.github/workflows/test.yml
 name: tests
 
-on: [ push ]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  STABLE_PYTHON_VERSION: '3.11'
+  PYTHONUNBUFFERED: "1"
+  FORCE_COLOR: "1"
 
 jobs:
   build:
-
-    runs-on: ubuntu-latest
+    name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
+    runs-on: ${{ matrix.os }}
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.7.15, 3.10.8]
+        python-version: ["3.9", "3.10", "3.11"]
+        os: [ubuntu-latest, windows-latest, macos-latest]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
+
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install .[dev] --no-cache-dir
+
+    - name: Ensure latest pip
+      run: python -m pip install --upgrade pip
+
+    - name: Install Hatch
+      run: pip install hatch
+
+    - name: Install vflow
+      run: python -m pip install -e .
+ + - name: Check styles + run: hatch run style:check + - name: Test with pytest - run: | - pytest --cov=./ --cov-report=xml - - name: Lint with pylint - run: | - pylint vflow *.py --rcfile=.pylintrc - - name: "Upload coverage to Codecov" - uses: codecov/codecov-action@v2 + run: hatch run full + + - name: Disambiguate coverage filename + run: mv .coverage ".coverage.${{ matrix.os }}.${{ matrix.python-version }}" + + - name: Upload coverage data + uses: actions/upload-artifact@v3 + with: + name: coverage-data + path: .coverage.* + + coverage: + name: Report coverage + runs-on: ubuntu-latest + needs: + - build + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ env.STABLE_PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: ${{ env.STABLE_PYTHON_VERSION }} + + - name: Install Hatch + run: pip install hatch + + - name: Download coverage data + uses: actions/download-artifact@v3 + with: + name: coverage-data + + - name: Combine coverage data + run: hatch run coverage:combine + + - name: Export coverage reports + run: hatch run coverage:report-xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 with: fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true diff --git a/.gitignore b/.gitignore index e47fdb2..e554f41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +# -*- mode: gitignore; -*- + **mlruns **.ipynb_checkpoints **cache* @@ -48,3 +50,54 @@ notebooks/data/* .hypothesis **coverage* codecov + +# -- Emacs +# https://github.com/github/gitignore/blob/main/Global/Emacs.gitignore + +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4f5b0b2 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +VERSION ?= $(shell git rev-parse --short HEAD) +CONDA_ENV_NAME ?= vflow +HATCH_ENV_NAME ?= test + +.PHONY: build_conda_env build_ipykernel test_% run_tests fix_styles + +build_conda_env: + conda create -n $(CONDA_ENV_NAME) -y python==3.10 pip + conda run -n $(CONDA_ENV_NAME) --no-capture-output pip install -r requirements.txt + conda run -n $(CONDA_ENV_NAME) --no-capture-output pip install . ipykernel + +build_ipykernel: + conda run -n $(CONDA_ENV_NAME) python -m ipykernel install --user --name $(CONDA_ENV_NAME) --display-name "Python [conda:$(CONDA_ENV_NAME)]" + +test_%: + hatch -v run dev $(PYTEST_ARGS) tests/test_$*.py + +run_tests: + hatch -v run cov + +fix_styles: + hatch -v run style:fmt diff --git a/README.md b/README.md index 78d011e..22b5df0 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,17 @@ See the [docs](https://yu-group.github.io/veridical-flow/) for reference on the ## Installation -Install with `pip install vflow` (see [here](https://github.com/Yu-Group/veridical-flow/blob/master/docs/troubleshooting.md) for help). For dev version (unstable), clone the repo and run `python setup.py develop` from the repo directory. 
+
+### Stable version
+
+```bash
+pip install vflow
+```
+
+### Development version (unstable)
+
+```bash
+pip install vflow@git+https://github.com/Yu-Group/veridical-flow
+```
 
 # References
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
deleted file mode 100644
index f553b2b..0000000
--- a/docs/troubleshooting.md
+++ /dev/null
@@ -1,7 +0,0 @@
-In case you run into issues with installation, here are some things that could help:
-
-If you don't have permissions to install on your machine, use the --user flag:
-
-`pip install git+https://github.com/Yu-group/pcs-pipline --user`
-
-To develop locally, run `python3 setup.py develop`
\ No newline at end of file
diff --git a/hatch.toml b/hatch.toml
new file mode 100644
index 0000000..ddcf5bb
--- /dev/null
+++ b/hatch.toml
@@ -0,0 +1,62 @@
+# based on https://github.com/pypa/hatch/blob/master/hatch.toml
+
+[envs.default]
+dependencies = [
+    "coverage[toml]",
+    "pytest-cov",
+    "pytest-randomly",
+    "pytest-rerunfailures",
+    "pytest-xdist",
+]
+
+[envs.default.scripts]
+# --cov must not come before an argument in order to use the sources defined by config
+_cov = "pytest --cov --cov-report=term-missing --cov-config=pyproject.toml"
+dev = "pytest -p no:randomly --no-cov {args:tests}"
+cov = "_cov -p no:randomly {args:tests}"
+full = "_cov -n auto --reruns 5 --reruns-delay 3 -r aR {args:tests}"
+
+[envs.dev]
+template = "default"
+dependencies = [
+    "jupyterlab",
+    "torch>=1.0.0",
+    "torchvision",
+    "tqdm",
+    "scikit-learn>=0.23.0",
+]
+
+[envs.dev.env-vars]
+PIP_INDEX_URL = "https://download.pytorch.org/whl/cpu"
+PIP_EXTRA_INDEX_URL = "https://pypi.org/simple/"
+
+[envs.style]
+detached = true
+dependencies = [
+    "ruff",
+    "black",
+    "isort",
+]
+
+[envs.style.scripts]
+check = [
+    "ruff vflow tests",
+    "black --check --diff vflow tests",
+    "isort --check --diff --profile black vflow tests",
+]
+fmt = [
+    "isort --profile black ./vflow ./tests",
+    "black ./vflow ./tests",
+    "check",
+]
+
+[envs.coverage]
+detached = true
+dependencies = [
+    "coverage[toml]",
+    "lxml",
+]
+
+[envs.coverage.scripts]
+combine = "coverage combine {args}"
+report-xml = "coverage xml -i"
diff --git a/pyproject.toml b/pyproject.toml
index fed528d..85a8ba3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,55 @@
 [build-system]
-requires = ["setuptools"]
-build-backend = "setuptools.build_meta"
+requires = ["hatchling", "hatch-requirements-txt"]
+build-backend = "hatchling.build"
+
+[project]
+name = "vflow"
+version = "0.1.2"
+authors = [
+    { name="Chandan Singh", email="chandan_singh@berkeley.edu" },
+    { name="James Duncan", email="jpduncan@berkeley.edu" },
+    { name="Abhineet Agarwal", email="aa3797@berkeley.edu" },
+    { name="Rush Kapoor", email="rush.kapoor@berkeley.edu" },
+]
+maintainers = [
+    { name="James Duncan", email="jpduncan@berkeley.edu" },
+]
+description = "A framework for doing stability analysis with PCS."
+readme = "README.md" +requires-python = ">=3.9" +classifiers = [ + "Intended Audience :: Science/Research", + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Operating System :: OS Independent", +] +license = {text = "MIT"} +dynamic = ["dependencies"] + +[project.urls] +Homepage = "https://vflow.csinva.io/" +Issues = "https://github.com/Yu-Group/veridical-flow/issues" + +[project.optional-dependencies] +gpu = ["torch"] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.metadata.hooks.requirements_txt] +files = ["requirements.txt"] + +[tool.hatch.build.targets.sdist] +exclude = [ + "/.github", + "/notebooks", +] + +[tool.hatch.build.targets.wheel] +packages = ["vflow"] + +[tool.coverage.run] +branch = true +source_pkgs = ["vflow", "tests"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a2f6b90 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +numpy +scipy +matplotlib +networkx +pandas>=2.0.0 +joblib +pytest +ray +mlflow diff --git a/setup.py b/setup.py deleted file mode 100644 index eb60502..0000000 --- a/setup.py +++ /dev/null @@ -1,53 +0,0 @@ -from os import path - -import setuptools - -path_to_repo = path.abspath(path.dirname(__file__)) -with open(path.join(path_to_repo, 'README.md'), encoding='utf-8') as f: - long_description = f.read() - -setuptools.setup( - name="vflow", - version="0.1.2", - author="Yu Group", - author_email="chandan_singh@berkeley.edu", - description="A framework for doing stability analysis with PCS.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/Yu-Group/pcs-pipeline", - packages=setuptools.find_packages(), - install_requires=[ - 'numpy', - 'scipy', - 'matplotlib', - 'networkx', - 'pandas', - 'joblib', - 'pytest', - 'ray', - 'mlflow', - ], - extras_require={ - 'dev': [ - 'pytest', - 'pytest-cov', - 'pylint==2.12.2', - 'tqdm', - 'scikit-learn >=0.23.0', # 0.23+ only works on py3.6+) - ], - 'notebooks': [ - 'tqdm', - 'jupyter', - 'jupyterlab', - 'scikit-learn >=0.23.0', # 0.23+ only works on py3.6+) - 'torch >= 1.0.0', - 'torchvision', - ], - }, - python_requires='>=3.6', - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], -) diff --git a/tests/test_basic.py b/tests/test_basic.py index 004240f..0eafbe5 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,13 +1,13 @@ import pytest import vflow -from vflow.utils import to_tuple, to_list +from vflow.utils import to_list, to_tuple class TestBasic: def setup_method(self): self.pipeline = vflow.PCSPipeline() - self.vfunc_set = vflow.Vset(name='s', vfuncs={}) + self.vfunc_set = vflow.Vset(name="s", vfuncs={}) self.vfunc = vflow.Vfunc() def test_class_initializations(self): @@ -16,29 +16,58 @@ def test_class_initializations(self): assert self.vfunc is not None def test_iteration(self): - """Tests that iterating over pipeline is same as iterating over its steps - """ + """Tests that iterating over pipeline is same as iterating over its steps""" self.pipeline.steps = [0, 1, 2] assert self.pipeline.steps[0] == 0 - assert self.pipeline[0] == 0, 'accessing pipeline steps' + assert self.pipeline[0] == 0, "accessing pipeline steps" for i, x in enumerate(self.pipeline): - assert x == i, 'iterating over pipeline steps' - assert self.pipeline[1:] == [1, 2], 'slicing 
pipeline' + assert x == i, "iterating over pipeline steps" + assert self.pipeline[1:] == [1, 2], "slicing pipeline" def test_list_packing(self): - """Test that packing / unpacking lists works appropriately - """ + """Test that packing / unpacking lists works appropriately""" start = [[0, 10], [1, 11], [2, 12]] X, y = to_tuple(start) packed = to_list((X, y)) - assert start == packed, 'unpacking/packing works' - + assert start == packed, "unpacking/packing works" + def test_to_list(self): - assert to_list((['x1', 'x2', 'x3'], ['y1', 'y2', 'y3'])) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] - assert to_list((['x1'], ['y1'])) == [['x1', 'y1']] - assert to_list((['x1', 'x2', 'x3'],)) == [['x1'], ['x2'], ['x3']] - assert to_list(('x1', )) == [['x1']] - assert to_list(('x1', 'y1')) == [['x1', 'y1']] - assert to_list(('x1', 'x2', 'x3', 'y1', 'y2', 'y3')) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] + assert to_list((["x1", "x2", "x3"], ["y1", "y2", "y3"])) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] + assert to_list((["x1"], ["y1"])) == [["x1", "y1"]] + assert to_list((["x1", "x2", "x3"],)) == [["x1"], ["x2"], ["x3"]] + assert to_list(("x1",)) == [["x1"]] + assert to_list(("x1", "y1")) == [["x1", "y1"]] + assert to_list(("x1", "x2", "x3", "y1", "y2", "y3")) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] with pytest.raises(ValueError): - to_list(('x1', 'x2', 'x3', 'y1', 'y2')) + to_list(("x1", "x2", "x3", "y1", "y2")) + + def test_build_graph(self): + v0 = vflow.Vset("v0", [lambda x: x + 1], ["add1"]) + v1 = vflow.Vset("v1", [lambda x: 2 * x], ["mult2"]) + v2 = vflow.Vset("v2", [lambda x: x % 3], ["mod3"]) + + x = vflow.init_args([1.5], ["x"])[0] + x0 = v0.fit_transform(x) + x1 = v1.fit_transform(x0) + x2 = v2.fit_transform(x1) + + graph = vflow.build_graph(x2) + assert graph.is_directed() + assert graph.size() == 4 # edges + assert graph.order() == 5 # nodes: init + 3 Vsets + End + in_degrees = dict(graph.in_degree).values() + assert max(in_degrees) == 1 + assert sum(in_degrees) == 4 + edges = list(graph.edges) + assert ("init", v0) in edges + assert (v0, v1) in edges + assert (v1, v2) in edges + assert (v2, "End") in edges diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 06b9e8e..2004f16 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,292 +1,591 @@ +import numpy as np from numpy.testing import assert_equal +from sklearn.datasets import make_classification +from sklearn.metrics import accuracy_score, balanced_accuracy_score +from sklearn.model_selection import train_test_split +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils import resample -from vflow.helpers import * +from vflow.helpers import ( + build_vset, + cum_acc_by_uncertainty, + filter_vset_by_metric, + init_args, +) +from vflow.subkey import Subkey +from vflow.utils import dict_to_df +from vflow.vset import Vset -class TestHelpers: +class TestHelpers: def test_build_vset(self): - - def my_func(param1: str, param2: str, param3: str='a'): + def my_func(param1: str, param2: str, param3: str = "a"): return (param1, param2, param3) - def my_func2(param1: str, param2: str, param3: str='b'): - return (param1 + '1', param2 + '2', param3) + def my_func2(param1: str, param2: str, param3: str = "b"): + return (param1 + "1", param2 + "2", param3) - param_dict1 = { 'param1': ['hello', 'foo'], 'param2': ['world', 'bar'] } - param_dict2 = { 'param1': ['hello'], 'param2': ['world', 'there']} + param_dict1 = {"param1": ["hello", "foo"], "param2": ["world", "bar"]} + 
param_dict2 = {"param1": ["hello"], "param2": ["world", "there"]} # my_func without param_dict - vset = build_vset("vset", my_func, param1='hello', param2='world', param3='b') - assert len(vset) == 1, \ - 'build_vset with my_func fails' + vset = build_vset("vset", my_func, param1="hello", param2="world", param3="b") + assert len(vset) == 1, "build_vset with my_func fails" d_key = [key[0] for key in list(vset.vfuncs.keys())][0] - assert d_key.value == 'vset_0', \ - 'build_vset with my_func fails' + assert d_key.value == "vset_0", "build_vset with my_func fails" d_keyword = [val.vfunc.keywords for val in list(vset.vfuncs.values())][0] - assert d_keyword == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func fails' + assert d_keyword == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func fails" + assert next(iter(vset.vfuncs.values())).transform() == ("hello", "world", "b") # my_func without param_dict, reps - vset = build_vset("vset", my_func, reps=2, param1='hello', param2='world', param3='b') - assert len(vset) == 2, \ - 'build_vset with my_func + reps fails' + vset = build_vset( + "vset", my_func, reps=2, param1="hello", param2="world", param3="b" + ) + assert len(vset) == 2, "build_vset with my_func + reps fails" d_keys = [key[0].value[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0] == 'rep=0', \ - 'build_vset with my_func + reps fails' - assert d_keys[1] == 'rep=1', \ - 'build_vset with my_func + reps fails' + assert d_keys[0] == "rep=0", "build_vset with my_func + reps fails" + assert d_keys[1] == "rep=1", "build_vset with my_func + reps fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords[0] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + reps fails' - assert d_keywords[1] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + reps fails' + assert d_keywords[0] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + reps fails" + assert d_keywords[1] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + reps fails" # my_func with param_dict1 - vset = build_vset("vset", my_func, param_dict1, param3='b') - assert len(vset) == 4, \ - 'build_vset with my_func + param_dict1 fails' + vset = build_vset("vset", my_func, param_dict1, param3="b") + assert len(vset) == 4, "build_vset with my_func + param_dict1 fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('func=my_func', 'param1=hello', 'param2=world'), \ - 'build_vset with my_func + param_dict1 fails' - assert d_keys[1].value == ('func=my_func', 'param1=hello', 'param2=bar'), \ - 'build_vset with my_func + param_dict1 fails' - assert d_keys[2].value == ('func=my_func', 'param1=foo', 'param2=world'), \ - 'build_vset with my_func + param_dict1 fails' - assert d_keys[3].value == ('func=my_func', 'param1=foo', 'param2=bar'), \ - 'build_vset with my_func + param_dict1 fails' + assert d_keys[0].value == ( + "func=my_func", + "param1=hello", + "param2=world", + ), "build_vset with my_func + param_dict1 fails" + assert d_keys[1].value == ( + "func=my_func", + "param1=hello", + "param2=bar", + ), "build_vset with my_func + param_dict1 fails" + assert d_keys[2].value == ( + "func=my_func", + "param1=foo", + "param2=world", + ), "build_vset with my_func + param_dict1 fails" + assert d_keys[3].value == ( + "func=my_func", + 
"param1=foo", + "param2=bar", + ), "build_vset with my_func + param_dict1 fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords[0] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' - assert d_keywords[1] == {'param1': 'hello', 'param2': 'bar', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' - assert d_keywords[2] == {'param1': 'foo', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' - assert d_keywords[3] == {'param1': 'foo', 'param2': 'bar', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict1 fails' + assert d_keywords[0] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" + assert d_keywords[1] == { + "param1": "hello", + "param2": "bar", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" + assert d_keywords[2] == { + "param1": "foo", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" + assert d_keywords[3] == { + "param1": "foo", + "param2": "bar", + "param3": "b", + }, "build_vset with my_func + param_dict1 fails" # my_func with param_dict2, reps - vset = build_vset("vset", my_func, param_dict2, reps=2, lazy=True, param3='b') - assert vset._lazy, \ - 'build_vset with my_func + param_dict2 + reps fails' - assert len(vset) == 4, \ - 'build_vset with my_func + param_dict2 + reps fails' + vset = build_vset("vset", my_func, param_dict2, reps=2, lazy=True, param3="b") + assert vset._lazy, "build_vset with my_func + param_dict2 + reps fails" + assert len(vset) == 4, "build_vset with my_func + param_dict2 + reps fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('rep=0', 'func=my_func', 'param1=hello', 'param2=world'), \ - 'build_vset with my_func + param_dict2 + reps fails' - assert d_keys[1].value == ('rep=1', 'func=my_func', 'param1=hello', 'param2=world'), \ - 'build_vset with my_func + param_dict2 + reps fails' - assert d_keys[2].value == ('rep=0', 'func=my_func', 'param1=hello', 'param2=there'), \ - 'build_vset with my_func + param_dict2 + reps fails' - assert d_keys[3].value == ('rep=1', 'func=my_func', 'param1=hello', 'param2=there'), \ - 'build_vset with my_func + param_dict2 + reps fails' + assert d_keys[0].value == ( + "rep=0", + "func=my_func", + "param1=hello", + "param2=world", + ), "build_vset with my_func + param_dict2 + reps fails" + assert d_keys[1].value == ( + "rep=1", + "func=my_func", + "param1=hello", + "param2=world", + ), "build_vset with my_func + param_dict2 + reps fails" + assert d_keys[2].value == ( + "rep=0", + "func=my_func", + "param1=hello", + "param2=there", + ), "build_vset with my_func + param_dict2 + reps fails" + assert d_keys[3].value == ( + "rep=1", + "func=my_func", + "param1=hello", + "param2=there", + ), "build_vset with my_func + param_dict2 + reps fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords[0] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 fails' - assert d_keywords[1] == {'param1': 'hello', 'param2': 'world', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 fails' - assert d_keywords[2] == {'param1': 'hello', 'param2': 'there', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 fails' - assert d_keywords[3] == {'param1': 'hello', 'param2': 'there', 'param3': 'b'}, \ - 'build_vset with my_func + param_dict2 
fails' + assert d_keywords[0] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" + assert d_keywords[1] == { + "param1": "hello", + "param2": "world", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" + assert d_keywords[2] == { + "param1": "hello", + "param2": "there", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" + assert d_keywords[3] == { + "param1": "hello", + "param2": "there", + "param3": "b", + }, "build_vset with my_func + param_dict2 fails" # 1 func with list of param_dicts - vset = build_vset("vset", my_func, [param_dict1, param_dict2], param3='b') - assert len(vset) == 5, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + vset = build_vset("vset", my_func, [param_dict1, param_dict2], param3="b") + assert ( + len(vset) == 5 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" d_keys = [key[0].value for key in list(vset.vfuncs.keys())] - assert ('func=my_func', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=there') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert ( + "func=my_func", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=there", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert {'param1': 'hello', 'param2': 'world', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'hello', 'param2': 'bar', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'foo', 'param2': 'world', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'foo', 'param2': 'bar', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert {'param1': 'hello', 'param2': 'there', 'param3': 'b'} in d_keywords, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert { + "param1": "hello", + "param2": "world", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert { + "param1": "hello", + "param2": "bar", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert { + "param1": "foo", + "param2": "world", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, 
param_dict2] fails" + assert { + "param1": "foo", + "param2": "bar", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert { + "param1": "hello", + "param2": "there", + "param3": "b", + } in d_keywords, "build_vset with my_func + [param_dict1, param_dict2] fails" # list of funcs with 1 param_dict - vset = build_vset("vset", [my_func, my_func2], param_dict1, param3='b') - assert len(vset) == 8, \ - 'build_vset with [my_func, my_func2] + param_dict1 fails' + vset = build_vset("vset", [my_func, my_func2], param_dict1, param3="b") + assert len(vset) == 8, "build_vset with [my_func, my_func2] + param_dict1 fails" d_keys = [key[0].value for key in list(vset.vfuncs.keys())] - assert ('func=my_func', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert ( + "func=my_func", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" d_keywords = [val.vfunc.keywords for val in list(vset.vfuncs.values())] - assert d_keywords.count({'param1': 'hello', 'param2': 'world', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert d_keywords.count({'param1': 'hello', 'param2': 'bar', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert d_keywords.count({'param1': 'foo', 'param2': 'world', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert d_keywords.count({'param1': 'foo', 'param2': 'bar', 'param3': 'b'}) == 2, \ - 'build_vset with my_func + 
[param_dict1, param_dict2] fails' + assert ( + d_keywords.count({"param1": "hello", "param2": "world", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + d_keywords.count({"param1": "hello", "param2": "bar", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + d_keywords.count({"param1": "foo", "param2": "world", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + d_keywords.count({"param1": "foo", "param2": "bar", "param3": "b"}) == 2 + ), "build_vset with my_func + [param_dict1, param_dict2] fails" + + for key, vfunc in vset.vfuncs.items(): + subkey = key[0] + assert isinstance(subkey, Subkey) + assert len(subkey.value) == 3 + assert all([isinstance(x, str) for x in subkey.value]) + func_name = subkey.value[0][5:] + assert func_name in ["my_func", "my_func2"] + assert subkey.value[1][:6] == "param1" + assert subkey.value[2][:6] == "param2" + param1 = subkey.value[1][7:] + param2 = subkey.value[2][7:] + if func_name == "my_func": + expected_vfunc_output = (param1, param2, "b") + else: + expected_vfunc_output = (param1 + "1", param2 + "2", "b") + assert vfunc.transform() == expected_vfunc_output - # list of funcs with list of param_dicts + # list of funcs with list of param_dicts vset = build_vset("vset", [my_func, my_func2], [param_dict1, param_dict2]) - assert len(vset) == 6, \ - 'build_vset with [my_func, my_func2] + [param_dict1, param_dict2] fails' + assert ( + len(vset) == 6 + ), "build_vset with [my_func, my_func2] + [param_dict1, param_dict2] fails" d_keys = [key[0].value for key in list(vset.vfuncs.keys())] - assert ('func=my_func', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=hello', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func', 'param1=foo', 'param2=bar') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=world') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' - assert ('func=my_func2', 'param1=hello', 'param2=there') in d_keys, \ - 'build_vset with my_func + [param_dict1, param_dict2] fails' + assert ( + "func=my_func", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=hello", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func", + "param1=foo", + "param2=bar", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=world", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" + assert ( + "func=my_func2", + "param1=hello", + "param2=there", + ) in d_keys, "build_vset with my_func + [param_dict1, param_dict2] fails" class my_class: - def __init__(self, param1, param2, param3: str='a'): + def __init__(self, param1, param2, param3: str = "a"): self.param1 = param1 self.param2 = param2 self.param3 = param3 - def fit(self, arg1: str): + def 
fit(self, arg1: str = "default"): self.arg1 = arg1 + return self # my_class without param_dict - vset = build_vset("vset", my_class, param1='hello', param2='world', param3='b') - assert len(vset) == 1, \ - 'build_vset with my_class fails' + vset = build_vset("vset", my_class, param1="hello", param2="world", param3="b") + assert len(vset) == 1, "build_vset with my_class fails" d_key = [key[0] for key in list(vset.vfuncs.keys())][0] - assert d_key.value == 'vset_0', \ - 'build_vset with my_class fails' + assert d_key.value == "vset_0", "build_vset with my_class fails" d_val = [val.vfunc for val in list(vset.vfuncs.values())][0] - assert isinstance(d_val, my_class), \ - 'build_vset with my_class fails' - assert (d_val.param1, d_val.param2, d_val.param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class fails' + assert isinstance(d_val, my_class), "build_vset with my_class fails" + assert (d_val.param1, d_val.param2, d_val.param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class fails" # my_class without param_dict, reps - vset = build_vset("vset", my_class, reps=2, param1='hello', param2='world', param3='b') - assert len(vset) == 2, \ - 'build_vset with my_class + reps fails' + vset = build_vset( + "vset", my_class, reps=2, param1="hello", param2="world", param3="b" + ) + vset.fit() + objs = list(vset.fitted_vfuncs.values()) + assert len(objs) == 3 + assert all([isinstance(x, my_class) for x in objs[:-1]]) + assert isinstance(objs[-1], tuple) + assert isinstance(objs[-1][0], Vset) + + assert len(vset) == 2, "build_vset with my_class + reps fails" d_keys = [key[0].value[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0] == 'rep=0', \ - 'build_vset with my_class + reps fails' - assert d_keys[1] == 'rep=1', \ - 'build_vset with my_class + reps fails' + assert d_keys[0] == "rep=0", "build_vset with my_class + reps fails" + assert d_keys[1] == "rep=1", "build_vset with my_class + reps fails" d_vals = [val.vfunc for val in list(vset.vfuncs.values())] - assert isinstance(d_vals[0], my_class), \ - 'build_vset with my_class + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + reps fails' - assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + reps fails' - assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + reps fails' + assert isinstance(d_vals[0], my_class), "build_vset with my_class + reps fails" + assert isinstance(d_vals[1], my_class), "build_vset with my_class + reps fails" + assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + reps fails" + assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + reps fails" # my_class with param_dict1 - vset = build_vset("vset", my_class, param_dict1, param3='b') - assert len(vset) == 4, \ - 'build_vset with my_class + param_dict1 fails' + vset = build_vset("vset", my_class, param_dict1, param3="b") + assert len(vset) == 4, "build_vset with my_class + param_dict1 fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('func=my_class', 'param1=hello', 'param2=world'), \ - 'build_vset with my_class + param_dict1 fails' - assert d_keys[1].value == ('func=my_class', 'param1=hello', 'param2=bar'), \ - 'build_vset with my_class + param_dict1 fails' - assert d_keys[2].value == 
('func=my_class', 'param1=foo', 'param2=world'), \ - 'build_vset with my_class + param_dict1 fails' - assert d_keys[3].value == ('func=my_class', 'param1=foo', 'param2=bar'), \ - 'build_vset with my_class + param_dict1 fails' + assert d_keys[0].value == ( + "func=my_class", + "param1=hello", + "param2=world", + ), "build_vset with my_class + param_dict1 fails" + assert d_keys[1].value == ( + "func=my_class", + "param1=hello", + "param2=bar", + ), "build_vset with my_class + param_dict1 fails" + assert d_keys[2].value == ( + "func=my_class", + "param1=foo", + "param2=world", + ), "build_vset with my_class + param_dict1 fails" + assert d_keys[3].value == ( + "func=my_class", + "param1=foo", + "param2=bar", + ), "build_vset with my_class + param_dict1 fails" d_vals = [val.vfunc for val in list(vset.vfuncs.values())] - assert isinstance(d_vals[0], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ('hello', 'bar', 'b'), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ('foo', 'world', 'b'), \ - 'build_vset with my_class + param_dict1 fails' - assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ('foo', 'bar', 'b'), \ - 'build_vset with my_class + param_dict1 fails' + assert isinstance( + d_vals[0], my_class + ), "build_vset with my_class + param_dict1 fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict1 fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict1 fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ( + "hello", + "bar", + "b", + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ( + "foo", + "world", + "b", + ), "build_vset with my_class + param_dict1 fails" + assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ( + "foo", + "bar", + "b", + ), "build_vset with my_class + param_dict1 fails" # my_class with param_dict2, reps - vset = build_vset("vset", my_class, param_dict2, reps=2, lazy=True, param3='b') - assert vset._lazy, \ - 'build_vset with my_class + param_dict2 + reps fails' - assert len(vset) == 4, \ - 'build_vset with my_class + param_dict2 + reps fails' + vset = build_vset("vset", my_class, param_dict2, reps=2, lazy=True, param3="b") + assert vset._lazy, "build_vset with my_class + param_dict2 + reps fails" + assert len(vset) == 4, "build_vset with my_class + param_dict2 + reps fails" d_keys = [key[0] for key in list(vset.vfuncs.keys())] - assert d_keys[0].value == ('rep=0', 'func=my_class', 'param1=hello', 'param2=world'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert d_keys[1].value == ('rep=1', 'func=my_class', 'param1=hello', 'param2=world'), \ - 'build_vset 
with my_class + param_dict2 + reps fails' - assert d_keys[2].value == ('rep=0', 'func=my_class', 'param1=hello', 'param2=there'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert d_keys[3].value == ('rep=1', 'func=my_class', 'param1=hello', 'param2=there'), \ - 'build_vset with my_class + param_dict2 + reps fails' + assert d_keys[0].value == ( + "rep=0", + "func=my_class", + "param1=hello", + "param2=world", + ), "build_vset with my_class + param_dict2 + reps fails" + assert d_keys[1].value == ( + "rep=1", + "func=my_class", + "param1=hello", + "param2=world", + ), "build_vset with my_class + param_dict2 + reps fails" + assert d_keys[2].value == ( + "rep=0", + "func=my_class", + "param1=hello", + "param2=there", + ), "build_vset with my_class + param_dict2 + reps fails" + assert d_keys[3].value == ( + "rep=1", + "func=my_class", + "param1=hello", + "param2=there", + ), "build_vset with my_class + param_dict2 + reps fails" d_vals = [val.vfunc for val in list(vset.vfuncs.values())] - assert isinstance(d_vals[0], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert isinstance(d_vals[1], my_class), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ('hello', 'world', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ('hello', 'there', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ('hello', 'there', 'b'), \ - 'build_vset with my_class + param_dict2 + reps fails' - + assert isinstance( + d_vals[0], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert isinstance( + d_vals[1], my_class + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[0].param1, d_vals[0].param2, d_vals[0].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[1].param1, d_vals[1].param2, d_vals[1].param3) == ( + "hello", + "world", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[2].param1, d_vals[2].param2, d_vals[2].param3) == ( + "hello", + "there", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" + assert (d_vals[3].param1, d_vals[3].param2, d_vals[3].param3) == ( + "hello", + "there", + "b", + ), "build_vset with my_class + param_dict2 + reps fails" def test_cum_acc_by_uncertainty(self): - mean_dict = {'group_0': np.array([[0.2, 0.8], [0.25, 0.75], [0.1, 0.9]]), - 'group_1': np.array([[0.4, 0.6], [0.5, 0.5], [0.45, 0.55]])} - std_dict = {'group_0': np.array([[0.003, 0.003], [0.146, 0.146], [0.0023, 0.0023]]), - 'group_1': np.array([[0.0054, 0.0054], [0.2344, 0.2344], [0.5166, 0.5166]])} + mean_dict = { + "group_0": np.array([[0.2, 0.8], [0.25, 0.75], [0.1, 0.9]]), + "group_1": np.array([[0.4, 0.6], [0.5, 0.5], [0.45, 0.55]]), + } + std_dict = 
{ + "group_0": np.array([[0.003, 0.003], [0.146, 0.146], [0.0023, 0.0023]]), + "group_1": np.array([[0.0054, 0.0054], [0.2344, 0.2344], [0.5166, 0.5166]]), + } true_labels = [0, 1, 1] - true_labels_dict = {'y': [0, 1, 1]} + true_labels_dict = {"y": [0, 1, 1]} u0, c0, idx0 = cum_acc_by_uncertainty(mean_dict, std_dict, true_labels) u1, c1, idx1 = cum_acc_by_uncertainty(mean_dict, std_dict, true_labels_dict) assert_equal(u0, u1) assert_equal(c0, c1) assert_equal(idx0, idx1) assert u0.shape == c0.shape == (2, 3) - assert_equal(u0[0], sorted(x[1] for x in std_dict['group_0'])) - assert_equal(u0[1], sorted(x[1] for x in std_dict['group_1'])) - assert_equal(c0[0], [1, 1/2, 2/3]) - assert_equal(c0[1], [0, 0, 1/3]) + assert_equal(u0[0], sorted(x[1] for x in std_dict["group_0"])) + assert_equal(u0[1], sorted(x[1] for x in std_dict["group_1"])) + assert_equal(c0[0], [1, 1 / 2, 2 / 3]) + assert_equal(c0[1], [0, 0, 1 / 3]) assert_equal(idx0[0], [2, 0, 1]) assert_equal(idx0[1], [0, 1, 2]) + + def test_filter_vset_by_metric(self): + X, y = make_classification(n_samples=100, n_features=5) + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=42 + ) # ex. with another split? + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # optionally provide names for each of these + + # subsample data + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) + X_trains, y_trains = subsampling_set(X_train, y_train) + + # fit models + dt_set = build_vset( + name="DT", + func=DecisionTreeClassifier, + param_dict={"criterion": ["gini", "entropy", "log_loss"]}, + ) + dt_set.fit(X_trains, y_trains) + preds_test = dt_set.predict(X_test) + + # get metrics + hard_metrics_set = Vset( + name="hard_metrics", + vfuncs=[accuracy_score, balanced_accuracy_score], + vfunc_keys=["Acc", "Bal_Acc"], + ) + + hard_metrics = hard_metrics_set.evaluate(preds_test, y_test) + df = dict_to_df(hard_metrics) + + filtered_dt_set = filter_vset_by_metric( + metric_dict=hard_metrics, + vset=dt_set, + n_keep=1, + filter_on=["Bal_Acc"], + group=False, + ) + + df_bal_acc = df[df["hard_metrics"] == "Bal_Acc"] + top_DT = df_bal_acc[df_bal_acc["out"] == df_bal_acc["out"].max()]["DT"].iloc[0] + subkey = next(iter(filtered_dt_set.vfuncs.keys()))[0].value + assert top_DT == subkey + + filtered_dt_set = filter_vset_by_metric( + metric_dict=hard_metrics, + vset=dt_set, + n_keep=1, + filter_on=["Acc"], + group=True, + ) + + df_acc = df[df["hard_metrics"] == "Acc"] + df_acc_mean = df_acc.groupby("DT").mean(numeric_only=True) + top_DT = df_acc_mean[df_acc_mean["out"] == df_acc_mean["out"].max()].index[0] + subkey = next(iter(filtered_dt_set.vfuncs.keys()))[0].value + assert top_DT == subkey diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 3d3205c..2ddeb87 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -1,108 +1,137 @@ -import time import os +import sys +import time from functools import partial from shutil import rmtree import numpy as np import pandas as pd +import pytest import ray -import sklearn from numpy.testing import assert_equal -from sklearn.datasets import make_classification +from sklearn.datasets import fetch_california_housing, make_classification from sklearn.ensemble import RandomForestRegressor from sklearn.inspection import permutation_importance from sklearn.linear_model import LogisticRegression from sklearn.metrics import 
accuracy_score, balanced_accuracy_score, r2_score from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import resample -from vflow import Vset, init_args, build_vset # must install vflow first (pip install vflow) +from vflow import Vset, build_vset, dict_to_df, init_args from vflow.subkey import Subkey as sm from vflow.vset import PREV_KEY class TestPipelines: - def setup_method(self): pass def test_subsampling_fitting_metrics_pipeline(self): - """Simplest synthetic pipeline - """ + """Simplest synthetic pipeline""" # initialize data np.random.seed(13) - X, y = sklearn.datasets.make_classification(n_samples=50, n_features=5) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) # ex. with another split? - X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), - names=['X_train', 'X_test', 'y_train', - 'y_test']) # optionally provide names for each of these + X, y = make_classification(n_samples=50, n_features=5) + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=42 + ) # ex. with another split? + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # optionally provide names for each of these # subsample data - subsampling_set = build_vset('subsampling', sklearn.utils.resample, - param_dict={'random_state': list(range(3))}, - n_samples=20) + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) X_trains, y_trains = subsampling_set(X_train, y_train) # fit models - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(max_iter=1000, tol=0.1), - DecisionTreeClassifier()], - vfunc_keys=["LR", "DT"]) + modeling_set = Vset( + name="modeling", + vfuncs=[ + LogisticRegression(max_iter=1000, tol=0.1), + DecisionTreeClassifier(), + ], + vfunc_keys=["LR", "DT"], + ) modeling_set.fit(X_trains, y_trains) preds_test = modeling_set.predict(X_test) # get metrics - hard_metrics_set = Vset(name='hard_metrics', - vfuncs=[accuracy_score, balanced_accuracy_score], - vfunc_keys=["Acc", "Bal_Acc"]) + hard_metrics_set = Vset( + name="hard_metrics", + vfuncs=[accuracy_score, balanced_accuracy_score], + vfunc_keys=["Acc", "Bal_Acc"], + ) hard_metrics = hard_metrics_set.evaluate(preds_test, y_test) # asserts - k1 = (sm('X_test', 'init'), sm('X_train', 'init'), - sm(('func=resample', 'random_state=0'), 'subsampling'), - sm('y_train', 'init'), sm('LR', 'modeling'), - sm('y_test', 'init'), sm('Acc', 'hard_metrics')) - - assert k1 in hard_metrics, 'hard metrics should have ' + str(k1) + ' as key' - assert hard_metrics[k1] > 0.9 # 0.9090909090909091 + k1 = ( + sm("X_test", "init"), + sm("X_train", "init"), + sm(("func=resample", "random_state=0"), "subsampling"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("y_test", "init"), + sm("Acc", "hard_metrics"), + ) + + assert k1 in hard_metrics, "hard metrics should have " + str(k1) + " as key" + assert isinstance(hard_metrics[k1], float) assert PREV_KEY in hard_metrics assert len(hard_metrics.keys()) == 13 def test_feat_engineering(self): - """Feature engineering pipeline - """ + """Feature engineering pipeline""" # get data as df np.random.seed(13) - data = sklearn.datasets.fetch_california_housing() - df = pd.DataFrame.from_dict(data['data']) - df.columns = data['feature_names'] - y = data['target'] - X_train, X_test, y_train, y_test = 
init_args(train_test_split(df, y, random_state=123), - names=['X_train', 'X_test', 'y_train', 'y_test']) + data = fetch_california_housing() + df = pd.DataFrame.from_dict(data["data"]) + df.columns = data["feature_names"] + y = data["target"] + X_train, X_test, y_train, y_test = init_args( + train_test_split(df, y, random_state=123), + names=["X_train", "X_test", "y_train", "y_test"], + ) # feature extraction - extracts two different sets of features from the same data def extract_feats(df: pd.DataFrame, feat_names=None): - """extract specific columns from dataframe - """ + """extract specific columns from dataframe""" if feat_names is None: - feat_names = ['HouseAge', 'AveBedrms', 'Population'] + feat_names = ["HouseAge", "AveBedrms", "Population"] return df[feat_names] - feat_extraction_funcs = [partial(extract_feats, feat_names=['HouseAge', 'AveBedrms', 'Population']), - partial(extract_feats, feat_names=['HouseAge', 'AveBedrms', 'Population', 'MedInc', 'AveOccup']), - ] - feat_extraction = Vset(name='feat_extraction', - vfuncs=feat_extraction_funcs, - output_matching=True) + feat_extraction_funcs = [ + partial(extract_feats, feat_names=["HouseAge", "AveBedrms", "Population"]), + partial( + extract_feats, + feat_names=[ + "HouseAge", + "AveBedrms", + "Population", + "MedInc", + "AveOccup", + ], + ), + ] + feat_extraction = Vset( + name="feat_extraction", vfuncs=feat_extraction_funcs, output_matching=True + ) X_feats_train = feat_extraction(X_train) - modeling_set = Vset(name='modeling', - vfuncs=[DecisionTreeRegressor(), RandomForestRegressor()], - vfunc_keys=["DT", "RF"]) + modeling_set = Vset( + name="modeling", + vfuncs=[DecisionTreeRegressor(), RandomForestRegressor()], + vfunc_keys=["DT", "RF"], + ) # how can we properly pass a y here so that it will fit properly? # this runs, but modeling_set.fitted_vfuncs is empty @@ -112,77 +141,111 @@ def extract_feats(df: pd.DataFrame, feat_names=None): preds_all = modeling_set.predict(X_feats_train) # get metrics - hard_metrics_set = Vset(name='hard_metrics', - vfuncs=[r2_score], - vfunc_keys=["r2"]) + hard_metrics_set = Vset( + name="hard_metrics", vfuncs=[r2_score], vfunc_keys=["r2"] + ) hard_metrics = hard_metrics_set.evaluate(preds_all, y_train) # asserts - k1 = (sm('X_train', 'init'), sm('feat_extraction_0', 'feat_extraction', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('DT', 'modeling'), sm('y_train', 'init'), sm('r2', 'hard_metrics')) - assert k1 in hard_metrics, 'hard metrics should have ' + str(k1) + ' as key' - assert hard_metrics[k1] > 0.99 # 0.9997246132375425 + k1 = ( + sm("X_train", "init"), + sm("feat_extraction_0", "feat_extraction", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("y_train", "init"), + sm("r2", "hard_metrics"), + ) + assert k1 in hard_metrics, "hard metrics should have " + str(k1) + " as key" + assert hard_metrics[k1] > 0.99 # 0.9997246132375425 assert PREV_KEY in hard_metrics assert len(hard_metrics.keys()) == 5 def test_feature_importance(self): - """Simplest synthetic pipeline for feature importance - """ + """Simplest synthetic pipeline for feature importance""" # initialize data np.random.seed(13) - X, y = sklearn.datasets.make_classification(n_samples=50, n_features=5) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) # ex. with another split? 
- X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), - names=['X_train', 'X_test', 'y_train', - 'y_test']) # optionally provide names for each of these + X, y = make_classification(n_samples=50, n_features=5) + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=42 + ) # ex. with another split? + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # optionally provide names for each of these # subsample data - subsampling_set = build_vset('subsampling', sklearn.utils.resample, - param_dict={'random_state': list(range(3))}, - n_samples=20) + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) X_trains, y_trains = subsampling_set(X_train, y_train) # fit models - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(max_iter=1000, tol=0.1), - DecisionTreeClassifier()], - vfunc_keys=["LR", "DT"]) + modeling_set = Vset( + name="modeling", + vfuncs=[ + LogisticRegression(max_iter=1000, tol=0.1), + DecisionTreeClassifier(), + ], + vfunc_keys=["LR", "DT"], + ) modeling_set.fit(X_trains, y_trains) - preds_test = modeling_set.predict(X_test) # get metrics - feature_importance_set = Vset(name='feature_importance', vfuncs=[permutation_importance], - vfunc_keys=["permutation_importance"]) - importances = feature_importance_set.evaluate(modeling_set.fitted_vfuncs, X_test, y_test) + feature_importance_set = Vset( + name="feature_importance", + vfuncs=[permutation_importance], + vfunc_keys=["permutation_importance"], + ) + importances = feature_importance_set.evaluate( + modeling_set.fitted_vfuncs, X_test, y_test + ) # asserts - k1 = (sm('X_train', 'init'), sm(('func=resample', 'random_state=0'), 'subsampling'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init'), sm('permutation_importance', 'feature_importance')) - assert k1 in importances, 'hard metrics should have ' + str(k1) + ' as key' + k1 = ( + sm("X_train", "init"), + sm(("func=resample", "random_state=0"), "subsampling"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + sm("permutation_importance", "feature_importance"), + ) + assert k1 in importances, "hard metrics should have " + str(k1) + " as key" assert PREV_KEY in importances assert len(importances.keys()) == 7 def test_repeated_subsampling(self): np.random.seed(13) - X, y = sklearn.datasets.make_classification(n_samples=50, n_features=5) + X, y = make_classification(n_samples=50, n_features=5) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), - names=['X_train', 'X_test', 'y_train', 'y_test']) + X_train, X_test, y_train, y_test = init_args( + (X_train, X_test, y_train, y_test), + names=["X_train", "X_test", "y_train", "y_test"], + ) # subsample data - subsampling_set = build_vset('subsampling', sklearn.utils.resample, - param_dict={'random_state': list(range(3))}, - n_samples=20) + subsampling_set = build_vset( + "subsampling", + resample, + param_dict={"random_state": list(range(3))}, + n_samples=20, + ) X_trains, y_trains = subsampling_set(X_train, y_train) X_tests, y_tests = subsampling_set(X_test, y_test) - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(max_iter=1000, tol=0.1), - DecisionTreeClassifier()], - vfunc_keys=["LR", "DT"]) + modeling_set = 
Vset( + name="modeling", + vfuncs=[ + LogisticRegression(max_iter=1000, tol=0.1), + DecisionTreeClassifier(), + ], + vfunc_keys=["LR", "DT"], + ) modeling_set.fit(X_trains, y_trains) preds_test = modeling_set.predict(X_tests) @@ -193,41 +256,41 @@ def test_repeated_subsampling(self): assert len(preds_test.keys()) == 19 def test_lazy_eval(self): - def f(arg_name: str = '', i: int = 0): - return arg_name, f'f_iter={i}' + def f(arg_name: str = "", i: int = 0): + return arg_name, f"f_iter={i}" f_vfuncs = [partial(f, i=i) for i in range(3)] - f_arg = init_args(('f_arg',), names=['f_init'])[0] + f_arg = init_args(("f_arg",), names=["f_init"])[0] - f_set = Vset('f', vfuncs=f_vfuncs) - f_lazy_set = Vset('f', vfuncs=f_vfuncs, lazy=True) + f_set = Vset("f", vfuncs=f_vfuncs) + f_lazy_set = Vset("f", vfuncs=f_vfuncs, lazy=True) f_res = f_set(f_arg) f_lazy_res = f_lazy_set(f_arg) assert_equal(f_res.keys(), f_lazy_res.keys()) - def g(tup, arg_name: str = '', i: int = 0): - return tup, arg_name, f'g_iter={i}' + def g(tup, arg_name: str = "", i: int = 0): + return tup, arg_name, f"g_iter={i}" g_vfuncs = [partial(g, i=i) for i in range(2)] - g_arg = init_args(('g_arg',), names=['g_init'])[0] + g_arg = init_args(("g_arg",), names=["g_init"])[0] - g_set = Vset('g', vfuncs=g_vfuncs) - g_lazy_set = Vset('g', vfuncs=g_vfuncs, lazy=True) + g_set = Vset("g", vfuncs=g_vfuncs) + g_lazy_set = Vset("g", vfuncs=g_vfuncs, lazy=True) g_res = g_set(f_res, g_arg, n_out=1) g_lazy_res = g_lazy_set(f_lazy_res, g_arg, n_out=1) assert_equal(g_res.keys(), g_lazy_res.keys()) - def h(tup, arg_name: str = '', i: int = 0): - return tup, arg_name, f'h_iter={i}' + def h(tup, arg_name: str = "", i: int = 0): + return tup, arg_name, f"h_iter={i}" h_vfuncs = [partial(h, i=i) for i in range(2)] - h_arg = init_args(('h_arg',), names=['h_init'])[0] + h_arg = init_args(("h_arg",), names=["h_init"])[0] - h_set = Vset('h', vfuncs=h_vfuncs) + h_set = Vset("h", vfuncs=h_vfuncs) h_res = h_set(g_res, h_arg, n_out=1) h_lazy_res = h_set(g_lazy_res, h_arg, n_out=1) @@ -248,12 +311,14 @@ def test_caching(self): try: np.random.seed(13) X, _ = make_classification(n_samples=50, n_features=5) - X = init_args([X], names=['X'])[0] + X = init_args([X], names=["X"])[0] subsampling_funcs = [partial(costly_compute, row_index=np.arange(25))] - uncached_set = Vset(name='subsampling', vfuncs=subsampling_funcs) - cached_set = Vset(name='subsampling', vfuncs=subsampling_funcs, cache_dir='./') + uncached_set = Vset(name="subsampling", vfuncs=subsampling_funcs) + cached_set = Vset( + name="subsampling", vfuncs=subsampling_funcs, cache_dir="./" + ) # this always takes about 1 seconds begin = time.time() @@ -265,47 +330,68 @@ def test_caching(self): cached_set.fit(X) assert time.time() - begin >= 1 - assert_equal(uncached_set.fitted_vfuncs.keys(), cached_set.fitted_vfuncs.keys()) + assert_equal( + uncached_set.fitted_vfuncs.keys(), cached_set.fitted_vfuncs.keys() + ) # this should be very fast because it's using the already cached results - cached_set2 = Vset(name='subsampling', vfuncs=subsampling_funcs, cache_dir='./') + cached_set2 = Vset( + name="subsampling", vfuncs=subsampling_funcs, cache_dir="./" + ) begin = time.time() cached_set2.fit(X) assert time.time() - begin < 1 - assert_equal(uncached_set.fitted_vfuncs.keys(), cached_set2.fitted_vfuncs.keys()) + assert_equal( + uncached_set.fitted_vfuncs.keys(), cached_set2.fitted_vfuncs.keys() + ) finally: # clean up - rmtree('./joblib') + rmtree("./joblib") + @pytest.mark.skipif(sys.platform == "win32", reason="Does 
not work on Windows.") def test_mlflow_tracking(self, tmp_path): try: - runs_path = os.path.join(tmp_path, 'mlruns') + runs_path = os.path.join(tmp_path, "mlruns") np.random.seed(13) X, y = make_classification(n_samples=50, n_features=5) - X_train, X_test, y_train, y_test = init_args(train_test_split(X, y, random_state=42), - names=['X_train', 'X_test', 'y_train', 'y_test']) + X_train, X_test, y_train, y_test = init_args( + train_test_split(X, y, random_state=42), + names=["X_train", "X_test", "y_train", "y_test"], + ) # fit models - modeling_set = Vset(name='modeling', - vfuncs=[LogisticRegression(C=1, max_iter=1000, tol=0.1)], - vfunc_keys=["LR"]) + modeling_set = Vset( + name="modeling", + vfuncs=[LogisticRegression(C=1, max_iter=1000, tol=0.1)], + vfunc_keys=["LR"], + ) _ = modeling_set.fit(X_train, y_train) preds_test = modeling_set.predict(X_test) - hard_metrics_set = Vset(name='hard_metrics', - vfuncs=[accuracy_score, balanced_accuracy_score], - vfunc_keys=["Acc", "Bal_Acc"], - tracking_dir=runs_path) - hard_metrics = hard_metrics_set.evaluate(y_test, preds_test) + hard_metrics_set = Vset( + name="hard_metrics", + vfuncs=[accuracy_score, balanced_accuracy_score], + vfunc_keys=["Acc", "Bal_Acc"], + tracking_dir=runs_path, + ) + df = dict_to_df(hard_metrics_set.evaluate(preds_test, y_test)) runs_path = os.path.join(runs_path, hard_metrics_set._exp_id) assert os.path.isdir(runs_path) assert len(os.listdir(runs_path)) == 2 - runs_path = os.path.join(runs_path, [d for d in os.listdir(runs_path) if d != 'meta.yaml'][0]) - runs_path = os.path.join(runs_path, 'metrics') - with open(os.path.join(runs_path, 'Acc')) as acc: - assert len(acc.read().split(" ")) == 3 - with open(os.path.join(runs_path, 'Bal_Acc')) as bal_acc: - assert len(bal_acc.read().split(" ")) == 3 + runs_path = os.path.join( + runs_path, [d for d in os.listdir(runs_path) if d != "meta.yaml"][0] + ) + runs_path = os.path.join(runs_path, "metrics") + with open(os.path.join(runs_path, "Acc")) as acc: + acc_split = acc.read().split(" ") + assert len(acc_split) == 3 + acc_from_df = df["out"][df["hard_metrics"] == "Acc"] + assert np.isclose(float(acc_split[1]), acc_from_df) + with open(os.path.join(runs_path, "Bal_Acc")) as bal_acc: + bal_acc_split = bal_acc.read().split(" ") + assert len(bal_acc_split) == 3 + bal_acc_from_df = df["out"][df["hard_metrics"] == "Bal_Acc"] + assert np.isclose(float(bal_acc_split[1]), bal_acc_from_df) finally: # clean up rmtree(tmp_path) @@ -320,19 +406,25 @@ def fun1(a, b=1): def fun2(a, b=1): return a * b - data_param_dict = {'n': [1, 2, 3]} - data_vset = build_vset('data', gen_data, param_dict=data_param_dict, reps=5, lazy=True) + data_param_dict = {"n": [1, 2, 3]} + data_vset = build_vset( + "data", gen_data, param_dict=data_param_dict, reps=5, lazy=True + ) assert len(data_vset.vfuncs) == 15 - fun_param_dict = {'b': [1, 2, 3]} - fun1_vset = build_vset('fun1', fun1, param_dict=fun_param_dict, lazy=True) - fun1_vset_async = build_vset('fun1', fun1, param_dict=fun_param_dict, lazy=True, is_async=True) - fun2_vset = build_vset('fun2', fun2, param_dict=fun_param_dict) - fun2_vset_async = build_vset('fun2', fun2, param_dict=fun_param_dict, is_async=True) + fun_param_dict = {"b": [1, 2, 3]} + fun1_vset = build_vset("fun1", fun1, param_dict=fun_param_dict, lazy=True) + fun1_vset_async = build_vset( + "fun1", fun1, param_dict=fun_param_dict, lazy=True, is_async=True + ) + fun2_vset = build_vset("fun2", fun2, param_dict=fun_param_dict) + fun2_vset_async = build_vset( + "fun2", fun2, 
param_dict=fun_param_dict, is_async=True + ) np.random.seed(13) - ray.init(local_mode=True) + ray.init(num_cpus=1, ignore_reinit_error=True) data = data_vset() @@ -363,26 +455,30 @@ class learner: def fit(self, a): self.a = a return self + def transform(self, b): return self.a + b + def predict(self, x): - return self.a*x + return self.a * x + def predict_proba(self, x): - y = np.exp(-self.a*x) + y = np.exp(-self.a * x) return 1 / (1 + y) vset = Vset("learner", [learner()], is_async=True, lazy=True) - vset.fit(*init_args([.4])) + vset.fit(*init_args([0.4])) data = init_args([np.array([1, 2, 3])])[0] transformed = vset.transform(data) preds = vset.predict(transformed) preds_proba = vset.predict_proba(transformed) assert_equal(list(transformed.values())[0](), [1.4, 2.4, 3.4]) - assert_equal(list(preds.values())[0](), np.array([1.4, 2.4, 3.4])*.4) - assert_equal(list(preds_proba.values())[0](), - 1 / (1 + np.exp(-np.array([1.4, 2.4, 3.4])*.4))) - + assert_equal(list(preds.values())[0](), np.array([1.4, 2.4, 3.4]) * 0.4) + assert_equal( + list(preds_proba.values())[0](), + 1 / (1 + np.exp(-np.array([1.4, 2.4, 3.4]) * 0.4)), + ) def test_lazy_async_two_step(self): def add_a(arr, a=0.4): @@ -392,10 +488,12 @@ class learner: def fit(self, a): self.mean = sum(a) / len(a) return self + def predict(self, x): - return self.mean*x + return self.mean * x + def predict_proba(self, x): - y = np.exp(-self.mean*x) + y = np.exp(-self.mean * x) return 1 / (1 + y) add_a_vset = Vset("add_a", [add_a], lazy=True) @@ -407,8 +505,11 @@ def predict_proba(self, x): preds_proba = vset.predict_proba(transformed) assert_equal(list(transformed.values())[0].value, [1.4, 2.4, 3.4]) - assert_equal(list(preds.values())[0], np.array([1.4*2, 2.4*2, 3.4*2])) - assert_equal(list(preds_proba.values())[0], 1 / (1 + np.exp(-np.array([1.4*2, 2.4*2, 3.4*2])))) + assert_equal(list(preds.values())[0], np.array([1.4 * 2, 2.4 * 2, 3.4 * 2])) + assert_equal( + list(preds_proba.values())[0], + 1 / (1 + np.exp(-np.array([1.4 * 2, 2.4 * 2, 3.4 * 2]))), + ) def costly_compute(data, row_index=0): diff --git a/tests/test_utils.py b/tests/test_utils.py index c2716ce..9a98b19 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,928 +1,1870 @@ +import numpy as np import pandas as pd import pytest from numpy.testing import assert_equal -from vflow.utils import * from vflow.subkey import Subkey as sm +from vflow.utils import ( + PREV_KEY, + apply_vfuncs, + combine_dicts, + dict_to_df, + perturbation_stats, + to_list, +) @pytest.mark.parametrize( - 'in_dicts,out_dict', + "in_dicts,out_dict", [ # first or second dict has only one key ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): 'RF_fitted', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling')): 'LR_fitted', - }, - {(sm('X_test', 'init'),): 'X_test_data'} - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted', 'X_test_data'), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted', 'X_test_data') - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): "RF_fitted", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + ): "LR_fitted", + }, + {(sm("X_test", "init"),): "X_test_data"}, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted", 
"X_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted", "X_test_data"), + }, ), ( - # in_dicts - [ - { - PREV_KEY: ('prev_0', 'prev_1',), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted', 'X_test_data'], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted', 'X_test_data'] - }, - { - (sm('y_test', 'init'),): 'y_test_data', (sm('y_test', 'init'),): 'y_test_data', - PREV_KEY: ('prev_2',), - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted', 'X_test_data'], 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted', 'X_test_data'], 'y_test_data' - ) - } + PREV_KEY: ( + "prev_0", + "prev_1", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted", "X_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted", "X_test_data"], + }, + { + (sm("y_test", "init"),): "y_test_data", + PREV_KEY: ("prev_2",), + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted", "X_test_data"], "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted", "X_test_data"], "y_test_data"), + }, ), ( - # in_dicts - [ - {(sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('RF', 'modeling')): 'RF_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('RF', 'modeling')): 'RF_fitted_1', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('LR', 'modeling')): 'LR_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('LR', 'modeling')): 'LR_fitted_1'}, - {(sm('X_test', 'init'),): 'X_test_data'}, - {(sm('y_test', 'init'),): 'y_test_data'} - - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_fitted_0', 'X_test_data', 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_fitted_1', 'X_test_data', 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'subsample'), - sm('LR', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'LR_fitted_0', 'X_test_data', 'y_test_data' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'subsample'), - sm('LR', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'LR_fitted_1', 'X_test_data', 'y_test_data' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "subsample"), + sm("RF", "modeling"), + ): "RF_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("RF", "modeling"), + ): "RF_fitted_1", + ( + sm("X_train", "init"), + sm("y_train", "init"), + 
sm("subsampling_0", "subsample"), + sm("LR", "modeling"), + ): "LR_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("LR", "modeling"), + ): "LR_fitted_1", + }, + {(sm("X_test", "init"),): "X_test_data"}, + {(sm("y_test", "init"),): "y_test_data"}, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "subsample"), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_fitted_0", "X_test_data", "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_fitted_1", "X_test_data", "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "subsample"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("LR_fitted_0", "X_test_data", "y_test_data"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "subsample"), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("LR_fitted_1", "X_test_data", "y_test_data"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('RF', 'modeling')): 'RF_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('RF', 'modeling')): 'RF_fitted_1', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('LR', 'modeling')): 'LR_fitted_0', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('LR', 'modeling')): 'LR_fitted_1', - }, - { - (sm('X_train', 'init'), sm('subsampling_0', 'origin_0', True)): 'X_train_data_0', - (sm('X_train', 'init'), sm('subsampling_1', 'origin_0', True)): 'X_train_data_1', - }, - { - (sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True)): 'y_train_data_0', - (sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True)): 'y_train_data_1', - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('RF', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'RF_fitted_0', 'X_train_data_0', 'y_train_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('RF', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'RF_fitted_1', 'X_train_data_1', 'y_train_data_1' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_0', 'origin_0', True), - sm('LR', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'LR_fitted_0', 'X_train_data_0', 'y_train_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subsampling_1', 'origin_0', True), - sm('LR', 'modeling'), sm('X_train', 'init'), sm('y_train', 'init')): ( - 'LR_fitted_1', 'X_train_data_1', 'y_train_data_1' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("RF", "modeling"), + ): "RF_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("RF", "modeling"), + ): "RF_fitted_1", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("LR", "modeling"), + ): "LR_fitted_0", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("LR", "modeling"), + ): "LR_fitted_1", + 
}, + { + ( + sm("X_train", "init"), + sm("subsampling_0", "origin_0", True), + ): "X_train_data_0", + ( + sm("X_train", "init"), + sm("subsampling_1", "origin_0", True), + ): "X_train_data_1", + }, + { + ( + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + ): "y_train_data_0", + ( + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + ): "y_train_data_1", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("RF", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("RF_fitted_0", "X_train_data_0", "y_train_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("RF", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("RF_fitted_1", "X_train_data_1", "y_train_data_1"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_0", "origin_0", True), + sm("LR", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("LR_fitted_0", "X_train_data_0", "y_train_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subsampling_1", "origin_0", True), + sm("LR", "modeling"), + sm("X_train", "init"), + sm("y_train", "init"), + ): ("LR_fitted_1", "X_train_data_1", "y_train_data_1"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_11', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_11' - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_00', - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_01', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_10', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_11' - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 
'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_11', 'X_test_data_11' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_11', 'X_test_data_11' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_11", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "X_test_data_00", + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_01", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "X_test_data_10", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_11", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", 
"modeling"), + sm("X_test", "init"), + ): ("RF_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_11", "X_test_data_11"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_11", "X_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling')): 'RF_fitted_11', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling')): 'LR_fitted_11' - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_00', - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_01', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'X_test_data_10', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'X_test_data_11' - } - ], - # out_dict + # in_dicts + [ { - 
(sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'RF_fitted_11', 'X_test_data_11' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_00', 'X_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_01', 'X_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_10', 'X_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): ( - 'LR_fitted_11', 'X_test_data_11' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_fitted_11", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + ): "LR_fitted_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "X_test_data_00", + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_01", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): 
"X_test_data_10", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "X_test_data_11", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("RF_fitted_11", "X_test_data_11"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_00", "X_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_01", "X_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_10", "X_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ("LR_fitted_11", "X_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_00', 'X_test_data_00' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_01', 'X_test_data_01' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_10', 'X_test_data_10' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init')): [ - 'RF_fitted_11', 'X_test_data_11' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_00', 'X_test_data_00' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_01', 'X_test_data_01' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_10', 'X_test_data_10' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), 
sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init')): [ - 'LR_fitted_11', 'X_test_data_11' - ], - }, - { - (sm('y_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True)): 'y_test_data_11' - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_00', 'X_test_data_00'], 'y_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_01', 'X_test_data_01'], 'y_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_10', 'X_test_data_10'], 'y_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['RF_fitted_11', 'X_test_data_11'], 'y_test_data_11' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_00', 'X_test_data_00'], 'y_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_01', 'X_test_data_01'], 'y_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_10', 'X_test_data_10'], 'y_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'modeling'), sm('X_test', 'init'), - sm('y_test', 'init')): ( - ['LR_fitted_11', 'X_test_data_11'], 'y_test_data_11' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_00", "X_test_data_00"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_01", "X_test_data_01"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_10", "X_test_data_10"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + 
sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ["RF_fitted_11", "X_test_data_11"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_00", "X_test_data_00"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_01", "X_test_data_01"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_10", "X_test_data_10"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + ): ["LR_fitted_11", "X_test_data_11"], + }, + { + ( + sm("y_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + ): "y_test_data_11", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_00", "X_test_data_00"], "y_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_01", "X_test_data_01"], "y_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_10", "X_test_data_10"], "y_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["RF_fitted_11", "X_test_data_11"], "y_test_data_11"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_00", "X_test_data_00"], "y_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_01", "X_test_data_01"], "y_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_10", "X_test_data_10"], "y_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", 
True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): (["LR_fitted_11", "X_test_data_11"], "y_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_00', 'X_test_data_00'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_01', 'X_test_data_01'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_10', 'X_test_data_10'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['RF_fitted_11', 'X_test_data_11'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_00', 'X_test_data_00'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_01', 'X_test_data_01'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_10', 'X_test_data_10'], 'y_test_data' - ], - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init')): [ - ['LR_fitted_11', 'X_test_data_11'], 'y_test_data' - ], - }, - { - (sm('LR', 'm_origin', True), sm('acc', 'metrics')): 'LR_acc_func', - (sm('LR', 'm_origin', True), sm('bal_acc', 'metrics')): 'LR_bal_acc_func', - (sm('RF', 'm_origin', True), sm('acc', 'metrics')): 'RF_acc_func', - (sm('RF', 'm_origin', True), sm('feat_imp', 'metrics')): 'RF_feat_imp_func' - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_00', 'X_test_data_00'], 'y_test_data'], 'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_00', 'X_test_data_00'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_01', 'X_test_data_01'], 'y_test_data'], 
'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_01', 'X_test_data_01'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_10', 'X_test_data_10'], 'y_test_data'], 'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_10', 'X_test_data_10'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['RF_fitted_11', 'X_test_data_11'], 'y_test_data'], 'RF_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('feat_imp', 'metrics')): ( - [['RF_fitted_11', 'X_test_data_11'], 'y_test_data'], 'RF_feat_imp_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_00', 'X_test_data_00'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_00', 'X_test_data_00'], 'y_test_data'], 'LR_bal_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_01', 'X_test_data_01'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_0', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_01', 'X_test_data_01'], 'y_test_data'], 'LR_bal_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_10', 'X_test_data_10'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_0', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_10', 'X_test_data_10'], 'y_test_data'], 'LR_bal_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 
'init'), - sm('y_test', 'init'), sm('acc', 'metrics')): ( - [['LR_fitted_11', 'X_test_data_11'], 'y_test_data'], 'LR_acc_func' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('subgroup_1', 's_origin', True), - sm('voxel_extract_1', 'v_origin', True), sm('LR', 'm_origin', True), sm('X_test', 'init'), - sm('y_test', 'init'), sm('bal_acc', 'metrics')): ( - [['LR_fitted_11', 'X_test_data_11'], 'y_test_data'], 'LR_bal_acc_func' - ), - } + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_00", "X_test_data_00"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_01", "X_test_data_01"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_10", "X_test_data_10"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["RF_fitted_11", "X_test_data_11"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_00", "X_test_data_00"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_01", "X_test_data_01"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_10", "X_test_data_10"], "y_test_data"], + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + ): [["LR_fitted_11", "X_test_data_11"], "y_test_data"], + }, + { + (sm("LR", "m_origin", True), sm("acc", "metrics")): "LR_acc_func", + ( + sm("LR", "m_origin", True), + sm("bal_acc", "metrics"), + ): "LR_bal_acc_func", + (sm("RF", "m_origin", True), sm("acc", "metrics")): "RF_acc_func", + ( + sm("RF", "m_origin", True), + sm("feat_imp", "metrics"), + ): "RF_feat_imp_func", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_00", "X_test_data_00"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_00", 
"X_test_data_00"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_01", "X_test_data_01"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_01", "X_test_data_01"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_10", "X_test_data_10"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_10", "X_test_data_10"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["RF_fitted_11", "X_test_data_11"], "y_test_data"], "RF_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("feat_imp", "metrics"), + ): ( + [["RF_fitted_11", "X_test_data_11"], "y_test_data"], + "RF_feat_imp_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_00", "X_test_data_00"], "y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_00", "X_test_data_00"], "y_test_data"], + "LR_bal_acc_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_01", "X_test_data_01"], "y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_0", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_01", "X_test_data_01"], "y_test_data"], + "LR_bal_acc_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_10", "X_test_data_10"], 
"y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_0", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_10", "X_test_data_10"], "y_test_data"], + "LR_bal_acc_func", + ), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("acc", "metrics"), + ): ([["LR_fitted_11", "X_test_data_11"], "y_test_data"], "LR_acc_func"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("subgroup_1", "s_origin", True), + sm("voxel_extract_1", "v_origin", True), + sm("LR", "m_origin", True), + sm("X_test", "init"), + sm("y_test", "init"), + sm("bal_acc", "metrics"), + ): ( + [["LR_fitted_11", "X_test_data_11"], "y_test_data"], + "LR_bal_acc_func", + ), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - } - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_00', 'X_test_data_0', 'y_test_data_00' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_01', 'X_test_data_1', 'y_test_data_01' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_10', 'X_test_data_0', 'y_test_data_10' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), - sm('RF', 'modeling'), sm('X_test', 'init'), sm('y_test', 'init')): ( - 'RF_11', 'X_test_data_1', 'y_test_data_11' - ) + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + 
sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_00", "X_test_data_0", "y_test_data_00"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_01", "X_test_data_1", "y_test_data_01"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_10", "X_test_data_0", "y_test_data_10"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("X_test", "init"), + sm("y_test", "init"), + ): ("RF_11", "X_test_data_1", "y_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - 
(sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_00', 'y_test_data_00', 'X_test_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_01', 'y_test_data_01', 'X_test_data_1' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_10', 'y_test_data_10', 'X_test_data_0' - ), - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling'), sm('y_test', 'init'), - sm('X_test', 'init')): ( - 'RF_11', 'y_test_data_11', 'X_test_data_1' - ) + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_00", "y_test_data_00", "X_test_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_01", "y_test_data_01", "X_test_data_1"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_10", "y_test_data_10", "X_test_data_0"), + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + sm("X_test", "init"), + ): ("RF_11", 
"y_test_data_11", "X_test_data_1"), + }, ), ( - # in_dicts - [ - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - } - ], - # out_dict + # in_dicts + [ { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_0', 'RF_00', 'y_test_data_00' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_1', 'RF_01', 'y_test_data_01' - ), - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_0', 'RF_10', 'y_test_data_10' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('X_train', 'init'), - sm('y_train', 'init'), - sm('voxel_extract_1', 'v_origin', True), sm('RF', 'modeling'), sm('y_test', 'init')): ( - 'X_test_data_1', 'RF_11', 'y_test_data_11' - ) + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): 
"y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", }, + ], + # out_dict + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_0", "RF_00", "y_test_data_00"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_1", "RF_01", "y_test_data_01"), + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_0", "RF_10", "y_test_data_10"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + sm("y_test", "init"), + ): ("X_test_data_1", "RF_11", "y_test_data_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - ], - # out_dict + # in_dicts + [ { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('y_test', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_0', 'y_test_data_00', 'RF_00' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('y_test', 'init'), - sm('voxel_extract_0', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_1', 'y_test_data_01', 'RF_01' - ), - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True), sm('y_test', 'init'), - sm('voxel_extract_1', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_0', 'y_test_data_10', 'RF_10' - ), - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True), sm('y_test', 'init'), - 
sm('voxel_extract_1', 'v_origin', True), sm('X_train', 'init'), sm('y_train', 'init'), - sm('RF', 'modeling')): ( - 'X_test_data_1', 'y_test_data_11', 'RF_11' - ) + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + ], + # out_dict + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_0", "y_test_data_00", "RF_00"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_1", "y_test_data_01", "RF_01"), + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_0", "y_test_data_10", "RF_10"), + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_1", "y_test_data_11", "RF_11"), + }, ), ( - # in_dicts - [ - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), 
sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - ], - # out_dict + # in_dicts + [ { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_00', 'RF_00', 'X_test_data_0' - ), - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_01', 'RF_01', 'X_test_data_1' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_10', 'RF_10', 'X_test_data_0' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling'), sm('X_test', 'init')): ( - 'y_test_data_11', 'RF_11', 'X_test_data_1' - ) + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", }, + ], + # out_dict + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_00", "RF_00", "X_test_data_0"), + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_01", "RF_01", "X_test_data_1"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_train", "init"), 
+ sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_10", "RF_10", "X_test_data_0"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + sm("X_test", "init"), + ): ("y_test_data_11", "RF_11", "X_test_data_1"), + }, ), ( - # in_dicts - [ - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_test', 'init'), sm('subgroup_0', 's_origin', True)): 'X_test_data_0', - (sm('X_test', 'init'), sm('subgroup_1', 's_origin', True)): 'X_test_data_1' - }, - { - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_00', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_01', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True), sm('RF', 'modeling')): 'RF_10', - (sm('X_train', 'init'), sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True), sm('RF', 'modeling')): 'RF_11', - }, - ], - # out_dict + # in_dicts + [ { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_00', 'X_test_data_0', 'RF_00' - ), - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_01', 'X_test_data_1', 'RF_01' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_0', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_10', 'X_test_data_0', 'RF_10' - ), - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), sm('subgroup_1', 's_origin', True), - sm('X_test', 'init'), sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'y_test_data_11', 'X_test_data_1', 'RF_11' - ) + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_test", "init"), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_0", + ( + sm("X_test", "init"), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_1", + }, + { + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", 
"v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_00", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_01", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("RF", "modeling"), + ): "RF_10", + ( + sm("X_train", "init"), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("RF", "modeling"), + ): "RF_11", }, + ], + # out_dict + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_00", "X_test_data_0", "RF_00"), + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_01", "X_test_data_1", "RF_01"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_10", "X_test_data_0", "RF_10"), + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + sm("X_test", "init"), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("y_test_data_11", "X_test_data_1", "RF_11"), + }, ), ( - # in_dicts - [ - { - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True)): 'X_test_data_00', - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True)): 'X_test_data_01', - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True)): 'X_test_data_10', - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True)): 'X_test_data_11', - }, - { - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_00', - (sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_01', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_0', 's_origin', True)): 'y_test_data_10', - (sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('subgroup_1', 's_origin', True)): 'y_test_data_11', - }, - { - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_000', - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 'modeling')): 'RF_001', - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_010', - (sm('X_train', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 
'modeling')): 'RF_011', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_100', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 'modeling')): 'RF_101', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('RF', 'modeling')): 'RF_110', - (sm('X_train', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_train', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('RF', 'modeling')): 'RF_111', - }, - ], - # out_dict + # in_dicts + [ { - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_00', 'y_test_data_00', 'RF_000'), - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_00', 'y_test_data_10', 'RF_001'), - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_01', 'y_test_data_01', 'RF_010'), - (sm('X_test', 'init'), sm('feature_extraction_0', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_01', 'y_test_data_11', 'RF_011'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_10', 'y_test_data_00', 'RF_100'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_0', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_10', 'y_test_data_10', 'RF_101'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_0', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_11', 'y_test_data_01', 'RF_110'), - (sm('X_test', 'init'), sm('feature_extraction_1', 'f_origin', True), - sm('subgroup_1', 's_origin', True), - sm('y_test', 'init'), sm('voxel_extract_1', 'v_origin', True), - sm('X_train', 'init'), sm('y_train', 'init'), sm('RF', 'modeling')): ( - 'X_test_data_11', 'y_test_data_11', 'RF_111'), - - } + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_00", + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + ): 
"X_test_data_01", + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + ): "X_test_data_10", + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + ): "X_test_data_11", + }, + { + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_00", + ( + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_01", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_0", "s_origin", True), + ): "y_test_data_10", + ( + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("subgroup_1", "s_origin", True), + ): "y_test_data_11", + }, + { + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_000", + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_001", + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_010", + ( + sm("X_train", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_011", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_100", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_101", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("RF", "modeling"), + ): "RF_110", + ( + sm("X_train", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_train", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("RF", "modeling"), + ): "RF_111", + }, + ], + # out_dict + { + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_00", "y_test_data_00", "RF_000"), + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_00", "y_test_data_10", "RF_001"), + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_01", "y_test_data_01", 
"RF_010"), + ( + sm("X_test", "init"), + sm("feature_extraction_0", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_01", "y_test_data_11", "RF_011"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_10", "y_test_data_00", "RF_100"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_0", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_10", "y_test_data_10", "RF_101"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_0", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_11", "y_test_data_01", "RF_110"), + ( + sm("X_test", "init"), + sm("feature_extraction_1", "f_origin", True), + sm("subgroup_1", "s_origin", True), + sm("y_test", "init"), + sm("voxel_extract_1", "v_origin", True), + sm("X_train", "init"), + sm("y_train", "init"), + sm("RF", "modeling"), + ): ("X_test_data_11", "y_test_data_11", "RF_111"), + }, ), ( - # in_dicts - [ - { - (sm('X_train', 'init'), sm('standardize_0', 's_origin', True)): 'X_train_0', - (sm('X_train', 'init'), sm('standardize_1', 's_origin', True)): 'X_train_1', - }, - {(sm('y_train', 'init'),): 'y_train_data'} - ], - # out_dict + # in_dicts + [ { - (sm('X_train', 'init'), sm('standardize_0', 's_origin', True), sm('y_train', 'init')): ( - 'X_train_0', 'y_train_data'), - (sm('X_train', 'init'), sm('standardize_1', 's_origin', True), sm('y_train', 'init')): ( - 'X_train_1', 'y_train_data') - } + ( + sm("X_train", "init"), + sm("standardize_0", "s_origin", True), + ): "X_train_0", + ( + sm("X_train", "init"), + sm("standardize_1", "s_origin", True), + ): "X_train_1", + }, + {(sm("y_train", "init"),): "y_train_data"}, + ], + # out_dict + { + ( + sm("X_train", "init"), + sm("standardize_0", "s_origin", True), + sm("y_train", "init"), + ): ("X_train_0", "y_train_data"), + ( + sm("X_train", "init"), + sm("standardize_1", "s_origin", True), + sm("y_train", "init"), + ): ("X_train_1", "y_train_data"), + }, ), - ] + ], ) class TestCombineDicts: - def test_combine_dicts(self, in_dicts, out_dict): result_dict = combine_dicts(*in_dicts) print(result_dict) @@ -930,173 +1872,326 @@ def test_combine_dicts(self, in_dicts, out_dict): @pytest.mark.parametrize( - 'in_dicts,out_dict', + "in_dicts,out_dict", [ ( - # in_dicts - [ - # modules - { - (sm('module_0', 'm_origin'),): lambda x, y: x + y, - (sm('module_1', 'm_origin'),): lambda x, y: x * y, - }, - # data_dict - {(sm('data', 'init'),): [2, 3]} - ], - # out_dict + # in_dicts + [ + # modules { - (sm('data', 'init'), sm('module_0', 'm_origin')): 5, - (sm('data', 'init'), sm('module_1', 'm_origin')): 6 - } + (sm("module_0", "m_origin"),): lambda x, y: x + y, + (sm("module_1", "m_origin"),): lambda x, y: x * y, + }, + # data_dict + {(sm("data", "init"),): [2, 3]}, + ], + # out_dict + { + (sm("data", "init"), sm("module_0", "m_origin")): 5, + (sm("data", "init"), sm("module_1", "m_origin")): 6, + }, ), ( - # 
in_dicts - [ - # modules - { - (sm('group_0', 'g_origin', True), sm('module_0', 'm_origin'),): lambda x, y: x + y, - (sm('group_1', 'g_origin', True), sm('module_1', 'm_origin'),): lambda x, y: x * y, - }, - # data_dict - { - (sm('data', 'init'), sm('group_0', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - (sm('data', 'init'), sm('group_1', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - } - ], - # out_dict + # in_dicts + [ + # modules { - (sm('data', 'init'), sm('group_0', 'g_origin', True), sm('module_0', 'm_origin')): np.array( - [5, 7, 9]), - (sm('data', 'init'), sm('group_1', 'g_origin', True), sm('module_1', 'm_origin')): np.array( - [4, 10, 18]), - } + ( + sm("group_0", "g_origin", True), + sm("module_0", "m_origin"), + ): lambda x, y: x + + y, + ( + sm("group_1", "g_origin", True), + sm("module_1", "m_origin"), + ): lambda x, y: x + * y, + }, + # data_dict + { + (sm("data", "init"), sm("group_0", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], + (sm("data", "init"), sm("group_1", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], + }, + ], + # out_dict + { + ( + sm("data", "init"), + sm("group_0", "g_origin", True), + sm("module_0", "m_origin"), + ): np.array([5, 7, 9]), + ( + sm("data", "init"), + sm("group_1", "g_origin", True), + sm("module_1", "m_origin"), + ): np.array([4, 10, 18]), + }, ), ( - # in_dicts - [ - # modules - { - (sm('data', 'init'), sm('group_0', 'g_origin', True), sm('module_0', 'm_origin'),): lambda x, - y: x + y, - (sm('data', 'init'), sm('group_1', 'g_origin', True), sm('module_1', 'm_origin'),): lambda x, - y: x * y, - }, - # data_dict - { - (sm('data', 'init2'), sm('group_0', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - (sm('data', 'init2'), sm('group_1', 'g_origin', True)): [np.array([1, 2, 3]), - np.array([4, 5, 6])], - } - ], - # out_dict + # in_dicts + [ + # modules + { + ( + sm("data", "init"), + sm("group_0", "g_origin", True), + sm("module_0", "m_origin"), + ): lambda x, y: x + + y, + ( + sm("data", "init"), + sm("group_1", "g_origin", True), + sm("module_1", "m_origin"), + ): lambda x, y: x + * y, + }, + # data_dict { - (sm('data', 'init2'), sm('group_0', 'g_origin', True), sm('data', 'init'), - sm('module_0', 'm_origin')): np.array([5, 7, 9]), - (sm('data', 'init2'), sm('group_1', 'g_origin', True), sm('data', 'init'), - sm('module_1', 'm_origin')): np.array([4, 10, 18]), + (sm("data", "init2"), sm("group_0", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], + (sm("data", "init2"), sm("group_1", "g_origin", True)): [ + np.array([1, 2, 3]), + np.array([4, 5, 6]), + ], }, + ], + # out_dict + { + ( + sm("data", "init2"), + sm("group_0", "g_origin", True), + sm("data", "init"), + sm("module_0", "m_origin"), + ): np.array([5, 7, 9]), + ( + sm("data", "init2"), + sm("group_1", "g_origin", True), + sm("data", "init"), + sm("module_1", "m_origin"), + ): np.array([4, 10, 18]), + }, ), - ] + ], ) class TestApplyVfuncs: - def test_apply_vfuncs(self, in_dicts, out_dict): result_dict = apply_vfuncs(*in_dicts) assert_equal(result_dict, out_dict) -class TestUtils: +class TestUtils: def test_to_list(self): - assert to_list((['x1', 'x2', 'x3'], ['y1', 'y2', 'y3'])) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] - assert to_list((['x1'], ['y1'])) == [['x1', 'y1']] - assert to_list((['x1', 'x2', 'x3'],)) == [['x1'], ['x2'], ['x3']] - assert to_list(('x1',)) == [['x1']] - assert to_list(('x1', 'y1')) == [['x1', 'y1']] - assert 
to_list(('x1', 'x2', 'x3', 'y1', 'y2', 'y3')) == [['x1', 'y1'], ['x2', 'y2'], ['x3', 'y3']] + assert to_list((["x1", "x2", "x3"], ["y1", "y2", "y3"])) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] + assert to_list((["x1"], ["y1"])) == [["x1", "y1"]] + assert to_list((["x1", "x2", "x3"],)) == [["x1"], ["x2"], ["x3"]] + assert to_list(("x1",)) == [["x1"]] + assert to_list(("x1", "y1")) == [["x1", "y1"]] + assert to_list(("x1", "x2", "x3", "y1", "y2", "y3")) == [ + ["x1", "y1"], + ["x2", "y2"], + ["x3", "y3"], + ] with pytest.raises(ValueError): - to_list(('x1', 'x2', 'x3', 'y1', 'y2')) + to_list(("x1", "x2", "x3", "y1", "y2")) def test_dict_to_df(self): - in_dict_1 = {(sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('acc', 'metrics')): 0.9, - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('acc', 'metrics')): 0.95} - out_df_1 = pd.DataFrame(data={'init-feat_extract': ['X_train', 'X_train'], - 'feat_extract': ['feat_extract_0', 'feat_extract_1'], - 'init-modeling': ['y_train', 'y_train'], - 'modeling': ['DT', 'DT'], - 'metrics': ['acc', 'acc'], - 'out': [0.9, 0.95]}) - in_dict_2 = {(sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-3'), 'modeling'), sm('s_0', 'stability')): 0.333, - (sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-5'), 'modeling'), sm('s_0', 'stability')): 0.452} - out_df_2 = pd.DataFrame(data={'init-sample': ['X_train', 'X_train'], - 'sample': ['sample_0', 'sample_0'], - 'init-modeling': ['y_train', 'y_train'], - 'k-modeling': ['10', '10'], - 'e-modeling': ['1e-3', '1e-5'], - 'stability': ['s_0', 's_0'], - 'out': [0.333, 0.452]}) - in_dict_3 = {(sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-3'), 'modeling'), sm('s_0', 'stability')): [0.333, 0.222], - (sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-5'), 'modeling'), sm('s_0', 'stability')): [0.452, 0.322]} - out_df_3 = pd.DataFrame(data={'init-sample': ['X_train', 'X_train'], - 'sample': ['sample_0', 'sample_0'], - 'init-modeling': ['y_train', 'y_train'], - 'modeling': [('k=10', 'e=1e-3'), ('k=10', 'e=1e-5')], - 'stability': ['s_0', 's_0'], - 'out': [[0.333, 0.222], [0.452, 0.322]], - 'out-0': [0.333, 0.452], - 'out-1': [0.222, 0.322]}) - in_dict_4 = {(sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-3'), 'modeling'), sm('s_0', 'stability')): {'k1': 0.333, 'k2': 0.222}, - (sm('X_train', 'init'), sm('sample_0', 'sample'), sm('y_train', 'init'), - sm(('k=10', 'e=1e-5'), 'modeling'), sm('s_0', 'stability')): {'k1': 0.452, 'k2': 0.322}} - out_df_4 = pd.DataFrame(data={'init-sample': ['X_train', 'X_train'], - 'sample': ['sample_0', 'sample_0'], - 'init-modeling': ['y_train', 'y_train'], - 'modeling': [('k=10', 'e=1e-3'), ('k=10', 'e=1e-5')], - 'stability': ['s_0', 's_0'], - 'out': [{'k1': 0.333, 'k2': 0.222}, {'k1': 0.452, 'k2': 0.322}], - 'out-k1': [0.333, 0.452], - 'out-k2': [0.222, 0.322]}) + in_dict_1 = { + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("acc", "metrics"), + ): 0.9, + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("acc", "metrics"), + ): 0.95, + } + out_df_1 = pd.DataFrame( + data={ + "init-feat_extract": 
["X_train", "X_train"], + "feat_extract": ["feat_extract_0", "feat_extract_1"], + "init-modeling": ["y_train", "y_train"], + "modeling": ["DT", "DT"], + "metrics": ["acc", "acc"], + "out": [0.9, 0.95], + } + ) + in_dict_2 = { + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-3"), "modeling"), + sm("s_0", "stability"), + ): 0.333, + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-5"), "modeling"), + sm("s_0", "stability"), + ): 0.452, + } + out_df_2 = pd.DataFrame( + data={ + "init-sample": ["X_train", "X_train"], + "sample": ["sample_0", "sample_0"], + "init-modeling": ["y_train", "y_train"], + "k-modeling": ["10", "10"], + "e-modeling": ["1e-3", "1e-5"], + "stability": ["s_0", "s_0"], + "out": [0.333, 0.452], + } + ) + in_dict_3 = { + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-3"), "modeling"), + sm("s_0", "stability"), + ): [0.333, 0.222], + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-5"), "modeling"), + sm("s_0", "stability"), + ): [0.452, 0.322], + } + out_df_3 = pd.DataFrame( + data={ + "init-sample": ["X_train", "X_train"], + "sample": ["sample_0", "sample_0"], + "init-modeling": ["y_train", "y_train"], + "modeling": [("k=10", "e=1e-3"), ("k=10", "e=1e-5")], + "stability": ["s_0", "s_0"], + "out": [[0.333, 0.222], [0.452, 0.322]], + "out-0": [0.333, 0.452], + "out-1": [0.222, 0.322], + } + ) + in_dict_4 = { + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-3"), "modeling"), + sm("s_0", "stability"), + ): {"k1": 0.333, "k2": 0.222}, + ( + sm("X_train", "init"), + sm("sample_0", "sample"), + sm("y_train", "init"), + sm(("k=10", "e=1e-5"), "modeling"), + sm("s_0", "stability"), + ): {"k1": 0.452, "k2": 0.322}, + } + out_df_4 = pd.DataFrame( + data={ + "init-sample": ["X_train", "X_train"], + "sample": ["sample_0", "sample_0"], + "init-modeling": ["y_train", "y_train"], + "modeling": [("k=10", "e=1e-3"), ("k=10", "e=1e-5")], + "stability": ["s_0", "s_0"], + "out": [{"k1": 0.333, "k2": 0.222}, {"k1": 0.452, "k2": 0.322}], + "out-k1": [0.333, 0.452], + "out-k2": [0.222, 0.322], + } + ) assert dict_to_df(in_dict_1).equals(out_df_1) - assert dict_to_df(in_dict_2, param_key='modeling').equals(out_df_2) - assert dict_to_df(in_dict_3, param_key='out').equals(out_df_3) - assert dict_to_df(in_dict_4, param_key='out').equals(out_df_4) + assert dict_to_df(in_dict_2, param_key="modeling").equals(out_df_2) + assert dict_to_df(in_dict_3, param_key="out").equals(out_df_3) + assert dict_to_df(in_dict_4, param_key="out").equals(out_df_4) def test_perturbation_stats(self): - in_dict = {(sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): 0.455, - (sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): 0.522, - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): 0.76, - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): 0.95} + in_dict = { + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): 0.455, + ( + 
sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): 0.522, + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): 0.76, + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): 0.95, + } df = dict_to_df(in_dict) - stats = perturbation_stats(df, 'feat_extract') - cols = ['feat_extract', 'count', 'mean', 'std'] + stats = perturbation_stats(df, "feat_extract") + cols = ["feat_extract", "count", "mean", "std"] assert all(c in cols for c in stats.columns) - assert round(stats.loc[0]['mean'], 4) == 0.4885 - assert round(stats.loc[1]['std'], 6) == 0.134350 + assert round(stats.loc[0]["mean"], 4) == 0.4885 + assert round(stats.loc[1]["std"], 6) == 0.134350 - in_dict = {(sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): [0.6, 0.3, 0.4], - (sm('X_train', 'init'), sm('feat_extract_0', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): [0.94, 0.33, 0.24], - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('DT', 'modeling'), sm('feat_imp', 'metrics')): [0.26, 0.31, 0.47], - (sm('X_train', 'init'), sm('feat_extract_1', 'feat_extract'), - sm('y_train', 'init'), sm('LR', 'modeling'), sm('feat_imp', 'metrics')): [0.76, 0.883, 0.354]} + in_dict = { + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): [0.6, 0.3, 0.4], + ( + sm("X_train", "init"), + sm("feat_extract_0", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): [0.94, 0.33, 0.24], + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("DT", "modeling"), + sm("feat_imp", "metrics"), + ): [0.26, 0.31, 0.47], + ( + sm("X_train", "init"), + sm("feat_extract_1", "feat_extract"), + sm("y_train", "init"), + sm("LR", "modeling"), + sm("feat_imp", "metrics"), + ): [0.76, 0.883, 0.354], + } df = dict_to_df(in_dict) - stats = perturbation_stats(df, 'feat_extract', prefix='o', split=True) + stats = perturbation_stats(df, "feat_extract", prefix="o", split=True) assert len(stats.columns) == 8 - assert stats.columns[1] == 'o-count' - assert stats.columns[-1] == 'o2-std' - assert stats.loc[1]['o2-std'] == 0.0820243866176395 + assert stats.columns[1] == "o-count" + assert stats.columns[-1] == "o2-std" + assert stats.loc[1]["o2-std"] == 0.0820243866176395 diff --git a/vflow/__init__.py b/vflow/__init__.py index 6ad3911..58822ee 100644 --- a/vflow/__init__.py +++ b/vflow/__init__.py @@ -1,8 +1,60 @@ """ .. 
include:: ../README.md """ -from .vfunc import * -from .vset import * -from .pipeline import * -from .utils import * -from .helpers import * + +from .helpers import ( + build_vset, + cum_acc_by_uncertainty, + filter_vset_by_metric, + init_args, +) +from .pipeline import PCSPipeline, build_graph +from .subkey import Subkey +from .utils import ( + apply_vfuncs, + base_dict, + combine_dicts, + combine_keys, + dict_data, + dict_keys, + dict_to_df, + init_step, + perturbation_stats, + sep_dicts, + to_list, + to_tuple, +) +from .vfunc import AsyncVfunc, Vfunc, VfuncPromise +from .vset import Vset + +__all__ = [ + # vflow.helpers + "init_args", + "build_vset", + "filter_vset_by_metric", + "cum_acc_by_uncertainty", + # vflow.pipeline + "PCSPipeline", + "build_graph", + # vflow.subkey + "Subkey", + # vflow.utils + "apply_vfuncs", + "base_dict", + "combine_dicts", + "combine_keys", + "dict_data", + "dict_keys", + "dict_to_df", + "init_step", + "perturbation_stats", + "sep_dicts", + "to_list", + "to_tuple", + # vflow.vfunc + "Vfunc", + "AsyncVfunc", + "VfuncPromise", + # vflow.vset + "Vset", +] diff --git a/vflow/helpers.py b/vflow/helpers.py index b8c1e70..9364c37 100644 --- a/vflow/helpers.py +++ b/vflow/helpers.py @@ -1,5 +1,6 @@ """User-facing helper functions included at import vflow """ + from functools import partial from itertools import product from typing import Union @@ -7,9 +8,9 @@ import mlflow import numpy as np -from vflow.utils import dict_to_df, dict_keys, dict_data +from vflow.utils import dict_data, dict_keys, dict_to_df from vflow.vfunc import Vfunc -from vflow.vset import Vset, Subkey, PREV_KEY, FILTER_PREV_KEY +from vflow.vset import FILTER_PREV_KEY, PREV_KEY, Subkey, Vset def init_args(args_tuple: Union[tuple, list], names=None): @@ -21,28 +22,40 @@ def init_args(args_tuple: Union[tuple, list], names=None): given names for each of the arguments in the tuple """ if names is None: - names = ['start'] * len(args_tuple) + names = ["start"] * len(args_tuple) else: - assert len(names) == len(args_tuple), 'names should be same length as args_tuple' + assert len(names) == len( + args_tuple + ), "names should be same length as args_tuple" output_dicts = [] for i, _ in enumerate(args_tuple): - output_dicts.append({ - (Subkey(names[i], 'init'),): args_tuple[i], - PREV_KEY: ('init',), - }) + output_dicts.append( + { + (Subkey(names[i], "init"),): args_tuple[i], + PREV_KEY: ("init",), + } + ) return output_dicts -def build_vset(name: str, func, param_dict=None, reps: int = 1, - is_async: bool = False, output_matching: bool = False, - lazy: bool = False, cache_dir: str = None, - tracking_dir: str = None, **kwargs) -> Vset: +def build_vset( + name: str, + func, + param_dict=None, + reps: int = 1, + is_async: bool = False, + output_matching: bool = False, + lazy: bool = False, + cache_dir: str = None, + tracking_dir: str = None, + **kwargs, +) -> Vset: """Builds a new Vset by currying or instantiating callable `func` with all combinations of parameters in `param_dict` and optional additional `**kwargs`. - If `func` and `param_dict` are lists, then the ith entry of `func` will be - curried with ith entry of `param_dict`. If only one of `func` or `param_dict` + If `func` and `param_dict` are lists, then the ith entry of `func` will be + curried with ith entry of `param_dict`. If only one of `func` or `param_dict` is a list, the same `func`/`param_dict` will be curried for all entries in the - list. Vfuncs are named with `param_dict` items as tuples of + list. 
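For instance (an illustrative sketch with hypothetical callables `f` and `g`, not taken from the patch), `build_vset("models", [f, g], [{"p": [1, 2]}, {"q": [0.1]}])` would curry `f` with `p=1` and with `p=2` and `g` with `q=0.1`, collecting all three resulting Vfuncs into a single Vset.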
Vfuncs are named with `param_dict` items as tuples of str("param_name=param_val"). Parameters @@ -53,7 +66,7 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, A callable to use as the base for Vfuncs in the output Vset. Can also be a class object, in which case the class is immediately instantiated with the parameter combinations from `param_dict`. Can also be a list of - callables, where the ith entry corresponds to `param_dict` or the ith + callables, where the ith entry corresponds to `param_dict` or the ith entry of `param_dict` (if `param_dict` is a list). param_dict : dict[str, list] or list[dict[str, list]], optional A dict with string keys corresponding to argument names of `func` and @@ -88,8 +101,9 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, pd_list = [] if isinstance(func, list): if isinstance(param_dict, list): - assert len(param_dict) == len(func), \ - 'list of param_dicts must be same length as list of funcs' + assert len(param_dict) == len( + func + ), "list of param_dicts must be same length as list of funcs" f_list.extend(func) pd_list.extend(param_dict) else: @@ -101,29 +115,29 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, else: f_list.append(func) pd_list.append(param_dict) - + vfuncs = [] vkeys = [] for f, pd in zip(f_list, pd_list): if pd is None: pd = {} - assert callable(f), 'func must be callable' - + assert callable(f), "func must be callable" + kwargs_tuples = product(*list(pd.values())) for tup in kwargs_tuples: kwargs_dict = {} - vkey_tup = (f'func={f.__name__}', ) + vkey_tup = (f"func={f.__name__}",) for param_name, param_val in zip(list(pd.keys()), tup): kwargs_dict[param_name] = param_val - vkey_tup += (f'{param_name}={param_val}', ) + vkey_tup += (f"{param_name}={param_val}",) # add additional fixed kwargs to kwargs_dict for k, v in kwargs.items(): kwargs_dict[k] = v for i in range(reps): # add vfunc key to vkeys if reps > 1: - vkeys.append((f'rep={i}', ) + vkey_tup) + vkeys.append((f"rep={i}",) + vkey_tup) else: vkeys.append(vkey_tup) # check if func is a class @@ -132,18 +146,33 @@ def build_vset(name: str, func, param_dict=None, reps: int = 1, vfuncs.append(Vfunc(vfunc=f(**kwargs_dict), name=str(vkey_tup))) else: # use partial to wrap func - vfuncs.append(Vfunc(vfunc=partial(f, **kwargs_dict), name=str(vkey_tup))) + vfuncs.append( + Vfunc(vfunc=partial(f, **kwargs_dict), name=str(vkey_tup)) + ) if all(pd is None for pd in pd_list) and reps == 1: vkeys = None - - return Vset(name, vfuncs, is_async=is_async, vfunc_keys=vkeys, - output_matching=output_matching, lazy=lazy, - cache_dir=cache_dir, tracking_dir=tracking_dir) + + return Vset( + name, + vfuncs, + is_async=is_async, + vfunc_keys=vkeys, + output_matching=output_matching, + lazy=lazy, + cache_dir=cache_dir, + tracking_dir=tracking_dir, + ) -def filter_vset_by_metric(metric_dict: dict, vset: Vset, *vsets: Vset, n_keep: int = 1, - bigger_is_better: bool = True, filter_on=None, - group: bool = False) -> Union[Vset, list]: +def filter_vset_by_metric( + metric_dict: dict, + vset: Vset, + *vsets: Vset, + n_keep: int = 1, + bigger_is_better: bool = True, + filter_on=None, + group: bool = False, +) -> Union[Vset, list]: """Returns a new Vset by filtering `vset.vfuncs` based on values in filter_dict. 
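For example (an illustrative sketch, not from the patch: `modeling_set` is a hypothetical Vset of candidate models and `metric_dict` the output of a metric Vset applied downstream of it), `filtered = filter_vset_by_metric(metric_dict, modeling_set, n_keep=2)` returns a new Vset named `"filtered_" + modeling_set.name` containing only the two vfuncs with the best metric values (largest by default; smallest when `bigger_is_better=False`).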
Parameters @@ -178,28 +207,45 @@ def filter_vset_by_metric(metric_dict: dict, vset: Vset, *vsets: Vset, n_keep: i vset_names = [] for vset_i in vsets: if vset_i.name not in df.columns: - raise ValueError((f'{vset_i.name} should be one ' - 'of the columns of dict_to_df(metric_dict)')) + raise ValueError( + ( + f"{vset_i.name} should be one " + "of the columns of dict_to_df(metric_dict)" + ) + ) vset_names.append(vset_i.name) if len(filter_on) > 0: filter_col = list(metric_dict.keys())[0][-1].origin df = df[df[filter_col].isin(filter_on)] if group: - df = df.groupby(by=vset_names, as_index=False).mean() + df = df.groupby(by=vset_names, as_index=False).mean(numeric_only=True) if bigger_is_better: - df = df.sort_values(by='out', ascending=False) + df = df.sort_values(by="out", ascending=False) else: - df = df.sort_values(by='out') + df = df.sort_values(by="out") df = df.iloc[0:n_keep] for i, vset_i in enumerate(vsets): vfuncs = vset_i.vfuncs vfunc_filter = [str(name) for name in df[vset_i.name].to_numpy()] new_vfuncs = {k: v for k, v in vfuncs.items() if str(v.name) in vfunc_filter} tracking_dir = None if vset_i._mlflow is None else mlflow.get_tracking_uri() - new_vset = Vset('filtered_' + vset_i.name, new_vfuncs, is_async=vset_i._async, - output_matching=vset_i._output_matching, lazy=vset_i._lazy, - cache_dir=vset_i._cache_dir, tracking_dir=tracking_dir) - setattr(new_vset, FILTER_PREV_KEY, (metric_dict[PREV_KEY], vset_i,)) + new_vset = Vset( + "filtered_" + vset_i.name, + new_vfuncs, + is_async=vset_i._async, + output_matching=vset_i._output_matching, + lazy=vset_i._lazy, + cache_dir=vset_i._cache_dir, + tracking_dir=tracking_dir, + ) + setattr( + new_vset, + FILTER_PREV_KEY, + ( + metric_dict[PREV_KEY], + vset_i, + ), + ) setattr(new_vset, PREV_KEY, getattr(new_vset, FILTER_PREV_KEY)) vsets[i] = new_vset if len(vsets) == 1: @@ -221,20 +267,26 @@ def cum_acc_by_uncertainty(mean_preds, std_preds, true_labels): TODO: generalize to multi-class classification """ - assert dict_keys(mean_preds) == dict_keys(std_preds), \ - "mean_preds and std_preds must share the same keys" + assert dict_keys(mean_preds) == dict_keys( + std_preds + ), "mean_preds and std_preds must share the same keys" # match predictions on keys - paired_preds = [[d[k] for d in (mean_preds, std_preds)] for k in dict_keys(mean_preds)] - mean_preds, std_preds = (np.array(p)[:,:,1] for p in zip(*paired_preds)) + paired_preds = [ + [d[k] for d in (mean_preds, std_preds)] for k in dict_keys(mean_preds) + ] + mean_preds, std_preds = (np.array(p)[:, :, 1] for p in zip(*paired_preds)) if isinstance(true_labels, dict): true_labels = dict_data(true_labels) - assert len(true_labels) == 1, 'true_labels should have a single 1D vector entry' + assert len(true_labels) == 1, "true_labels should have a single 1D vector entry" true_labels = true_labels[0] n_obs = len(mean_preds[0]) - assert len(true_labels) == n_obs, \ - f'true_labels has {len(true_labels)} obs. but should have same as predictions ({n_obs})' + assert ( + len(true_labels) == n_obs + ), f"true_labels has {len(true_labels)} obs. 
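# ---------------------------------------------------------------------------
# A self-contained toy run of filter_vset_by_metric, assuming scikit-learn;
# the names below are illustrative, not from this patch. The metric dict
# produced by evaluate() is filtered down to the single best accuracy,
# yielding a smaller Vset.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from vflow import Vset, build_vset, filter_vset_by_metric, init_args

X, y = np.random.randn(50, 3), np.random.randint(0, 2, 50)
X_dict, y_dict = init_args((X, y), names=["X", "y"])
models = build_vset("models", LogisticRegression, param_dict={"C": [0.1, 1.0]})
models.fit(X_dict, y_dict)
hard_metrics = Vset("hard_metrics", [accuracy_score], ["acc"])
acc_dict = hard_metrics.evaluate(models.predict(X_dict), y_dict)
best_models = filter_vset_by_metric(acc_dict, models, n_keep=1)
print(len(best_models))  # 1
# ---------------------------------------------------------------------------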
but should have same as predictions ({n_obs})" sorted_idx = np.argsort(std_preds, axis=1) - correct_labels = np.take_along_axis(np.around(mean_preds) - true_labels == 0, sorted_idx, 1) + correct_labels = np.take_along_axis( + np.around(mean_preds) - true_labels == 0, sorted_idx, 1 + ) uncertainty = np.take_along_axis(std_preds, sorted_idx, 1) - cum_acc = np.cumsum(correct_labels, axis=1) / range(1, n_obs+1) + cum_acc = np.cumsum(correct_labels, axis=1) / range(1, n_obs + 1) return uncertainty, cum_acc, sorted_idx diff --git a/vflow/pipeline.py b/vflow/pipeline.py index 352fddf..04d6632 100644 --- a/vflow/pipeline.py +++ b/vflow/pipeline.py @@ -1,5 +1,6 @@ """Class that stores the entire pipeline of steps in a data-science workflow """ + import itertools import joblib @@ -27,21 +28,19 @@ def __init__(self, steps=None, cache_dir=None): self.memory = joblib.Memory(location=cache_dir) def run(self, *args, **kwargs): - """Runs the pipeline - """ + """Runs the pipeline""" run_step_cached = self.memory.cache(_run_step) for i, step in enumerate(self.steps): try: step_name = step.name except AttributeError: - step_name = f'Step {i}' + step_name = f"Step {i}" print(step_name) _, fitted_step = run_step_cached(step, *args, **kwargs) self.steps[i] = fitted_step def __getitem__(self, i): - """Accesses ith step of pipeline - """ + """Accesses ith step of pipeline""" return self.steps[i] def __len__(self): @@ -51,13 +50,13 @@ def generate_names(self, as_pandas=True): name_lists = [] if as_pandas: for step in self.steps: - name_lists.append([f'{i}_{str(mod)[:8]}' - for i, mod in enumerate(step)]) + name_lists.append([f"{i}_{str(mod)[:8]}" for i, mod in enumerate(step)]) indexes = list(itertools.product(*name_lists)) return pd.DataFrame(indexes, columns=[step.name for step in self.steps]) for step in self.steps: - name_lists.append([f'{step.name}_{i}_{str(mod)[:8]}' - for i, mod in enumerate(step)]) + name_lists.append( + [f"{step.name}_{i}_{str(mod)[:8]}" for i, mod in enumerate(step)] + ) return list(itertools.product(*name_lists)) @@ -85,7 +84,7 @@ def unnest_node(node): unnested_node: str, Vset, or None """ node_type = type(node) - if node_type is str or 'Vset' in str(node_type): + if node_type is str or "Vset" in str(node_type): return node if node_type is tuple: return unnest_node(node[0]) @@ -109,7 +108,7 @@ def build_graph_recur(node, G): # initial case: starting at dict if isinstance(node, dict): - s_node = 'End' + s_node = "End" nodes_prev = node[PREV_KEY] G.add_edge(nodes_prev[0], s_node) for node_prev in nodes_prev[1:]: @@ -118,7 +117,7 @@ def build_graph_recur(node, G): return G # main case: at a vfuncset - if 'Vset' in str(type(node)): + if "Vset" in str(type(node)): if hasattr(node, PREV_KEY): nodes_prev = getattr(node, PREV_KEY) for node_prev in nodes_prev: @@ -140,7 +139,7 @@ def build_graph_recur(node, G): G = nx.DiGraph() G = build_graph_recur(node, G) if draw: - nx.draw(G, with_labels=True, node_color='#CCCCCC') + nx.draw(G, with_labels=True, node_color="#CCCCCC") return G diff --git a/vflow/subkey.py b/vflow/subkey.py index bc91a49..9467d35 100644 --- a/vflow/subkey.py +++ b/vflow/subkey.py @@ -1,7 +1,8 @@ """Defines a parameter from some origin Vset """ -class Subkey: + +class Subkey: def __init__(self, value, origin: str, output_matching: bool = False): """ Parameters @@ -21,16 +22,16 @@ def __init__(self, value, origin: str, output_matching: bool = False): self.sep_dicts_id = None def is_matching(self): - """Checks if subkey should be matched in other Vsets - """ + """Checks if 
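# ---------------------------------------------------------------------------
# A sketch of the graph utilities above: every output dict carries __prev__
# pointers, so build_graph can reconstruct the pipeline DAG. Reuses the toy
# acc_dict from the filtering sketch; networkx is required (and matplotlib
# if draw=True).
from vflow import build_graph

G = build_graph(acc_dict, draw=False)
print(G.number_of_nodes(), G.number_of_edges())
# ---------------------------------------------------------------------------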
subkey should be matched in other Vsets""" return self.output_matching or self.sep_dicts_id is not None def matches_sep_dict_id(self, other: object): - """Helper to match Subkey by _sep_dict_id - """ + """Helper to match Subkey by _sep_dict_id""" if isinstance(other, self.__class__): - return self.sep_dicts_id is not None \ - and self.sep_dicts_id == other.sep_dicts_id + return ( + self.sep_dicts_id is not None + and self.sep_dicts_id == other.sep_dicts_id + ) return False def matches(self, other: object): @@ -44,8 +45,9 @@ def matches(self, other: object): # value and origins match cond1 = self.value == other.value and self.origin == other.origin # sep_dicts_id matches - cond2 = self.sep_dicts_id == other.sep_dicts_id or \ - (self.output_matching and other.output_matching) + cond2 = self.sep_dicts_id == other.sep_dicts_id or ( + self.output_matching and other.output_matching + ) return cond0 and cond1 and cond2 return False @@ -68,8 +70,7 @@ def mismatches(self, other: object): return True def __eq__(self, other: object): - """Mainly used for testing purposes. - """ + """Mainly used for testing purposes.""" if isinstance(other, self.__class__): # value and origins match return self.value == other.value and self.origin == other.origin @@ -79,6 +80,5 @@ def __repr__(self): return str(self.value) def __hash__(self): - """Mainly used for testing purposes. - """ + """Mainly used for testing purposes.""" return hash(self.value) ^ hash(self.origin) ^ hash(self.output_matching) diff --git a/vflow/utils.py b/vflow/utils.py index 606b68c..3c425cd 100644 --- a/vflow/utils.py +++ b/vflow/utils.py @@ -1,27 +1,19 @@ """Useful functions for converting between different types (dicts, lists, tuples, etc.) """ + from copy import deepcopy from typing import Union from uuid import uuid4 import numpy as np import pandas as pd - import ray from ray.remote_function import RemoteFunction as RayRemoteFun from vflow.subkey import Subkey from vflow.vfunc import VfuncPromise - -PREV_KEY = '__prev__' - -def s(x): - """Gets shape of a list/tuple/ndarray - """ - if type(x) in [list, tuple]: - return len(x) - return x.shape +PREV_KEY = "__prev__" def init_step(idx, cols): @@ -36,26 +28,23 @@ def init_step(idx, cols): List of column names. """ for i in range(idx, len(cols)): - if cols[i] != 'init': - return 'init-' + cols[i] + if cols[i] != "init": + return "init-" + cols[i] return None def base_dict(d: dict): - """Remove PREV_KEY from dict d if present - """ - return {k:v for k,v in d.items() if k != PREV_KEY} + """Remove PREV_KEY from dict d if present""" + return {k: v for k, v in d.items() if k != PREV_KEY} def dict_data(d: dict): - """Returns a list containing all data in dict d - """ + """Returns a list containing all data in dict d""" return list(base_dict(d).values()) def dict_keys(d: dict): - """Returns a list containing all keys in dict d - """ + """Returns a list containing all keys in dict d""" return list(base_dict(d).keys()) @@ -129,10 +118,12 @@ def to_list(tup: tuple): if n_tup == 1: return [list(tup)] if n_tup % 2 != 0: - raise ValueError('Don\'t know how to handle uneven number of args ' - 'without a list. Please wrap your args in a list.') + raise ValueError( + "Don't know how to handle uneven number of args " + "without a list. Please wrap your args in a list." 
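# ---------------------------------------------------------------------------
# A small illustration of the Subkey semantics defined above (toy values):
# equality needs a matching value and origin, while is_matching() only turns
# on via output_matching or a sep_dicts_id stamped by sep_dicts.
from vflow.subkey import Subkey

a = Subkey("rf", origin="modeling")
b = Subkey("rf", origin="modeling")
print(a == b)           # True: same value and origin
print(a.is_matching())  # False: no output_matching, no sep_dicts_id
# ---------------------------------------------------------------------------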
+        )
     # assume first half of args is input and second half is outcome
-    return [list(el) for el in zip(tup[:(n_tup // 2)], tup[(n_tup // 2):])]
+    return [list(el) for el in zip(tup[: (n_tup // 2)], tup[(n_tup // 2) :])]
     if n_tup == 1:
         return [[x] for x in tup[0]]
     n_mods = len(tup[0])
@@ -170,7 +161,7 @@ def sep_dicts(d: dict, n_out: int = 1, keys=None):
     if keys is None:
         keys = []
     if len(keys) > 0 and len(keys) != n_out:
-        raise ValueError(f'keys should be empty or have length n_out={n_out}')
+        raise ValueError(f"keys should be empty or have length n_out={n_out}")
     # empty dict -- return empty dict
     if n_out <= 1:
         return d
@@ -184,7 +175,9 @@ def sep_dicts(d: dict, n_out: int = 1, keys=None):
             if len(keys) == 0:
                 new_key = (key[i],) + key[n_out:]
             else:
-                new_sub = Subkey(value=keys[i], origin=key[-1].origin + '-' + str(i))
+                new_sub = Subkey(
+                    value=keys[i], origin=key[-1].origin + "-" + str(i)
+                )
                 new_key = (new_sub,) + key
             new_key[-1].sep_dicts_id = sep_dicts_id
             if isinstance(value, VfuncPromise):
@@ -197,6 +190,7 @@ def sep_dicts(d: dict, n_out: int = 1, keys=None):
 
     return sep_dicts_list
 
+
 def dict_to_df(d: dict, param_key=None):
     """Converts a dictionary with tuple keys
     into a pandas DataFrame, optionally separating
@@ -219,31 +213,47 @@ def dict_to_df(d: dict, param_key=None):
     if len(d_copy.keys()) > 0:
         key_list = list(d.keys())
         subkey_list = key_list[0] if key_list[0] != PREV_KEY else key_list[1]
-        cols = [sk.origin for sk in subkey_list] + ['out']
+        cols = [sk.origin for sk in subkey_list] + ["out"]
         # set each init col to init-{next_vfunc_set}
-        cols = [c if c != 'init' else init_step(idx, cols) for idx, c in enumerate(cols)]
-        df.set_axis(cols, axis=1, inplace=True)
+        cols = [
+            c if c != "init" else init_step(idx, cols) for idx, c in enumerate(cols)
+        ]
+        df = df.set_axis(cols, axis=1)
         if param_key:
-            param_keys = df[param_key].tolist()  # pylint: disable=unsubscriptable-object
-            if param_key == 'out' and hasattr(param_keys[0], '__iter__'):
+            param_keys = df[
+                param_key
+            ].tolist()  # pylint: disable=unsubscriptable-object
+            if param_key == "out" and hasattr(param_keys[0], "__iter__"):
                 param_df = pd.DataFrame(param_keys)
-                param_df.columns = [f'{param_key}-{col}' for col in param_df.columns]
+                param_df.columns = [f"{param_key}-{col}" for col in param_df.columns]
                 df = df.join(param_df)
             else:
                 param_loc = df.columns.get_loc(param_key)
-                param_key_cols = [f"{p.split('=')[0]}-{param_key}" for p in param_keys[0]]
-                param_keys = [[s.split('=')[1] for s in t] for t in param_keys]
+                param_key_cols = [
+                    f"{p.split('=')[0]}-{param_key}" for p in param_keys[0]
+                ]
+                param_keys = [[s.split("=")[1] for s in t] for t in param_keys]
                 df = df.join(pd.DataFrame(param_keys)).drop(columns=param_key)
-                new_cols = df.columns[:len(cols)-1].tolist() + param_key_cols
-                df.set_axis(new_cols, axis=1, inplace=True)
+                new_cols = df.columns[: len(cols) - 1].tolist() + param_key_cols
+                df = df.set_axis(new_cols, axis=1)
                 new_idx = list(range(len(new_cols)))
-                new_idx = new_idx[:param_loc] + new_idx[len(cols)-1:] + new_idx[param_loc:len(cols)-1]
+                new_idx = (
+                    new_idx[:param_loc]
+                    + new_idx[len(cols) - 1 :]
+                    + new_idx[param_loc : len(cols) - 1]
+                )
                 df = df.iloc[:, new_idx]
     return df
 
 
-def perturbation_stats(data: Union[pd.DataFrame, dict], *group_by: str, wrt: str = 'out',
-                       func=None, prefix: str = None, split: bool = False):
+def perturbation_stats(
+    data: Union[pd.DataFrame, dict],
+    *group_by: str,
+    wrt: str = "out",
+    func=None,
+    prefix: str = None,
+    split: bool = False,
+):
     """Compute statistics for `wrt` in
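# ---------------------------------------------------------------------------
# A sketch of dict_to_df on the toy metric dict from the filtering sketch:
# each Subkey origin becomes a column and the dict values land in an 'out'
# column, with 'init' origins renamed via init_step. The column names shown
# are only indicative; they depend on the pipeline that produced the dict.
from vflow import dict_to_df

df = dict_to_df(acc_dict)
print(df.columns.tolist())  # e.g. [..., 'models', 'hard_metrics', 'out']
# ---------------------------------------------------------------------------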
`data`, conditional on `group_by` Parameters @@ -275,7 +285,7 @@ def perturbation_stats(data: Union[pd.DataFrame, dict], *group_by: str, wrt: str A DataFrame with summary statistics on `wrt`. """ if func is None: - func = ['count', 'mean', 'std'] + func = ["count", "mean", "std"] if prefix is None: prefix = wrt if isinstance(data, dict): @@ -287,36 +297,45 @@ def perturbation_stats(data: Union[pd.DataFrame, dict], *group_by: str, wrt: str gb = df.groupby(group_by)[wrt] else: gb = df.groupby(lambda x: True)[wrt] - if (isinstance(func, list) and 'mean' in func or 'std' in func) and \ - (type(df[wrt].iloc[0]) in [list, np.ndarray]): - wrt_arrays = [np.stack(d.tolist()) for d in (gb.get_group(grp) for grp in gb.groups)] + if (isinstance(func, list) and "mean" in func or "std" in func) and ( + type(df[wrt].iloc[0]) in [list, np.ndarray] + ): + wrt_arrays = [ + np.stack(d.tolist()) for d in (gb.get_group(grp) for grp in gb.groups) + ] n_cols = wrt_arrays[0].shape[1] - df_out = pd.DataFrame(gb.agg('count')) - df_out.columns = [f'{prefix}-count'] - if 'mean' in func: + df_out = pd.DataFrame(gb.agg("count")) + df_out.columns = [f"{prefix}-count"] + if "mean" in func: if split: col_means = [arr.mean(axis=0) for arr in wrt_arrays] - wrt_means = pd.DataFrame(col_means, - columns=[f'{prefix}{i}-mean' for i in range(n_cols)], - index=gb.groups.keys()) + wrt_means = pd.DataFrame( + col_means, + columns=[f"{prefix}{i}-mean" for i in range(n_cols)], + index=gb.groups.keys(), + ) else: - col_means = [{f'{prefix}-mean': arr.mean(axis=0)} for arr in wrt_arrays] + col_means = [{f"{prefix}-mean": arr.mean(axis=0)} for arr in wrt_arrays] wrt_means = pd.DataFrame(col_means, index=gb.groups.keys()) wrt_means.index.names = df_out.index.names df_out = df_out.join(wrt_means) - if 'std' in func: + if "std" in func: if split: col_stds = [arr.std(axis=0, ddof=1) for arr in wrt_arrays] - wrt_stds = pd.DataFrame(col_stds, - columns=[f'{prefix}{i}-std' for i in range(n_cols)], - index=gb.groups.keys()) + wrt_stds = pd.DataFrame( + col_stds, + columns=[f"{prefix}{i}-std" for i in range(n_cols)], + index=gb.groups.keys(), + ) else: - col_stds = [{f'{prefix}-std': arr.std(axis=0, ddof=1)} for arr in wrt_arrays] + col_stds = [ + {f"{prefix}-std": arr.std(axis=0, ddof=1)} for arr in wrt_arrays + ] wrt_stds = pd.DataFrame(col_stds, index=gb.groups.keys()) wrt_stds.index.names = df_out.index.names df_out = df_out.join(wrt_stds) - if 'count' not in func: - df_out = df_out.drop(f'{prefix}-count') + if "count" not in func: + df_out = df_out.drop(f"{prefix}-count") else: df_out = gb.agg(func) df_out = df_out.reindex(sorted(df_out.columns), axis=1) @@ -367,7 +386,9 @@ def combine_keys(left_key, right_key): return () if len(matched_subkeys) > 0: # always filter on right key - filtered_key = tuple(subkey for subkey in right_key if subkey not in matched_subkeys) + filtered_key = tuple( + subkey for subkey in right_key if subkey not in matched_subkeys + ) combined_key = left_key + filtered_key return combined_key return left_key + right_key @@ -404,7 +425,6 @@ def combine_dicts(*args: dict, base_case=True): if n_args == 2: for k0 in args[0]: for k1 in args[1]: - if PREV_KEY in (k0, k1): continue @@ -421,7 +441,7 @@ def combine_dicts(*args: dict, base_case=True): return combine_dicts(combine_dicts(args[0], args[1]), *args[2:], base_case=False) -def apply_vfuncs(vfuncs: dict, data_dict: dict, lazy: bool=False): +def apply_vfuncs(vfuncs: dict, data_dict: dict, lazy: bool = False): """Apply a dictionary of functions `vfuncs` to each item of 
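# ---------------------------------------------------------------------------
# A self-contained toy for perturbation_stats above (made-up data): group
# rows by a column and summarize 'out' with the default count/mean/std.
import pandas as pd
from vflow import perturbation_stats

toy = pd.DataFrame({"model": ["rf", "rf", "lr", "lr"], "out": [0.8, 0.9, 0.6, 0.7]})
print(perturbation_stats(toy, "model"))  # one summary row per model
# ---------------------------------------------------------------------------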
`data_dict`, optionally returning a dictionary of `vflow.vfunc.VfuncPromise` objects if `lazy` is True @@ -469,7 +489,9 @@ def apply_vfuncs(vfuncs: dict, data_dict: dict, lazy: bool=False): for i, data in enumerate(data_list): if isinstance(data, VfuncPromise): data_list[i] = data() - if isinstance(func, RayRemoteFun) and not isinstance(data_list[i], ray.ObjectRef): + if isinstance(func, RayRemoteFun) and not isinstance( + data_list[i], ray.ObjectRef + ): # send data to Ray's remote object store data_list[i] = ray.put(data_list[i]) elif isinstance(data_list[i], ray.ObjectRef): diff --git a/vflow/vfunc.py b/vflow/vfunc.py index 507b26a..42950fc 100644 --- a/vflow/vfunc.py +++ b/vflow/vfunc.py @@ -10,29 +10,27 @@ class Vfunc: If none of these is supported, it need only be a function """ - def __init__(self, name: str = '', vfunc=lambda x: x): - assert hasattr(vfunc, 'fit') or callable(vfunc), \ - 'vfunc must be an object with a fit method or a callable' + def __init__(self, name: str = "", vfunc=lambda x: x): + assert hasattr(vfunc, "fit") or callable( + vfunc + ), "vfunc must be an object with a fit method or a callable" self.name = name self.vfunc = vfunc def fit(self, *args, **kwargs): - """This function fits params for this vfunc - """ - if hasattr(self.vfunc, 'fit'): + """This function fits params for this vfunc""" + if hasattr(self.vfunc, "fit"): return self.vfunc.fit(*args, **kwargs) return self.vfunc(*args, **kwargs) def transform(self, *args, **kwargs): - """This function transforms its input in some way - """ - if hasattr(self.vfunc, 'transform'): + """This function transforms its input in some way""" + if hasattr(self.vfunc, "transform"): return self.vfunc.transform(*args, **kwargs) return self.vfunc(*args, **kwargs) def __call__(self, *args, **kwargs): - """This should decide what to call - """ + """This should decide what to call""" return self.fit(*args, **kwargs) @@ -42,41 +40,37 @@ def _remote_fun(vfunc, *args, **kwargs): class AsyncVfunc: - """An asynchronous version of the Vfunc class. - """ + """An asynchronous version of the Vfunc class.""" - def __init__(self, name: str = '', vfunc=lambda x: x): + def __init__(self, name: str = "", vfunc=lambda x: x): self.name = name if isinstance(vfunc, Vfunc): self.vfunc = vfunc.vfunc else: - assert hasattr(vfunc, 'fit') or callable(vfunc), \ - 'vfunc must be an object with a fit method or a callable' + assert hasattr(vfunc, "fit") or callable( + vfunc + ), "vfunc must be an object with a fit method or a callable" self.vfunc = vfunc def fit(self, *args, **kwargs): - """This function fits params for this vfunc - """ - if hasattr(self.vfunc, 'fit'): + """This function fits params for this vfunc""" + if hasattr(self.vfunc, "fit"): return _remote_fun.remote(self.vfunc.fit, *args, **kwargs) return _remote_fun.remote(self.vfunc, *args, **kwargs) def transform(self, *args, **kwargs): - """This function transforms its input in some way - """ - if hasattr(self.vfunc, 'transform'): + """This function transforms its input in some way""" + if hasattr(self.vfunc, "transform"): return _remote_fun.remote(self.vfunc.transform, *args, **kwargs) return _remote_fun.remote(self.vfunc, *args, **kwargs) def __call__(self, *args, **kwargs): - """This should decide what to call - """ + """This should decide what to call""" return self.fit(*args, **kwargs) class VfuncPromise: - """A Vfunc promise used for lazy evaluation. 
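# ---------------------------------------------------------------------------
# The Vfunc wrapper above duck-types fit/transform: when the wrapped object
# has no such attribute, it falls back to calling the object itself. A toy:
from vflow.vfunc import Vfunc

double = Vfunc(name="double", vfunc=lambda x: 2 * x)
print(double.fit(21))        # no .fit on a lambda -> calls it: 42
print(double.transform(21))  # no .transform either -> calls it: 42
# ---------------------------------------------------------------------------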
- """ + """A Vfunc promise used for lazy evaluation.""" def __init__(self, vfunc: callable, *args): self.vfunc = vfunc @@ -85,8 +79,7 @@ def __init__(self, vfunc: callable, *args): self.value = None def __call__(self): - """This should decide what to call - """ + """This should decide what to call""" if self.called: return self.value tmp_args = [] @@ -106,21 +99,18 @@ def _get_value(self): return self.value def transform(self, *args): - """This function transforms its input in some way - """ + """This function transforms its input in some way""" return self._get_value().transform(*args) def predict(self, *args): - """This function calls predict on its inputs - """ + """This function calls predict on its inputs""" return self._get_value().predict(*args) def predict_proba(self, *args): - """This function calls predict_proba on its inputs - """ + """This function calls predict_proba on its inputs""" return self._get_value().predict_proba(*args) def __repr__(self): if self.called: - return f'Fulfilled VfuncPromise({self.value})' - return f'Unfulfilled VfuncPromise(func={self.vfunc}, args={self.args})' + return f"Fulfilled VfuncPromise({self.value})" + return f"Unfulfilled VfuncPromise(func={self.vfunc}, args={self.args})" diff --git a/vflow/vset.py b/vflow/vset.py index bf3ed91..7e8884e 100644 --- a/vflow/vset.py +++ b/vflow/vset.py @@ -1,29 +1,40 @@ """Set of vfuncs to be parallelized over in a pipeline. Function arguments are each a list """ + from copy import deepcopy -import numpy as np import joblib +import numpy as np import ray - from mlflow.tracking import MlflowClient from vflow.subkey import Subkey -from vflow.utils import apply_vfuncs, combine_dicts, dict_to_df, perturbation_stats, sep_dicts, \ - PREV_KEY -from vflow.vfunc import Vfunc, AsyncVfunc - +from vflow.utils import ( + PREV_KEY, + apply_vfuncs, + combine_dicts, + dict_to_df, + perturbation_stats, + sep_dicts, +) +from vflow.vfunc import AsyncVfunc, Vfunc -FILTER_PREV_KEY = '__filter_prev__' +FILTER_PREV_KEY = "__filter_prev__" class Vset: - - def __init__(self, name: str, vfuncs, vfunc_keys: list = None, - is_async: bool = False, output_matching: bool = False, - lazy: bool = False, cache_dir: str = None, - tracking_dir: str = None): + def __init__( + self, + name: str, + vfuncs, + vfunc_keys: list = None, + is_async: bool = False, + output_matching: bool = False, + lazy: bool = False, + cache_dir: str = None, + tracking_dir: str = None, + ): """ Parameters ---------- @@ -73,13 +84,18 @@ def __init__(self, name: str, vfuncs, vfunc_keys: list = None, self.vfuncs = vfuncs elif isinstance(vfuncs, list): if vfunc_keys is not None: - assert isinstance(vfunc_keys, list), 'vfuncs passed as list but vfunc_keys is not a list' + assert isinstance( + vfunc_keys, list + ), "vfuncs passed as list but vfunc_keys is not a list" assert len(vfuncs) == len( - vfunc_keys), 'vfuncs list and vfunc_keys list do not have the same length' + vfunc_keys + ), "vfuncs list and vfunc_keys list do not have the same length" # TODO: how best to handle tuple subkeys? 
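# ---------------------------------------------------------------------------
# VfuncPromise above defers a call until first use and caches the result,
# which is what lazy Vsets pass around. A toy promise:
from vflow.vfunc import VfuncPromise

p = VfuncPromise(sum, [1, 2, 3])
print(p)    # Unfulfilled VfuncPromise(func=<built-in function sum>, args=([1, 2, 3],))
print(p())  # 6: evaluated on first call
print(p)    # Fulfilled VfuncPromise(6)
# ---------------------------------------------------------------------------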
vfunc_keys = [(self.__create_subkey(k),) for k in vfunc_keys] else: - vfunc_keys = [(self.__create_subkey(f'{name}_{i}'),) for i in range(len(vfuncs))] + vfunc_keys = [ + (self.__create_subkey(f"{name}_{i}"),) for i in range(len(vfuncs)) + ] # convert vfunc keys to singleton tuples self.vfuncs = dict(zip(vfunc_keys, vfuncs)) # if needed, wrap the vfuncs in the Vfunc or AsyncVfunc class @@ -135,9 +151,7 @@ def _apply_func(self, *args, out_dict: dict = None): continue origins = np.array([subk.origin for subk in k]) # ignore init origins and the last origin (this Vset) - param_idx = [ - i for i in range(len(k[:-1])) if origins[i] != 'init' - ] + param_idx = [i for i in range(len(k[:-1])) if origins[i] != "init"] # get or create mlflow run run_dict_key = tuple(subk.value for subk in k[:-1]) if run_dict_key in run_dict: @@ -154,15 +168,12 @@ def _apply_func(self, *args, out_dict: dict = None): if np.sum(origins == param_name) > 1: occurence = np.sum(origins[:idx] == param_name) param_name = param_name + str(occurence) - self._mlflow.log_param( - run_id, param_name, subkey.value - ) + self._mlflow.log_param(run_id, param_name, subkey.value) self._mlflow.log_metric(run_id, k[-1].value, v) return out_dict def fit(self, *args): - """Fits to args using `_apply_func` - """ + """Fits to args using `_apply_func`""" out_dict = {} for k, v in self.vfuncs.items(): out_dict[k] = v.fit @@ -175,43 +186,45 @@ def fit(self, *args): return self def fit_transform(self, *args): - """Fits to args and transforms only the first arg. - """ + """Fits to args and transforms only the first arg.""" return self.fit(*args).transform(args[0]) def transform(self, *args): - """Transforms args using `_apply_func` - """ + """Transforms args using `_apply_func`""" if not self._fitted: - raise AttributeError('Please fit the Vset object before calling the transform method.') + raise AttributeError( + "Please fit the Vset object before calling the transform method." + ) out_dict = {} for k, v in self.fitted_vfuncs.items(): - if hasattr(v, 'transform'): + if hasattr(v, "transform"): out_dict[k] = v.transform return self._apply_func(*args, out_dict=out_dict) - def predict(self, *args, with_uncertainty: bool=False, group_by: list=None): - """Predicts args using `_apply_func` - """ + def predict(self, *args, with_uncertainty: bool = False, group_by: list = None): + """Predicts args using `_apply_func`""" if not self._fitted: - raise AttributeError('Please fit the Vset object before calling predict.') + raise AttributeError("Please fit the Vset object before calling predict.") pred_dict = {} for k, v in self.fitted_vfuncs.items(): - if hasattr(v, 'predict'): + if hasattr(v, "predict"): pred_dict[k] = v.predict preds = self._apply_func(*args, out_dict=pred_dict) if with_uncertainty: return prediction_uncertainty(preds, group_by) return preds - def predict_proba(self, *args, with_uncertainty: bool=False, group_by: list=None): - """Calls predict_proba on args using `_apply_func` - """ + def predict_proba( + self, *args, with_uncertainty: bool = False, group_by: list = None + ): + """Calls predict_proba on args using `_apply_func`""" if not self._fitted: - raise AttributeError('Please fit the Vset object before calling predict_proba.') + raise AttributeError( + "Please fit the Vset object before calling predict_proba." 
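# ---------------------------------------------------------------------------
# A toy pass through the Vset lifecycle above, assuming scikit-learn and the
# X_dict from the filtering sketch: fit stores fitted vfuncs, and transform
# only applies the ones that expose a .transform method.
from sklearn.preprocessing import StandardScaler
from vflow import build_vset

scaling = build_vset("scaling", StandardScaler)
scaling.fit(X_dict)
X_scaled = scaling.transform(X_dict)
# ---------------------------------------------------------------------------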
+ ) pred_dict = {} for k, v in self.fitted_vfuncs.items(): - if hasattr(v, 'predict_proba'): + if hasattr(v, "predict_proba"): pred_dict[k] = v.predict_proba preds = self._apply_func(*args, out_dict=pred_dict) if with_uncertainty: @@ -219,8 +232,7 @@ def predict_proba(self, *args, with_uncertainty: bool=False, group_by: list=None return preds def evaluate(self, *args): - """Combines dicts before calling `_apply_func` - """ + """Combines dicts before calling `_apply_func`""" return self._apply_func(*args) def __call__(self, *args, n_out: int = None, keys=None, **kwargs): @@ -245,20 +257,17 @@ def __call__(self, *args, n_out: int = None, keys=None, **kwargs): return out_dicts def __getitem__(self, i): - """Accesses ith item in the vfunc set - """ + """Accesses ith item in the vfunc set""" return self.vfuncs[i] def __contains__(self, key): - """Returns true if vfuncs is a dict and key is one of its keys - """ + """Returns true if vfuncs is a dict and key is one of its keys""" if isinstance(self.vfuncs, dict): return key in self.vfuncs.keys() return False def keys(self): - """Returns Vset vfunc keys - """ + """Returns Vset vfunc keys""" if isinstance(self.vfuncs, dict): return self.vfuncs.keys() return {}.keys() @@ -267,7 +276,7 @@ def __len__(self): return len(self.vfuncs) def __str__(self): - return 'Vset(' + self.name + ')' + return "Vset(" + self.name + ")" def __create_subkey(self, value): """Helper function to construct `Subkey` with @@ -298,7 +307,9 @@ def _apply_func_cached(out_dict: dict, is_async: bool, lazy: bool, *args): """ for in_dict in args: if not isinstance(in_dict, dict): - raise Exception('Run init_args on data before using it when calling a Vset!') + raise Exception( + "Run init_args on data before using it when calling a Vset!" + ) data_dict = combine_dicts(*args) out_dict = apply_vfuncs(out_dict, data_dict, lazy) @@ -313,7 +324,7 @@ def _apply_func_cached(out_dict: dict, is_async: bool, lazy: bool, *args): return out_dict -def prediction_uncertainty(preds, group_by: list=None): +def prediction_uncertainty(preds, group_by: list = None): """Returns the mean and std predictions conditional on group_by Params @@ -331,15 +342,15 @@ def prediction_uncertainty(preds, group_by: list=None): if group_by is None: # just average over all predictions preds_stats = perturbation_stats(preds_df) - group_by = ['index'] + group_by = ["index"] else: preds_stats = perturbation_stats(preds_df, *group_by) origins = preds_stats[group_by].columns keys = preds_stats[group_by].to_numpy() # wrap subkey values in Subkey keys = [tuple(Subkey(sk, origins[idx]) for idx, sk in enumerate(x)) for x in keys] - mean_dict = dict(zip(keys, preds_stats['out-mean'])) - std_dict = dict(zip(keys, preds_stats['out-std'])) + mean_dict = dict(zip(keys, preds_stats["out-mean"])) + std_dict = dict(zip(keys, preds_stats["out-std"])) # add PREV_KEY to out dicts mean_dict[PREV_KEY] = preds[PREV_KEY] std_dict[PREV_KEY] = preds[PREV_KEY]
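# ---------------------------------------------------------------------------
# A hedged sketch of predict(with_uncertainty=True), reusing the toy X_dict
# and y_dict from the filtering sketch and assuming scikit-learn: reps=3 in
# build_vset creates replicate models, and prediction_uncertainty above
# condenses their predictions into mean/std summary dicts.
from sklearn.ensemble import RandomForestClassifier
from vflow import build_vset

boot = build_vset("boot_rf", RandomForestClassifier, reps=3, n_estimators=25)
boot.fit(X_dict, y_dict)
uncertainty = boot.predict(X_dict, with_uncertainty=True)
# ---------------------------------------------------------------------------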