From 9860b69a8168ee172cec2f06482ace224d5f2bb9 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 26 Feb 2024 09:18:49 +0100
Subject: [PATCH 01/36] Stub custom materializer

---
 .../_util/__init__.py                         |  0
 .../_util/formulaic.py                        | 27 +++++++++++++++++++
 src/multi_condition_comparisions/tl/de.py     |  4 +--
 3 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 src/multi_condition_comparisions/_util/__init__.py
 create mode 100644 src/multi_condition_comparisions/_util/formulaic.py

diff --git a/src/multi_condition_comparisions/_util/__init__.py b/src/multi_condition_comparisions/_util/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
new file mode 100644
index 0000000..a964fc3
--- /dev/null
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -0,0 +1,27 @@
+"""Helpers to interact with Formulaic Formulas"""
+
+from collections.abc import Sequence
+from typing import Any
+
+from formulaic import ModelSpec
+from formulaic.materializers import PandasMaterializer
+from formulaic.materializers.types import EvaluatedFactor
+from interface_meta import override
+
+
+class CustomPandasMaterializer(PandasMaterializer):
+    """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
+
+    REGISTER_NAME = "custom_pandas"
+    REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)
+    REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
+
+    @override
+    def _encode_evaled_factor(
+        self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
+    ) -> dict[str, Any]:
+        print(factor)
+        print(spec)
+        res = super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
+        print(res)
+        return res
diff --git a/src/multi_condition_comparisions/tl/de.py b/src/multi_condition_comparisions/tl/de.py
index d4c9b97..8076d1c 100644
--- a/src/multi_condition_comparisions/tl/de.py
+++ b/src/multi_condition_comparisions/tl/de.py
@@ -62,7 +62,7 @@ def __init__(
 
         self.layer = layer
         if isinstance(design, str):
-            self.design = model_matrix(design, adata.obs)
+            self.design = model_matrix(design, adata.obs, materializer="custom_pandas")
         else:
             self.design = design
 
@@ -220,7 +220,7 @@ class StatsmodelsDE(BaseMethod):
     """Differential expression test using a statsmodels linear regression"""
 
     def _check_counts(self) -> bool:
-        return self._check_count_matrix(self.adata.X)
+        pass
 
     def fit(
         self,

From 91f8b1ca05913c2dc6620b81fbecdbd6548a19f1 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 26 Feb 2024 09:47:04 +0100
Subject: [PATCH 02/36] Stub materializer factory

---
 .../_util/formulaic.py                        | 57 ++++++++++++++-----
 tests/conftest.py                             | 32 +++++++++++
 tests/test_de.py                              |  4 ++
 3 files changed, 79 insertions(+), 14 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index a964fc3..f504605 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -1,6 +1,7 @@
 """Helpers to interact with Formulaic Formulas"""
 
 from collections.abc import Sequence
+from dataclasses import dataclass
 from typing import Any
 
 from formulaic import ModelSpec
@@ -9,19 +10,47 @@
 from interface_meta import override
 
 
-class CustomPandasMaterializer(PandasMaterializer):
-    """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
+@dataclass
+class FactorMetadata:
+    """Store (relevant) metadata for a factor of a formula."""
 
-    REGISTER_NAME = "custom_pandas"
-    REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)
-    REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
+    name: str
+    reduced_rank: bool
+    base_level: str
 
-    @override
-    def _encode_evaled_factor(
-        self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
-    ) -> dict[str, Any]:
-        print(factor)
-        print(spec)
-        res = super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
-        print(res)
-        return res
+
+class FactorMetadataStorage:
+    """
+    Keep track of categorical factors used in a model spec.
+
+    Generates a custom materializers that reports back certain metadata upon materialization of the model matrix.
+    """
+
+    def __init__(self) -> None:
+        self.factors: dict[str, FactorMetadata] = {}
+        self.materializer = self._make_materializer()
+
+    def _make_materializer(self_factor_metadata_storage):
+        class CustomPandasMaterializer(PandasMaterializer):
+            """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
+
+            REGISTER_NAME = "custom_pandas"
+            REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)
+            REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
+
+            factor_metadata_storage = self_factor_metadata_storage.factors
+
+            @override
+            def _encode_evaled_factor(
+                self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
+            ) -> dict[str, Any]:
+                self.factor_metadata_storage[factor] = FactorMetadata(
+                    name=str(factor), reduced_rank=factor.reduced_rank, base_level="TODO"
+                )
+                print(factor)
+                print(spec)
+                res = super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
+                print(res)
+                return res
+
+        return CustomPandasMaterializer
diff --git a/tests/conftest.py b/tests/conftest.py
index 43d7c8a..5f99689 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,6 @@
 import anndata as ad
+import numpy as np
+import pandas as pd
 import pytest
 from pydeseq2.utils import load_example_data
 
@@ -23,6 +25,36 @@ def test_metadata():
     )
 
 
+@pytest.fixture
+def test_adata_minimal():
+    obs = pd.DataFrame(
+        [
+            ["A", "D1", "X", 0.2],
+            ["B", "D1", "X", 0.7],
+            ["A", "D2", "Y", 1.6],
+            ["B", "D2", "Y", 42],
+            ["A", "D3", "Y", 212],
+            ["B", "D3", "Y", 6023],
+        ],
+        columns=["condition", "donor", "other", "cont"],
+    )
+    var = pd.DataFrame(index=["gene1", "gene2"])
+    X = np.array(
+        [
+            # gene1 differs between condition A/B by approx. FC=2
+            # gene2 differs between donor D1/D2 by approx FC=2
+            # [gene1, gene2]
+            [10, 200],
+            [20, 220],
+            [12, 400],
+            [25, 420],
+            [13, 600],
+            [24, 620],
+        ]
+    )
+    return ad.AnnData(X=X, obs=obs, var=var)
+
+
 @pytest.fixture
 def test_adata(test_counts, test_metadata):
     return ad.AnnData(X=test_counts, obs=test_metadata)
diff --git a/tests/test_de.py b/tests/test_de.py
index e39cdd2..7614d85 100644
--- a/tests/test_de.py
+++ b/tests/test_de.py
@@ -11,6 +11,10 @@ def test_package_has_version():
     assert multi_condition_comparisions.__version__ is not None
 
 
+def test_foo(test_adata_minimal):
+    StatsmodelsDE(test_adata_minimal, "~ 0 + C(condition, contr.treatment(base='B')) + donor")
+
+
 @pytest.mark.parametrize(
     "method_class,kwargs",
     [

From 937e303b1a6e1f6d56aaac68fafa7a495a0dc03c Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 26 Feb 2024 10:02:29 +0100
Subject: [PATCH 03/36] Setup basic factor metadata registry

---
 .../_util/formulaic.py                        | 57 +++++++++----------
 src/multi_condition_comparisions/tl/de.py     |  9 ++-
 tests/test_de.py                              |  3 +-
 3 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index f504605..66e68b8 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -19,38 +19,35 @@ class FactorMetadata:
     base_level: str
 
 
-class FactorMetadataStorage:
+def get_factor_storage_and_materializer():
     """
     Keep track of categorical factors used in a model spec.
 
     Generates a custom materializers that reports back certain metadata upon materialization of the model matrix.
     """
-
-    def __init__(self) -> None:
-        self.factors: dict[str, FactorMetadata] = {}
-        self.materializer = self._make_materializer()
-
-    def _make_materializer(self_factor_metadata_storage):
-        class CustomPandasMaterializer(PandasMaterializer):
-            """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
-
-            REGISTER_NAME = "custom_pandas"
-            REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)
-            REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
-
-            factor_metadata_storage = self_factor_metadata_storage.factors
-
-            @override
-            def _encode_evaled_factor(
-                self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
-            ) -> dict[str, Any]:
-                self.factor_metadata_storage[factor] = FactorMetadata(
-                    name=str(factor), reduced_rank=factor.reduced_rank, base_level="TODO"
-                )
-                print(factor)
-                print(spec)
-                res = super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
-                print(res)
-                return res
-
-        return CustomPandasMaterializer
+    factor_storage: dict[str, FactorMetadata] = {}
+
+    class CustomPandasMaterializer(PandasMaterializer):
+        """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
+
+        REGISTER_NAME = "custom_pandas"
+        REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)
+        REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
+
+        factor_metadata_storage = factor_storage
+
+        @override
+        def _encode_evaled_factor(
+            self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
+        ) -> dict[str, Any]:
+            assert factor.expr not in self.factor_metadata_storage, "Factor already present in metadata storage"
+            self.factor_metadata_storage[factor.expr] = FactorMetadata(
+                name=factor.expr, reduced_rank=reduced_rank, base_level="TODO"
+            )
+            print(factor)
+            print(spec)
+            res = super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
+            print(res)
+            return res
+
+    return factor_storage, CustomPandasMaterializer
diff --git a/src/multi_condition_comparisions/tl/de.py b/src/multi_condition_comparisions/tl/de.py
index 8076d1c..297f81d 100644
--- a/src/multi_condition_comparisions/tl/de.py
+++ b/src/multi_condition_comparisions/tl/de.py
@@ -8,7 +8,7 @@
 import statsmodels
 import statsmodels.api as sm
 from anndata import AnnData
-from formulaic import model_matrix
+from formulaic import Formula
 from formulaic.model_matrix import ModelMatrix
 from pydeseq2.dds import DeseqDataSet
 from pydeseq2.default_inference import DefaultInference
@@ -17,10 +17,14 @@
 from scipy.sparse import issparse, spmatrix
 from tqdm.auto import tqdm
 
+from multi_condition_comparisions._util.formulaic import get_factor_storage_and_materializer
+
 
 class BaseMethod(ABC):
     """Base method for DE testing that is implemented per DE test."""
 
+    factor_metadata_storage = None
+
     def __init__(
         self,
         adata: AnnData,
@@ -62,7 +66,8 @@ def __init__(
 
         self.layer = layer
         if isinstance(design, str):
-            self.design = model_matrix(design, adata.obs, materializer="custom_pandas")
+            self.factor_metadata_storage, materializer_class = get_factor_storage_and_materializer()
+            self.design = materializer_class(adata.obs).get_model_matrix(Formula(design))
         else:
             self.design = design
 
diff --git a/tests/test_de.py b/tests/test_de.py
index 7614d85..7931589 100644
--- a/tests/test_de.py
+++ b/tests/test_de.py
@@ -12,7 +12,8 @@ def test_package_has_version():
 
 
 def test_foo(test_adata_minimal):
-    StatsmodelsDE(test_adata_minimal, "~ 0 + C(condition, contr.treatment(base='B')) + donor")
+    model = StatsmodelsDE(test_adata_minimal, "~ 0 + C(condition, contr.treatment(base='B')) + donor")
+    print(model)
 
 
 @pytest.mark.parametrize(

From 36f07dbee05eaf141df7598a3cb3a92c47743ba1 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 26 Feb 2024 11:57:26 +0100
Subject: [PATCH 04/36] Record all required attributes

---
 .../_util/formulaic.py                        | 41 ++++++++++++++-----
 tests/test_de.py                              |  2 +-
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 66e68b8..3f1e6f8 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -4,9 +4,10 @@
 from dataclasses import dataclass
 from typing import Any
 
-from formulaic import ModelSpec
+from formulaic import FactorValues, ModelSpec
 from formulaic.materializers import PandasMaterializer
 from formulaic.materializers.types import EvaluatedFactor
+from formulaic.parser.types import Factor
 from interface_meta import override
 
 
@@ -15,11 +16,25 @@ class FactorMetadata:
     """Store (relevant) metadata for a factor of a formula."""
 
     name: str
+    """The unambiguous factor name as specified in the formula. E.g. `donor`, or `C(donor, contr.treatment(base="A"))`"""
+
     reduced_rank: bool
-    base_level: str
+    """Whether a column will be dropped because it is redundant"""
+
+    drop_field: str = None
+    """The category that is dropped. Note that this may also be populated if `reduced_rank = False`"""
+
+    kind: Factor.Kind = None
+    """Type of the factor"""
+
+    categories: Sequence[str] = None
+    """The unique categories in this factor"""
 
+    colname_format: str = None
+    """A formattable string that can be used to generate the column name in the design matrix, e.g. `{name}[T.{field}]`"""
 
-def get_factor_storage_and_materializer():
+
+def get_factor_storage_and_materializer() -> tuple[dict[str, FactorMetadata], type]:
     """
     Keep track of categorical factors used in a model spec.
 
@@ -41,13 +56,17 @@ def _encode_evaled_factor(
             self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
         ) -> dict[str, Any]:
             assert factor.expr not in self.factor_metadata_storage, "Factor already present in metadata storage"
-            self.factor_metadata_storage[factor.expr] = FactorMetadata(
-                name=factor.expr, reduced_rank=reduced_rank, base_level="TODO"
-            )
-            print(factor)
-            print(spec)
-            res = super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
-            print(res)
-            return res
+            self.factor_metadata_storage[factor.expr] = FactorMetadata(name=factor.expr, reduced_rank=reduced_rank)
+            return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
+
+        @override
+        def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict]) -> dict[str, Any]:
+            """Function is called at the end, here se still have access to the raw factor values."""
+            factor_metadata = self.factor_metadata_storage[name]
+            factor_metadata.drop_field = values.__formulaic_metadata__.drop_field
+            factor_metadata.categories = values.__formulaic_metadata__.column_names
+            factor_metadata.colname_format = values.__formulaic_metadata__.format
+
+            return super()._flatten_encoded_evaled_factor(name, values)
 
     return factor_storage, CustomPandasMaterializer
diff --git a/tests/test_de.py b/tests/test_de.py
index 7931589..eacd286 100644
--- a/tests/test_de.py
+++ b/tests/test_de.py
@@ -12,7 +12,7 @@ def test_package_has_version():
 
 
 def test_foo(test_adata_minimal):
-    model = StatsmodelsDE(test_adata_minimal, "~ 0 + C(condition, contr.treatment(base='B')) + donor")
+    model = StatsmodelsDE(test_adata_minimal, "~ 0 + C(condition, contr.treatment) + donor")
     print(model)
 
 

From 56e0b05a35b487200e8520ab7560268e2d59b0b6 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 26 Feb 2024 12:51:58 +0100
Subject: [PATCH 05/36] Implement variable2term

---
 src/multi_condition_comparisions/tl/de.py | 53 +++++++++++++++--------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/src/multi_condition_comparisions/tl/de.py b/src/multi_condition_comparisions/tl/de.py
index 297f81d..bf4dfeb 100644
--- a/src/multi_condition_comparisions/tl/de.py
+++ b/src/multi_condition_comparisions/tl/de.py
@@ -1,4 +1,3 @@
-import re
 import warnings
 from abc import ABC, abstractmethod
 
@@ -9,7 +8,6 @@
 import statsmodels.api as sm
 from anndata import AnnData
 from formulaic import Formula
-from formulaic.model_matrix import ModelMatrix
 from pydeseq2.dds import DeseqDataSet
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.ds import DeseqStats
@@ -91,6 +89,33 @@ def variables(self):
         """Get the names of the variables used in the model definition"""
         return self.design.model_spec.variables_by_source["data"]
 
+    def _get_factor_from_variable(self, variable: str):
+        """
+        Get the corresponding factor from a variable specified in a formula.
+
+        TODO unittests/examples
+        >>> _get_factor_from_variable("donor")
+        C(donor, contr.treatment)
+        """
+        try:
+            variable_terms = self.design.model_spec.variable_terms
+            terms = self.design.model_spec.terms
+        except AttributeError:
+            raise ValueError(
+                "Getting the factor from a variable only works when the design was specified as a formula."
+            ) from None
+        # Either, the variable is a term
+        if variable in terms:
+            return variable
+        # Or, there's an unambiguous mapping from variable to term:
+        elif len(variable_terms) == 1:
+            return next(iter(variable_terms))
+        # Otherwise, we can't get the term and the user requires to specify the term directly
+        else:
+            raise ValueError(
+                "No unambigous mapping from variable to term -- Please specify the term instead of its variable"
+            )
+
     @abstractmethod
     def _check_counts(self) -> bool:
         """
@@ -170,26 +195,23 @@ def cond(self, **kwargs) -> np.ndarray:
         """
         The intention is to make contrasts using this function as in glmGamPoi
 
-        >>> res < -test_de(
-        ...     fit, contrast=cond(cell="B cells", condition="stim") - cond(cell="B cells", condition="ctrl")
-        ... )
+        ```r
+        res <- test_de(
+             fit, contrast=cond(cell="B cells", condition="stim") - cond(cell="B cells", condition="ctrl")
+        )
+        ```
 
         Parameters
         ----------
         **kwargs
 
         """
-
-        # TODO this is hacky - reach out to formulaic authors how to do this properly
-        def _get_var_from_colname(colname):
-            regex = re.compile(r"^.+\[T\.(.+)\]$")
-            return regex.search(colname).groups()[0]
-
-        if not isinstance(self.design, ModelMatrix):
+        if self.factor_metadata_storage is None:
             raise RuntimeError(
                 "Building contrasts with `cond` only works if you specified the model using a "
                 "formulaic formula. Please manually provide a contrast vector."
             )
+
         cond_dict = kwargs
         for var in self.variables:
             var_type = self.design.model_spec.encoder_state[var][0].value
@@ -205,12 +227,7 @@ def _get_var_from_colname(colname):
                 if var_type != "categorical":
                     cond_dict[var] = 0
                 else:
-                    var_cols = self.design.columns[self.design.columns.str.startswith(f"{var}[")]
-
-                    present_categories = {_get_var_from_colname(x) for x in var_cols}
-                    dropped_category = all_categories - present_categories
-                    assert len(dropped_category) == 1
-                    cond_dict[var] = next(iter(dropped_category))
+                    pass
 
         df = pd.DataFrame([kwargs])
 

From fdcded04fc5f7380ad45dde7b6a1eb274e9ccbff Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 27 Feb 2024 09:43:42 +0100
Subject: [PATCH 06/36] Reimplement model.cond using factor_metadata_storage

---
 .../_util/formulaic.py                        |  9 +++-
 src/multi_condition_comparisions/tl/de.py     | 47 +++++++++----------
 tests/test_de.py                              |  2 +-
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 3f1e6f8..abdbe0d 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -55,7 +55,14 @@ class CustomPandasMaterializer(PandasMaterializer):
         def _encode_evaled_factor(
             self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
         ) -> dict[str, Any]:
-            assert factor.expr not in self.factor_metadata_storage, "Factor already present in metadata storage"
+            if factor.expr in self.factor_metadata_storage and not (
+                factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache
+            ):
+                # the same factor might be referred to multiple times in the same formula -- for instance, when using
+                # an interaction term such as group*condition. In that case formulaic is reuding a cached encoding.
+                # However, if it's not just reusing an existing encoding, something unexpected is happening that
+                # we haven't accounted for yet.
+                raise AssertionError("Factor already present in metadata storage and not reusing cached encoding")
             self.factor_metadata_storage[factor.expr] = FactorMetadata(name=factor.expr, reduced_rank=reduced_rank)
             return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
 
diff --git a/src/multi_condition_comparisions/tl/de.py b/src/multi_condition_comparisions/tl/de.py
index bf4dfeb..6c1ab06 100644
--- a/src/multi_condition_comparisions/tl/de.py
+++ b/src/multi_condition_comparisions/tl/de.py
@@ -8,6 +8,7 @@
 import statsmodels.api as sm
 from anndata import AnnData
 from formulaic import Formula
+from formulaic.parser.types import Factor
 from pydeseq2.dds import DeseqDataSet
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.ds import DeseqStats
@@ -53,14 +54,14 @@ def __init__(
 
         # Do some sanity checks on the input. Do them after the mask is applied.
 
-        # Check that counts have no NaN or Inf values.
-        if np.any(np.logical_or(adata.X < 0, np.isnan(self.adata.X))) or np.any(np.isinf(self.adata.X)):
-            raise ValueError("Counts cannot contain negative, NaN or Inf values.")
-        # Check that counts have numeric values.
-        if not np.issubdtype(self.adata.X.dtype, np.number):
-            raise ValueError("Counts must be numeric.")
+        # # Check that counts have no NaN or Inf values.
+        # if np.any(np.logical_or(adata.X < 0, np.isnan(self.adata.X))) or np.any(np.isinf(self.adata.X)):
+        #     raise ValueError("Counts cannot contain negative, NaN or Inf values.")
+        # # Check that counts have numeric values.
+        # if not np.issubdtype(self.adata.X.dtype, np.number):
+        #     raise ValueError("Counts must be numeric.")
 
-        self._check_counts()
+        # self._check_counts()
 
         self.layer = layer
         if isinstance(design, str):
@@ -89,12 +90,12 @@ def variables(self):
         """Get the names of the variables used in the model definition"""
         return self.design.model_spec.variables_by_source["data"]
 
-    def _get_factor_from_variable(self, variable: str):
+    def _get_term_from_variable(self, variable: str):
         """
         Get the corresponding factor from a variable specified in a formula.
 
         TODO unittests/examples
-        >>> _get_factor_from_variable("donor")
+        >>> _get_terms_from_variable("donor")
         C(donor, contr.treatment)
         """
         try:
@@ -213,25 +214,19 @@ def cond(self, **kwargs) -> np.ndarray:
             )
 
         cond_dict = kwargs
-        for var in self.variables:
-            var_type = self.design.model_spec.encoder_state[var][0].value
-            if var_type == "categorical":
-                all_categories = set(self.design.model_spec.encoder_state[var][1]["categories"])
-            if var in kwargs:
-                if var_type == "categorical" and kwargs[var] not in all_categories:
-                    raise ValueError(
-                        f"You specified a non-existant category for {var}. Possible categories: {', '.join(all_categories)}"
-                    )
-            else:
-                # fill with default values
-                if var_type != "categorical":
-                    cond_dict[var] = 0
+        # convert variables to terms in the cond dict (if possible)
+        cond_dict = {self._get_term_from_variable(term): value for term, value in cond_dict.items()}
+
+        # fill cond dict with default values
+        for term in self.factor_metadata_storage:
+            if term not in cond_dict:
+                metadata = self.factor_metadata_storage[term]
+                if metadata.kind == Factor.Kind.CATEGORICAL:
+                    cond_dict[term] = metadata.drop_field
                 else:
-                    pass
-
-        df = pd.DataFrame([kwargs])
+                    cond_dict[term] = 0
 
-        return self.design.model_spec.get_model_matrix(df)
+        return self.design.model_spec.get_model_matrix(pd.DataFrame([cond_dict]))
 
     def contrast(self, column: str, baseline: str, group_to_compare: str) -> object:
         """Build a simple contrast for pairwise comparisons.  In the future all methods should be able to accept the output of :method:`StatsmodelsDE.contrast` but alas a big TODO."""
diff --git a/tests/test_de.py b/tests/test_de.py
index eacd286..4ad7bbe 100644
--- a/tests/test_de.py
+++ b/tests/test_de.py
@@ -12,7 +12,7 @@ def test_package_has_version():
 
 
 def test_foo(test_adata_minimal):
-    model = StatsmodelsDE(test_adata_minimal, "~ 0 + C(condition, contr.treatment) + donor")
+    model = StatsmodelsDE(test_adata_minimal, "~ condition * donor")
     print(model)
 
 

From 8ddd5bb385f5b270aeeaf70312dd6b810ad012c5 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Tue, 2 Apr 2024 14:59:39 +0200
Subject: [PATCH 07/36] Cleanup after merge

---
 .../_util/__init__.py                         |  3 ++
 .../{_util.py => _util/checks.py}             |  0
 .../methods/_statsmodels.py                   |  2 +-
 tests/conftest.py                             | 30 -------------------
 tests/test_formulaic.py                       |  8 +++++
 tests/test_statsmodels.py                     |  5 ----
 6 files changed, 12 insertions(+), 36 deletions(-)
 rename src/multi_condition_comparisions/{_util.py => _util/checks.py} (100%)
 create mode 100644 tests/test_formulaic.py

diff --git a/src/multi_condition_comparisions/_util/__init__.py b/src/multi_condition_comparisions/_util/__init__.py
index e69de29..a98b31a 100644
--- a/src/multi_condition_comparisions/_util/__init__.py
+++ b/src/multi_condition_comparisions/_util/__init__.py
@@ -0,0 +1,3 @@
+from .checks import check_is_integer_matrix, check_is_numeric_matrix
+
+__all__ = ["check_is_integer_matrix", "check_is_numeric_matrix"]
diff --git a/src/multi_condition_comparisions/_util.py b/src/multi_condition_comparisions/_util/checks.py
similarity index 100%
rename from src/multi_condition_comparisions/_util.py
rename to src/multi_condition_comparisions/_util/checks.py
diff --git a/src/multi_condition_comparisions/methods/_statsmodels.py b/src/multi_condition_comparisions/methods/_statsmodels.py
index 1bc4ef5..1fda634 100644
--- a/src/multi_condition_comparisions/methods/_statsmodels.py
+++ b/src/multi_condition_comparisions/methods/_statsmodels.py
@@ -18,7 +18,7 @@ def _check_counts(self):
 
     def fit(
         self,
-        regression_model: sm.OLS | sm.GLM = sm.OLS,
+        regression_model: type[sm.OLS] | type[sm.GLM] = sm.OLS,
         **kwargs,
     ) -> None:
         """
diff --git a/tests/conftest.py b/tests/conftest.py
index 904e618..936ccf3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,36 +24,6 @@ def test_metadata():
     )
 
 
-# @pytest.fixture
-# def test_adata_minimal():
-#     obs = pd.DataFrame(
-#         [
-#             ["A", "D1", "X", 0.2],
-#             ["B", "D1", "X", 0.7],
-#             ["A", "D2", "Y", 1.6],
-#             ["B", "D2", "Y", 42],
-#             ["A", "D3", "Y", 212],
-#             ["B", "D3", "Y", 6023],
-#         ],
-#         columns=["condition", "donor", "other", "cont"],
-#     )
-#     var = pd.DataFrame(index=["gene1", "gene2"])
-#     X = np.array(
-#         [
-#             # gene1 differs between condition A/B by approx. FC=2
-#             # gene2 differs between donor D1/D2 by approx FC=2
-#             # [gene1, gene2]
-#             [10, 200],
-#             [20, 220],
-#             [12, 400],
-#             [25, 420],
-#             [13, 600],
-#             [24, 620],
-#         ]
-#     )
-#     return ad.AnnData(X=X, obs=obs, var=var)
-
-
 @pytest.fixture
 def test_adata(test_counts, test_metadata):
     return ad.AnnData(X=test_counts, obs=test_metadata)
diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
new file mode 100644
index 0000000..cc62244
--- /dev/null
+++ b/tests/test_formulaic.py
@@ -0,0 +1,8 @@
+from multi_condition_comparisions._util.formulaic import get_factor_storage_and_materializer
+
+
+def test_custom_materializer(test_adata_minimal, formula):
+    """Test that the custom materializer correctly stores the baseline category."""
+    factor_storage, materializer = get_factor_storage_and_materializer()
+    materializer(test_adata_minimal.obs).get_model_matrix(formula)
+    raise AssertionError()
diff --git a/tests/test_statsmodels.py b/tests/test_statsmodels.py
index ffa195a..962e3a0 100644
--- a/tests/test_statsmodels.py
+++ b/tests/test_statsmodels.py
@@ -30,8 +30,3 @@ def test_statsmodels(test_adata, method_class, kwargs):
 
 # TODO: there should be a test checking if, for a concrete example, the output p-values and effect sizes are what
 # we expect (-> frozen snapshot, that way we also get a heads-up if something changes upstream)
-
-
-def test_foo(test_adata_minimal):
-    model = Statsmodels(test_adata_minimal, "~ condition * donor")
-    print(model)

From ee79d70ebd19ddd1fd1035f1b05a41486f4639b1 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
Date: Tue, 2 Apr 2024 15:33:29 +0200
Subject: [PATCH 08/36] stub test cases

---
 .../_util/formulaic.py                        |  2 +-
 tests/test_formulaic.py                       | 23 ++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index abdbe0d..f1eb4e3 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -68,7 +68,7 @@ def _encode_evaled_factor(
 
         @override
         def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict]) -> dict[str, Any]:
-            """Function is called at the end, here se still have access to the raw factor values."""
+            """Function is called at the end, here we still have access to the raw factor values."""
             factor_metadata = self.factor_metadata_storage[name]
             factor_metadata.drop_field = values.__formulaic_metadata__.drop_field
             factor_metadata.categories = values.__formulaic_metadata__.column_names
diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
index cc62244..446a950 100644
--- a/tests/test_formulaic.py
+++ b/tests/test_formulaic.py
@@ -1,8 +1,29 @@
+import pytest
+
 from multi_condition_comparisions._util.formulaic import get_factor_storage_and_materializer
 
 
+@pytest.mark.parametrize(
+    "formula",
+    [
+        "~ donor",
+        "~ C(donor)",
+        "~ C(donor, contr.treatment(base='D2'))",
+        "~ C(donor, contr.sum)",
+        "~ condition",
+        "~ C(condition)",
+        "~ C(condition, contr.treatment(base='B'))",
+        "~ C(condition, contr.sum)",
+        "~ 0 + condition",
+        "~ condition + donor",
+        "~ condition * donor",
+        "~ condition + C(condition) + C(condition, contr.treatment(base='B'))",
+        "~ condition + continuous + np.log(continuous)",
+        "~ condition * donor + continuous",
+    ],
+)
 def test_custom_materializer(test_adata_minimal, formula):
     """Test that the custom materializer correctly stores the baseline category."""
     factor_storage, materializer = get_factor_storage_and_materializer()
     materializer(test_adata_minimal.obs).get_model_matrix(formula)
-    raise AssertionError()
+    raise AssertionError(factor_storage)

From 402cff4428629db4b28e23ae5ecb7ab162758f05 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 2 Apr 2024 16:51:55 +0200
Subject: [PATCH 09/36] WIP: deal with custom encoder classes

---
 .../_util/formulaic.py                        | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index f1eb4e3..d2d818a 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -21,8 +21,16 @@ class FactorMetadata:
     reduced_rank: bool
     """Whether a column will be dropped because it is redundant"""
 
+    custom_encoder: str = None
+
     drop_field: str = None
-    """The category that is dropped. Note that this may also be populated if `reduced_rank = False`"""
+    """
+    The category that is dropped.
+
+    Note that
+      * this may also be populated if `reduced_rank = False`
+      * this is only populated when no encoder was used (e.g. `~ donor` but NOT `~ C(donor)`.
+    """
 
     kind: Factor.Kind = None
     """Type of the factor"""
@@ -30,9 +38,26 @@ class FactorMetadata:
     categories: Sequence[str] = None
     """The unique categories in this factor"""
 
+    column_names: Sequence[str] = None
+    """The column names for this factor included in the design matrix"""
+
     colname_format: str = None
     """A formattable string that can be used to generate the column name in the design matrix, e.g. `{name}[T.{field}]`"""
 
+    @property
+    def base(self) -> str | None:
+        """The base category for this categorical factor"""
+        if not self.reduced_rank:
+            return None
+        else:
+            if self.custom_encoder:
+                tmp_base = set(self.categories) - set(self.column_names)
+                assert len(tmp_base) == 1
+                return tmp_base.pop()
+            else:
+                assert self.drop_field is not None
+                return self.drop_field
+
 
 def get_factor_storage_and_materializer() -> tuple[dict[str, FactorMetadata], type]:
     """
@@ -64,6 +89,7 @@ def _encode_evaled_factor(
                 # we haven't accounted for yet.
                 raise AssertionError("Factor already present in metadata storage and not reusing cached encoding")
             self.factor_metadata_storage[factor.expr] = FactorMetadata(name=factor.expr, reduced_rank=reduced_rank)
+            tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique()))
             return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
 
         @override

From 438a46919caa8e5a271795bb5ee34f3513fe6f5c Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 2 Apr 2024 17:08:09 +0200
Subject: [PATCH 10/36] WIP stub testcase

---
 .../_util/formulaic.py                        | 37 +++++---
 tests/test_formulaic.py                       | 88 +++++++++++++++----
 2 files changed, 97 insertions(+), 28 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index d2d818a..cb9b5d2 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -21,7 +21,14 @@ class FactorMetadata:
     reduced_rank: bool
     """Whether a column will be dropped because it is redundant"""
 
-    custom_encoder: str = None
+    custom_encoder: bool
+    """Whether or not a custom encoder (e.g. `C(...)`) was used."""
+
+    categories: Sequence[str]
+    """The unique categories in this factor (after applying `drop_rows`)"""
+
+    kind: Factor.Kind = None
+    """Type of the factor"""
 
     drop_field: str = None
     """
@@ -32,21 +39,25 @@ class FactorMetadata:
       * this is only populated when no encoder was used (e.g. `~ donor` but NOT `~ C(donor)`.
     """
 
-    kind: Factor.Kind = None
-    """Type of the factor"""
-
-    categories: Sequence[str] = None
-    """The unique categories in this factor"""
-
     column_names: Sequence[str] = None
-    """The column names for this factor included in the design matrix"""
+    """
+    The column names for this factor included in the design matrix.
+
+    This may be the same as `categories` if the default encoder is used, or
+    categories without the base level if a custom encoder (e.g. `C(...)`) is used.
+    """
 
     colname_format: str = None
     """A formattable string that can be used to generate the column name in the design matrix, e.g. `{name}[T.{field}]`"""
 
     @property
     def base(self) -> str | None:
-        """The base category for this categorical factor"""
+        """
+        The base category for this categorical factor.
+
+        This is derived from `drop_field` (for default encoding) or by comparing the column names in
+        the design matrix with all categories (for custom encoding, e.g. `C(...)`).
+        """
         if not self.reduced_rank:
             return None
         else:
@@ -88,8 +99,12 @@ def _encode_evaled_factor(
                 # However, if it's not just reusing an existing encoding, something unexpected is happening that
                 # we haven't accounted for yet.
                 raise AssertionError("Factor already present in metadata storage and not reusing cached encoding")
-            self.factor_metadata_storage[factor.expr] = FactorMetadata(name=factor.expr, reduced_rank=reduced_rank)
-            tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique()))
+            self.factor_metadata_storage[factor.expr] = FactorMetadata(
+                name=factor.expr,
+                reduced_rank=reduced_rank,
+                categories=tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique())),
+                custom_encoder=factor.metadata.encoder is not None,
+            )
             return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
 
         @override
diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
index 446a950..b6916ee 100644
--- a/tests/test_formulaic.py
+++ b/tests/test_formulaic.py
@@ -4,26 +4,80 @@
 
 
 @pytest.mark.parametrize(
-    "formula",
+    "formula,expected_factor_metadata",
     [
-        "~ donor",
-        "~ C(donor)",
-        "~ C(donor, contr.treatment(base='D2'))",
-        "~ C(donor, contr.sum)",
-        "~ condition",
-        "~ C(condition)",
-        "~ C(condition, contr.treatment(base='B'))",
-        "~ C(condition, contr.sum)",
-        "~ 0 + condition",
-        "~ condition + donor",
-        "~ condition * donor",
-        "~ condition + C(condition) + C(condition, contr.treatment(base='B'))",
-        "~ condition + continuous + np.log(continuous)",
-        "~ condition * donor + continuous",
+        [
+            "~ donor",
+            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+        ],
+        [
+            "~ C(donor)",
+            {"donor": {"reduced_rank": True, "custom_encoder": True, "base": ""}},
+        ],
+        [
+            "~ C(donor, contr.treatment(base='D2'))",
+            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D2"}},
+        ],
+        [
+            "~ C(donor, contr.sum)",
+            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+        ],
+        [
+            "~ condition",
+            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+        ],
+        [
+            "~ C(condition)",
+            {"condition": {"reduced_rank": True, "custom_encoder": True, "base": ""}},
+        ],
+        [
+            "~ C(condition, contr.treatment(base='B'))",
+            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": "B"}},
+        ],
+        [
+            "~ C(condition, contr.sum)",
+            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+        ],
+        [
+            "~ 0 + condition",
+            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+        ],
+        [
+            "~ condition + donor",
+            {
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+            },
+        ],
+        [
+            "~ condition * donor",
+            {
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+            },
+        ],
+        [
+            "~ condition + C(condition) + C(condition, contr.treatment(base='B'))",
+            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+        ],
+        [
+            "~ condition + continuous + np.log(continuous)",
+            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+        ],
+        [
+            "~ condition * donor + continuous",
+            {
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+            },
+        ],
     ],
 )
-def test_custom_materializer(test_adata_minimal, formula):
+def test_custom_materializer(test_adata_minimal, formula, expected_factor_metadata):
     """Test that the custom materializer correctly stores the baseline category."""
     factor_storage, materializer = get_factor_storage_and_materializer()
     materializer(test_adata_minimal.obs).get_model_matrix(formula)
-    raise AssertionError(factor_storage)
+    for factor, expected_metadata in expected_factor_metadata.items():
+        actual_metadata = factor_storage[factor]
+        for k in expected_metadata:
+            assert getattr(actual_metadata, k) == expected_metadata[k]

From f6be15db33ac5f1e3d81d8c7993516577d56aeb9 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 2 Apr 2024 20:29:09 +0200
Subject: [PATCH 11/36] Add testcases for custom materializer

---
 .../_util/formulaic.py                        |  23 +++-
 tests/test_formulaic.py                       | 126 +++++++++++++++---
 2 files changed, 126 insertions(+), 23 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index cb9b5d2..585cfaa 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -27,7 +27,7 @@ class FactorMetadata:
     categories: Sequence[str]
     """The unique categories in this factor (after applying `drop_rows`)"""
 
-    kind: Factor.Kind = None
+    kind: Factor.Kind
     """Type of the factor"""
 
     drop_field: str = None
@@ -75,6 +75,13 @@ def get_factor_storage_and_materializer() -> tuple[dict[str, FactorMetadata], ty
     Keep track of categorical factors used in a model spec.
 
     Generates a custom materializers that reports back certain metadata upon materialization of the model matrix.
+
+    Returns
+    -------
+    factor_storage
+        A dictionary pointing to Metadata for each factor processed by the custom materializer
+    CustomPandasMaterializer
+        A materializer class that is tied to the particular instance of `factor_storage`.
     """
     factor_storage: dict[str, FactorMetadata] = {}
 
@@ -91,6 +98,11 @@ class CustomPandasMaterializer(PandasMaterializer):
         def _encode_evaled_factor(
             self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
         ) -> dict[str, Any]:
+            """
+            Function is called just before the factor is evaluated.
+
+            We can record some metadata, before we call the original function.
+            """
             if factor.expr in self.factor_metadata_storage and not (
                 factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache
             ):
@@ -104,15 +116,20 @@ def _encode_evaled_factor(
                 reduced_rank=reduced_rank,
                 categories=tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique())),
                 custom_encoder=factor.metadata.encoder is not None,
+                kind=factor.metadata.kind,
             )
             return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
 
         @override
         def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict]) -> dict[str, Any]:
-            """Function is called at the end, here we still have access to the raw factor values."""
+            """
+            Function is called at the end, before the design matrix gets materialized.
+
+            Here we have access to additional metadata, such as `drop_field`.
+            """
             factor_metadata = self.factor_metadata_storage[name]
             factor_metadata.drop_field = values.__formulaic_metadata__.drop_field
-            factor_metadata.categories = values.__formulaic_metadata__.column_names
+            factor_metadata.column_names = values.__formulaic_metadata__.column_names
             factor_metadata.colname_format = values.__formulaic_metadata__.format
 
             return super()._flatten_encoded_evaled_factor(name, values)
diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
index b6916ee..e099199 100644
--- a/tests/test_formulaic.py
+++ b/tests/test_formulaic.py
@@ -1,80 +1,166 @@
+import pandas as pd
 import pytest
+from formulaic.parser.types import Factor
 
 from multi_condition_comparisions._util.formulaic import get_factor_storage_and_materializer
 
 
 @pytest.mark.parametrize(
-    "formula,expected_factor_metadata",
+    "formula,reorder_categorical,expected_factor_metadata",
     [
         [
             "~ donor",
-            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+            None,
+            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"}},
+        ],
+        [
+            "~ donor",
+            {"donor": ["D2", "D1", "D0", "D3"]},
+            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D2"}},
         ],
         [
             "~ C(donor)",
-            {"donor": {"reduced_rank": True, "custom_encoder": True, "base": ""}},
+            None,
+            {"C(donor)": {"reduced_rank": True, "custom_encoder": True, "base": "D0"}},
         ],
         [
             "~ C(donor, contr.treatment(base='D2'))",
-            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": "D2"}},
+            None,
+            {"C(donor, contr.treatment(base='D2'))": {"reduced_rank": True, "custom_encoder": True, "base": "D2"}},
         ],
         [
             "~ C(donor, contr.sum)",
-            {"donor": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+            None,
+            {"C(donor, contr.sum)": {"reduced_rank": True, "custom_encoder": True, "base": "D3"}},
+        ],
+        [
+            "~ C(donor, contr.sum)",
+            {"donor": ["D1", "D0", "D3", "D2"]},
+            {"C(donor, contr.sum)": {"reduced_rank": True, "custom_encoder": True, "base": "D2"}},
         ],
         [
             "~ condition",
-            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+            None,
+            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"}},
         ],
         [
             "~ C(condition)",
-            {"condition": {"reduced_rank": True, "custom_encoder": True, "base": ""}},
+            None,
+            {"C(condition)": {"reduced_rank": True, "custom_encoder": True, "base": "A"}},
         ],
         [
             "~ C(condition, contr.treatment(base='B'))",
-            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": "B"}},
+            None,
+            {"C(condition, contr.treatment(base='B'))": {"reduced_rank": True, "custom_encoder": True, "base": "B"}},
         ],
         [
             "~ C(condition, contr.sum)",
-            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+            None,
+            {"C(condition, contr.sum)": {"reduced_rank": True, "custom_encoder": True, "base": "B"}},
         ],
         [
             "~ 0 + condition",
-            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+            None,
+            {"condition": {"reduced_rank": False, "custom_encoder": False, "base": None}},
         ],
         [
             "~ condition + donor",
+            None,
             {
-                "condition": {"reduced_rank": True, "custom_encoder": False, "base": ""},
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
+            },
+        ],
+        [
+            "~ 0 + condition + donor",
+            None,
+            {
+                "condition": {"reduced_rank": False, "custom_encoder": False, "base": None},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
             },
         ],
         [
             "~ condition * donor",
+            None,
+            {
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
+            },
+        ],
+        [
+            "~ condition * C(donor, contr.treatment(base='D2'))",
+            None,
             {
-                "condition": {"reduced_rank": True, "custom_encoder": False, "base": ""},
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"},
+                "C(donor, contr.treatment(base='D2'))": {"reduced_rank": True, "custom_encoder": True, "base": "D2"},
             },
         ],
         [
             "~ condition + C(condition) + C(condition, contr.treatment(base='B'))",
-            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+            None,
+            {
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"},
+                "C(condition)": {"reduced_rank": True, "custom_encoder": True, "base": "A"},
+                "C(condition, contr.treatment(base='B'))": {"reduced_rank": True, "custom_encoder": True, "base": "B"},
+            },
         ],
         [
             "~ condition + continuous + np.log(continuous)",
-            {"condition": {"reduced_rank": True, "custom_encoder": False, "base": ""}},
+            None,
+            {
+                "condition": {
+                    "reduced_rank": True,
+                    "custom_encoder": False,
+                    "base": "A",
+                    "kind": Factor.Kind.CATEGORICAL,
+                },
+                "continuous": {
+                    "reduced_rank": False,
+                    "custom_encoder": False,
+                    "base": None,
+                    "kind": Factor.Kind.NUMERICAL,
+                },
+                "np.log(continuous)": {
+                    "reduced_rank": False,
+                    "custom_encoder": False,
+                    "base": None,
+                    "kind": Factor.Kind.NUMERICAL,
+                },
+            },
         ],
         [
             "~ condition * donor + continuous",
+            None,
             {
-                "condition": {"reduced_rank": True, "custom_encoder": False, "base": ""},
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": ""},
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
+                "continuous": {
+                    "reduced_rank": False,
+                    "custom_encoder": False,
+                    "base": None,
+                    "kind": Factor.Kind.NUMERICAL,
+                },
             },
         ],
     ],
 )
-def test_custom_materializer(test_adata_minimal, formula, expected_factor_metadata):
-    """Test that the custom materializer correctly stores the baseline category."""
+def test_custom_materializer(test_adata_minimal, formula, reorder_categorical, expected_factor_metadata):
+    """Test that the custom materializer correctly stores the baseline category.
+
+    Parameters
+    ----------
+    test_adata_minimal
+        adata fixture
+    formula
+        Formula to test
+    reorder_categorical
+        Create a pandas categorical for a given column with a certain order of categories
+    expected_factor_metadata
+        dict with expected values for each factor
+    """
+    if reorder_categorical is not None:
+        for col, order in reorder_categorical.items():
+            test_adata_minimal.obs[col] = pd.Categorical(test_adata_minimal.obs[col], categories=order)
     factor_storage, materializer = get_factor_storage_and_materializer()
     materializer(test_adata_minimal.obs).get_model_matrix(formula)
     for factor, expected_metadata in expected_factor_metadata.items():

From 784266b77022c90402ea4633c2b54476f942ef19 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 2 Apr 2024 21:07:25 +0200
Subject: [PATCH 12/36] Update docstring for linear model base

---
 .../methods/_base.py                          | 47 ++++++++++++++-----
 tests/test_base.py                            |  0
 2 files changed, 34 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_base.py

diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index b834207..a78f6ca 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -252,22 +252,25 @@ def fit(self, **kwargs) -> None:
         ...
 
     @abstractmethod
-    def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame: ...
+    def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataFrame: ...
 
-    def test_contrasts(
-        self, contrasts: list[str] | dict[str, np.ndarray] | dict[str, list] | np.ndarray, **kwargs
-    ) -> pd.DataFrame:
+    def test_contrasts(self, contrasts: Sequence[float] | dict[str, Sequence[float]], **kwargs) -> pd.DataFrame:
         """
-        Conduct a specific test.
+        Perform a comparison as specified in a contrast vector.
+
+        The contrast vector is a numeric vector with one element for each column in the design matrix.
+        We recommend building the contrast vector using {func}`~LinearModelBase.contrast` or
+        {func}`~LinearModelBase.cond`.
 
-        Please use :method:`contrast` to build the contrasts instead of building it on your own.
+        Multiple comparisons can be specified as a dictionary of contrast vectors.
 
         Parameters
         ----------
         contrasts:
-            either a single contrast, or a dictionary of contrasts where the key is the name for that particular contrast.
-            Each contrast can be either a vector of coefficients (the most general case), a string, a DSL (work in progress)
-            or a tuple with three elements contrasts = ("condition", "control", "treatment")
+            Either a numeric contrast vector, or a dictionary of numeric contrast vectors. The dictionary keys
+            are added in a column named `contrast` in the result dataframe.
+        kwargs
+            are passed to the respective implementation
         """
         if not isinstance(contrasts, dict):
             contrasts = {None: contrasts}
@@ -348,9 +351,27 @@ def _get_var_from_colname(colname):
 
         return self.design.model_spec.get_model_matrix(df)
 
-    def contrast(self, column: str, baseline: str, group_to_compare: str) -> list:
-        """Build a simple contrast for pairwise comparisons.
+    def contrast(self, column: str, baseline: str, group_to_compare: str) -> pd.Series:
+        """
+        Build a simple contrast for pairwise comparisons.
+
+        This is an alias for
+
+        ```
+        model.cond(column=group_to_compare) - model.cond(column=baseline)
+        ```
+
+        Parameters
+        ----------
+        column
+            column in adata.obs to test on
+        baseline
+            baseline category (denominator)
+        group_to_compare
+            category to compare against baseline (nominator)
 
-        In the future all methods should be able to accept the output of :method:`StatsmodelsDE.contrast` but alas a big TODO.
+        Returns
+        -------
+        Numeric contrast vector
         """
-        return [column, baseline, group_to_compare]
+        return self.cond(**{column: group_to_compare}) - self.cond(**{column: baseline})
diff --git a/tests/test_base.py b/tests/test_base.py
new file mode 100644
index 0000000..e69de29

From 3a1ef9ae4656c79a05adfa6b4b3bb87d9136b015 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 2 Apr 2024 21:46:55 +0200
Subject: [PATCH 13/36] WIP reimplement model.cond

---
 docs/notebooks/example.ipynb                  | 132 ++++++++++++++++--
 .../_util/formulaic.py                        |   1 -
 .../methods/_base.py                          |  95 ++++++++-----
 tests/test_base.py                            |  19 +++
 4 files changed, 201 insertions(+), 46 deletions(-)

diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index ce7dbad..d80220a 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -9,32 +9,146 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import anndata as ad\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import pytest\n",
+    "from pydeseq2.utils import load_example_data\n",
+    "from formulaic import model_matrix\n",
+    "\n",
+    "\n",
+    "@pytest.fixture\n",
+    "def test_counts():\n",
+    "    return load_example_data(\n",
+    "        modality=\"raw_counts\",\n",
+    "        dataset=\"synthetic\",\n",
+    "        debug=False,\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "@pytest.fixture\n",
+    "def test_metadata():\n",
+    "    return load_example_data(\n",
+    "        modality=\"metadata\",\n",
+    "        dataset=\"synthetic\",\n",
+    "        debug=False,\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "@pytest.fixture\n",
+    "def test_adata(test_counts, test_metadata):\n",
+    "    return ad.AnnData(X=test_counts, obs=test_metadata)\n",
+    "\n",
+    "\n",
+    "def test_adata_minimal():\n",
+    "    matrix_format = np.array\n",
+    "    n_obs = 80\n",
+    "    n_donors = n_obs // 4\n",
+    "    rng = np.random.default_rng(9)  # make tests deterministic\n",
+    "    obs = pd.DataFrame(\n",
+    "        {\n",
+    "            \"condition\": [\"A\", \"B\"] * (n_obs // 2),\n",
+    "            \"donor\": sum(([f\"D{i}\"] * n_donors for i in range(n_obs // n_donors)), []),\n",
+    "            \"other\": ([\"X\"] * (n_obs // 4)) + ([\"Y\"] * ((3 * n_obs) // 4)),\n",
+    "            \"pairing\": sum(([str(i), str(i)] for i in range(n_obs // 2)), []),\n",
+    "            \"continuous\": [rng.uniform(0, 1) * 4000 for _ in range(n_obs)],\n",
+    "        },\n",
+    "    )\n",
+    "    var = pd.DataFrame(index=[\"gene1\", \"gene2\"])\n",
+    "    group1 = rng.negative_binomial(20, 0.1, n_obs // 2)  # large mean\n",
+    "    group2 = rng.negative_binomial(5, 0.5, n_obs // 2)  # small mean\n",
+    "\n",
+    "    condition_data = np.empty((n_obs,), dtype=group1.dtype)\n",
+    "    condition_data[0::2] = group1\n",
+    "    condition_data[1::2] = group2\n",
+    "\n",
+    "    donor_data = np.empty((n_obs,), dtype=group1.dtype)\n",
+    "    donor_data[0:n_donors] = group2[:n_donors]\n",
+    "    donor_data[n_donors : (2 * n_donors)] = group1[n_donors:]\n",
+    "\n",
+    "    donor_data[(2 * n_donors) : (3 * n_donors)] = group2[:n_donors]\n",
+    "    donor_data[(3 * n_donors) :] = group1[n_donors:]\n",
+    "\n",
+    "    X = matrix_format(np.vstack([condition_data, donor_data]).T)\n",
+    "    return ad.AnnData(X=X, obs=obs, var=var)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/sturm/apps/micromamba/envs/multi-condition-comparisons/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
+      "/home/sturm/apps/micromamba/envs/multi-condition-comparisons/lib/python3.10/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.\n",
+      "  warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n"
      ]
     }
    ],
    "source": [
-    "%load_ext autoreload\n",
-    "%autoreload 2\n",
-    "from multi_condition_comparisions.tl.de import StatsmodelsDE\n",
-    "import scanpy as sc"
+    "adata = test_adata_minimal()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "adata = sc.read_h5ad(\"./airway.h5ad\")"
+    "design = model_matrix(\"~ condition + C(condition) + donor + continuous\", adata.obs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1: set(),\n",
+       " condition: {'condition'},\n",
+       " C(condition): {'C', 'condition'},\n",
+       " donor: {'donor'},\n",
+       " continuous: {'continuous'}}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "design.model_spec.term_variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'condition': {C(condition), condition},\n",
+       " 'C': {C(condition)},\n",
+       " 'donor': {donor},\n",
+       " 'continuous': {continuous}}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "design.model_spec.variable_terms"
    ]
   },
   {
diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 585cfaa..25e7b20 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -7,7 +7,6 @@
 from formulaic import FactorValues, ModelSpec
 from formulaic.materializers import PandasMaterializer
 from formulaic.materializers.types import EvaluatedFactor
-from formulaic.parser.types import Factor
 from interface_meta import override
 
 
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index a78f6ca..a85b462 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -1,4 +1,3 @@
-import re
 from abc import ABC, abstractmethod
 from collections.abc import Mapping, Sequence
 from dataclasses import dataclass
@@ -7,10 +6,9 @@
 import numpy as np
 import pandas as pd
 from anndata import AnnData
-from formulaic import model_matrix
-from formulaic.model_matrix import ModelMatrix
 
 from multi_condition_comparisions._util import check_is_numeric_matrix
+from multi_condition_comparisions._util.formulaic import Factor, get_factor_storage_and_materializer
 
 
 @dataclass
@@ -119,6 +117,11 @@ def compare_groups(
 class LinearModelBase(MethodBase):
     """Base class for DE methods that have a linear model interface (i.e. support design matrices and contrast testing)"""
 
+    materializer = None
+    factor_storage = None
+    """Object to store metadata of formulaic factors which is used for building contrasts later. If a design matrix
+    is passed directly, this remains None."""
+
     def __init__(
         self,
         adata: AnnData,
@@ -148,7 +151,8 @@ def __init__(
         self._check_counts()
 
         if isinstance(design, str):
-            self.design = model_matrix(design, adata.obs)
+            self.factor_storage, self.materializer = get_factor_storage_and_materializer()
+            self.design = self.materializer(adata.obs).get_model_matrix(design)
         else:
             self.design = design
 
@@ -216,7 +220,7 @@ def compare_groups(
         return de_res
 
     @property
-    def variables(self):
+    def variables(self) -> set:
         """Get the names of the variables used in the model definition"""
         try:
             return self.design.model_spec.variables_by_source["data"]
@@ -303,53 +307,74 @@ def test_reduced(self, modelB: "MethodBase") -> pd.DataFrame:
 
     def cond(self, **kwargs) -> np.ndarray:
         """
-        The intention is to make contrasts using this function as in glmGamPoi
+        Get a contrast vector representing a specific condition.
+
+        The resulting contrast vector can be combined using arithmetic operations to generate a contrast
+        vector for a specific comparison, e.g. for a simple comparison treated vs. control
+
+        >>> contrast = model.cond(condition="treated") - model.cond(condition="ctrl")
+
+        When using an interaction term `cell * condition`, a comparison within a specific cell-type can be
+        easily built as follows:
+
+        >>> contrast = model.cond(cell="B cells", condition="ctrl") - cond(cell="B cells", condition="treated")
 
-        >>> res < -test_de(
-        ...     fit, contrast=cond(cell="B cells", condition="stim") - cond(cell="B cells", condition="ctrl")
-        ... )
+        This way of building contrast vectors is inspired by [glmGamPoi](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html).
 
         Parameters
         ----------
         **kwargs
+            column/value pairs
 
+        Returns
+        -------
+        A contrast vector that aligns to the columns of the design matrix.
         """
-
-        # TODO this is hacky - reach out to formulaic authors how to do this properly
-        def _get_var_from_colname(colname):
-            regex = re.compile(r"^.+\[T\.(.+)\]$")
-            return regex.search(colname).groups()[0]
-
-        if not isinstance(self.design, ModelMatrix):
+        if self.factor_storage is None:
             raise RuntimeError(
                 "Building contrasts with `cond` only works if you specified the model using a "
                 "formulaic formula. Please manually provide a contrast vector."
             )
+
         cond_dict = kwargs
-        for var in self.variables:
-            var_type = self.design.model_spec.encoder_state[var][0].value
-            if var_type == "categorical":
-                all_categories = set(self.design.model_spec.encoder_state[var][1]["categories"])
-            if var in kwargs:
-                if var_type == "categorical" and kwargs[var] not in all_categories:
+
+        if not set(cond_dict.keys()).issubset(self.variables):
+            raise ValueError(
+                "You specified a variable that is not part of the model. Available variables: "
+                + ",".join(self.variables)
+            )
+
+        # We now fill `cond_dict` such that it is equivalent to a data row from `adata.obs`.
+        # This data can then be passed to the `get_model_matrix` of formulaic to retreive a correpsonding
+        # contrast vector.
+        # To do so, we keep the values that were already specified, and fill all other values with the default category
+        # (the one that is usually dropped from the model for being redundant).
+        #
+        # `model_spec.variable_terms` is a mapping from variable to a set of terms. Unless a variable is used twice in the
+        # same formula (which for don't support for now), it contains exactly one element.
+        for var, term in self.design.model_spec.variable_terms.items():
+            if len(term) != 1:
+                raise RuntimeError(
+                    "Ambiguous variable! Building contrasts with model.cond only works "
+                    "when each variable occurs only once per formula"
+                )
+            term = term.pop()
+            term_metadata = self.factor_storage[term]
+            if var in cond_dict:
+                # In this case we keep the specified value in the dict, but we verify that it's a valid category
+                if term_metadata.kind == Factor.Kind.CATEGORICAL and cond_dict[var] not in term_metadata.categories:
                     raise ValueError(
-                        f"You specified a non-existant category for {var}. Possible categories: {', '.join(all_categories)}"
+                        f"You specified a non-existant category for {var}. Possible categories: {', '.join(term_metadata.categories)}"
                     )
             else:
                 # fill with default values
-                if var_type != "categorical":
-                    cond_dict[var] = 0
+                if term_metadata.kind == Factor.Kind.CATEGORICAL:
+                    cond_dict[var] = term_metadata.base
                 else:
-                    var_cols = self.design.columns[self.design.columns.str.startswith(f"{var}[")]
-
-                    present_categories = {_get_var_from_colname(x) for x in var_cols}
-                    dropped_category = all_categories - present_categories
-                    assert len(dropped_category) == 1
-                    cond_dict[var] = next(iter(dropped_category))
+                    cond_dict[var] = 0
 
         df = pd.DataFrame([kwargs])
-
-        return self.design.model_spec.get_model_matrix(df)
+        return self.design.model_spec.get_model_matrix(df).iloc[0]
 
     def contrast(self, column: str, baseline: str, group_to_compare: str) -> pd.Series:
         """
@@ -357,9 +382,7 @@ def contrast(self, column: str, baseline: str, group_to_compare: str) -> pd.Seri
 
         This is an alias for
 
-        ```
-        model.cond(column=group_to_compare) - model.cond(column=baseline)
-        ```
+        >>> model.cond(column=group_to_compare) - model.cond(column=baseline)
 
         Parameters
         ----------
diff --git a/tests/test_base.py b/tests/test_base.py
index e69de29..a7cbbb6 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -0,0 +1,19 @@
+from collections.abc import Sequence
+
+import pytest
+from pandas.core.api import DataFrame as DataFrame
+
+from multi_condition_comparisions.methods import LinearModelBase
+
+
+@pytest.fixture
+def MockLinearModel():
+    class _MockLinearModel(LinearModelBase):
+        def _check_counts(self) -> None:
+            pass
+
+        def fit(self, **kwargs) -> None:
+            pass
+
+        def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFrame:
+            pass

From 90e82474420d2e2937d97ad78382e49ea445f280 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 2 Apr 2024 21:58:19 +0200
Subject: [PATCH 14/36] Stub testcase for model.cond

---
 .../_util/formulaic.py                            |  1 +
 tests/test_base.py                                | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 25e7b20..f8b1811 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -8,6 +8,7 @@
 from formulaic.materializers import PandasMaterializer
 from formulaic.materializers.types import EvaluatedFactor
 from interface_meta import override
+from formulaic.parser.types import Factor
 
 
 @dataclass
diff --git a/tests/test_base.py b/tests/test_base.py
index a7cbbb6..3deed9e 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -17,3 +17,18 @@ def fit(self, **kwargs) -> None:
 
         def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFrame:
             pass
+
+    return _MockLinearModel
+
+
+@pytest.mark.parametrize(
+    "formula,cond_kwargs,expected_contrast",
+    [
+        ["~ condition", {"condition": "A"}, [0, 0]],
+    ],
+)
+def test_model_cond(test_adata_minimal, MockLinearModel, formula, cond_kwargs, expected_contrast):
+    mod = MockLinearModel(test_adata_minimal, formula)
+    actual_contrast = mod.cond(**cond_kwargs)
+    assert actual_contrast.tolist() == expected_contrast
+    assert actual_contrast.index.tolist() == mod.design.columns.tolist()

From 068c7cc949b06e20a2d3ef58d4169140b62cee83 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 2 Apr 2024 19:58:30 +0000
Subject: [PATCH 15/36] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/multi_condition_comparisions/_util/formulaic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index f8b1811..585cfaa 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -7,8 +7,8 @@
 from formulaic import FactorValues, ModelSpec
 from formulaic.materializers import PandasMaterializer
 from formulaic.materializers.types import EvaluatedFactor
-from interface_meta import override
 from formulaic.parser.types import Factor
+from interface_meta import override
 
 
 @dataclass

From 46f142028f8c4c88d3a0ad613c5872a93785a16c Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Wed, 3 Apr 2024 11:45:08 +0200
Subject: [PATCH 16/36] Fix that contrasts couldn't be build from model spec

---
 .../_util/formulaic.py                        | 50 ++++++++++++-------
 .../methods/_base.py                          | 16 +++---
 tests/test_base.py                            |  2 +-
 3 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 585cfaa..c10f56a 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -93,6 +93,16 @@ class CustomPandasMaterializer(PandasMaterializer):
         REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
 
         factor_metadata_storage = factor_storage
+        recording_active = True
+
+        def stop_recording(self):
+            """
+            Call this function after the model matrix has been completely initalized.
+
+            Then the materializer can be reused for creating contrast vectors without modifying the stored values.
+            """
+            # Use class variable here
+            CustomPandasMaterializer.recording_active = False
 
         @override
         def _encode_evaled_factor(
@@ -103,21 +113,22 @@ def _encode_evaled_factor(
 
             We can record some metadata, before we call the original function.
             """
-            if factor.expr in self.factor_metadata_storage and not (
-                factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache
-            ):
-                # the same factor might be referred to multiple times in the same formula -- for instance, when using
-                # an interaction term such as group*condition. In that case formulaic is reuding a cached encoding.
-                # However, if it's not just reusing an existing encoding, something unexpected is happening that
-                # we haven't accounted for yet.
-                raise AssertionError("Factor already present in metadata storage and not reusing cached encoding")
-            self.factor_metadata_storage[factor.expr] = FactorMetadata(
-                name=factor.expr,
-                reduced_rank=reduced_rank,
-                categories=tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique())),
-                custom_encoder=factor.metadata.encoder is not None,
-                kind=factor.metadata.kind,
-            )
+            if self.recording_active:
+                if factor.expr in self.factor_metadata_storage and not (
+                    factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache
+                ):
+                    # the same factor might be referred to multiple times in the same formula -- for instance, when using
+                    # an interaction term such as group*condition. In that case formulaic is reuding a cached encoding.
+                    # However, if it's not just reusing an existing encoding, something unexpected is happening that
+                    # we haven't accounted for yet.
+                    raise AssertionError("Factor already present in metadata storage and not reusing cached encoding")
+                self.factor_metadata_storage[factor.expr] = FactorMetadata(
+                    name=factor.expr,
+                    reduced_rank=reduced_rank,
+                    categories=tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique())),
+                    custom_encoder=factor.metadata.encoder is not None,
+                    kind=factor.metadata.kind,
+                )
             return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
 
         @override
@@ -127,10 +138,11 @@ def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict])
 
             Here we have access to additional metadata, such as `drop_field`.
             """
-            factor_metadata = self.factor_metadata_storage[name]
-            factor_metadata.drop_field = values.__formulaic_metadata__.drop_field
-            factor_metadata.column_names = values.__formulaic_metadata__.column_names
-            factor_metadata.colname_format = values.__formulaic_metadata__.format
+            if self.recording_active:
+                factor_metadata = self.factor_metadata_storage[name]
+                factor_metadata.drop_field = values.__formulaic_metadata__.drop_field
+                factor_metadata.column_names = values.__formulaic_metadata__.column_names
+                factor_metadata.colname_format = values.__formulaic_metadata__.format
 
             return super()._flatten_encoded_evaled_factor(name, values)
 
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index a85b462..afc46b5 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -117,11 +117,6 @@ def compare_groups(
 class LinearModelBase(MethodBase):
     """Base class for DE methods that have a linear model interface (i.e. support design matrices and contrast testing)"""
 
-    materializer = None
-    factor_storage = None
-    """Object to store metadata of formulaic factors which is used for building contrasts later. If a design matrix
-    is passed directly, this remains None."""
-
     def __init__(
         self,
         adata: AnnData,
@@ -150,9 +145,16 @@ def __init__(
         super().__init__(adata, mask=mask, layer=layer)
         self._check_counts()
 
+        self.materializer = None
+        self.factor_storage = None
+        """Object to store metadata of formulaic factors which is used for building contrasts later. If a design matrix
+        is passed directly, this remains None."""
+
         if isinstance(design, str):
-            self.factor_storage, self.materializer = get_factor_storage_and_materializer()
-            self.design = self.materializer(adata.obs).get_model_matrix(design)
+            self.factor_storage, materializer_class = get_factor_storage_and_materializer()
+            self.materializer = materializer_class(adata.obs)
+            self.design = self.materializer.get_model_matrix(design)
+            self.materializer.stop_recording()
         else:
             self.design = design
 
diff --git a/tests/test_base.py b/tests/test_base.py
index 3deed9e..09fe93d 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -24,7 +24,7 @@ def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFram
 @pytest.mark.parametrize(
     "formula,cond_kwargs,expected_contrast",
     [
-        ["~ condition", {"condition": "A"}, [0, 0]],
+        ["~ condition", {"condition": "A"}, [1, 0]],
     ],
 )
 def test_model_cond(test_adata_minimal, MockLinearModel, formula, cond_kwargs, expected_contrast):

From c7d32906a71560c2a8cf822482ee68c76e5a01de Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Wed, 3 Apr 2024 11:54:27 +0200
Subject: [PATCH 17/36] Fix edgeR type hints

---
 src/multi_condition_comparisions/methods/_edger.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/multi_condition_comparisions/methods/_edger.py b/src/multi_condition_comparisions/methods/_edger.py
index dd328bb..897102c 100644
--- a/src/multi_condition_comparisions/methods/_edger.py
+++ b/src/multi_condition_comparisions/methods/_edger.py
@@ -1,3 +1,5 @@
+from collections.abc import Sequence
+
 import numpy as np
 import pandas as pd
 from scanpy import logging
@@ -81,7 +83,7 @@ def fit(self, **kwargs):  # adata, design, mask, layer
         ro.globalenv["fit"] = fit
         self.fit = fit
 
-    def _test_single_contrast(self, contrast: list[str], **kwargs) -> pd.DataFrame:
+    def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataFrame:
         """
         Conduct test for each contrast and return a data frame
 
@@ -119,11 +121,7 @@ def _test_single_contrast(self, contrast: list[str], **kwargs) -> pd.DataFrame:
             ) from None
 
         # Convert vector to R, which drops a category like `self.design_matrix` to use the intercept for the left out.
-        contrast_vec = [0] * len(self.design.columns)
-        make_contrast_column_key = lambda ind: f"{contrast[0]}[T.{contrast[ind]}]"
-        for index in [1, 2]:
-            if make_contrast_column_key(index) in self.design.columns:
-                contrast_vec[self.design.columns.to_list().index(f"{contrast[0]}[T.{contrast[index]}]")] = 1
+        contrast_vec = contrast
         contrast_vec_r = ro.conversion.py2rpy(np.asarray(contrast_vec))
         ro.globalenv["contrast_vec"] = contrast_vec_r
 

From 36ed50919d08a3b9b468238355a4383d9ca51d89 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Wed, 3 Apr 2024 12:48:54 +0200
Subject: [PATCH 18/36] Fix pydeseq2 function signatures

---
 .../methods/_pydeseq2.py                      | 31 ++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/multi_condition_comparisions/methods/_pydeseq2.py b/src/multi_condition_comparisions/methods/_pydeseq2.py
index f46d775..5cdc666 100644
--- a/src/multi_condition_comparisions/methods/_pydeseq2.py
+++ b/src/multi_condition_comparisions/methods/_pydeseq2.py
@@ -63,18 +63,27 @@ def fit(self, **kwargs) -> pd.DataFrame:
         dds.deseq2()
         self.dds = dds
 
-    def _test_single_contrast(self, contrast: list[str], alpha=0.05, **kwargs) -> pd.DataFrame:
+    # TODO: PyDeseq2 doesn't support arbitrary designs and contrasts yet
+    # see https://github.com/owkin/PyDESeq2/issues/213
+    #
+    # Therefore these functions are overridden in a way to make it work with PyDESeq2,
+    # ingoring the inconsistency of function signatures. Once arbitrary design
+    # matrices and contrasts are supported by PyDEseq2, we can fully support the
+    # Linear model interface.
+    def _test_single_contrast(self, contrast: list[str], alpha=0.05, **kwargs) -> pd.DataFrame:  # type: ignore
         """
         Conduct a specific test and returns a Pandas DataFrame.
 
+        Note that PyDESeq2 doesn't support arbitrary contrasts yet.
+
         Parameters
         ----------
-        contrasts:
-            list of three strings of the form
-            ["variable", "tested level", "reference level"]
-        alpha: p value threshold used for controlling fdr with
-        independent hypothesis weighting
-        kwargs: extra arguments to pass to DeseqStats()
+        contrasts
+            list of three strings of the form `["variable", "tested level", "reference level"]`.
+        alpha
+            p value threshold used for controlling fdr with independent hypothesis weighting
+        kwargs
+            extra arguments to pass to DeseqStats()
         """
         stat_res = DeseqStats(self.dds, contrast=contrast, alpha=alpha, **kwargs)
         # Calling `.summary()` is required to fill the `results_df` data frame
@@ -87,3 +96,11 @@ def _test_single_contrast(self, contrast: list[str], alpha=0.05, **kwargs) -> pd
         res_df.index.name = "variable"
         res_df = res_df.reset_index()
         return res_df
+
+    def cond(self, **kwargs) -> ndarray:
+        raise NotImplementedError(
+            "PyDESeq2 currently doesn't support arbitratry contrasts, see https://github.com/owkin/PyDESeq2/issues/213"
+        )
+
+    def contrast(self, column: str, baseline: str, group_to_compare: str) -> tuple[str, str, str]:  # type: ignore
+        return (column, group_to_compare, baseline)

From 9d8add0da700ad800bac573a64a9c1648b11238a Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Wed, 3 Apr 2024 13:14:09 +0200
Subject: [PATCH 19/36] Fix edgeR tests

---
 src/multi_condition_comparisions/methods/_base.py  | 2 +-
 src/multi_condition_comparisions/methods/_edger.py | 7 +++----
 tests/test_edger.py                                | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index afc46b5..7413a99 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -360,7 +360,7 @@ def cond(self, **kwargs) -> np.ndarray:
                     "Ambiguous variable! Building contrasts with model.cond only works "
                     "when each variable occurs only once per formula"
                 )
-            term = term.pop()
+            term = next(iter(term))
             term_metadata = self.factor_storage[term]
             if var in cond_dict:
                 # In this case we keep the specified value in the dict, but we verify that it's a valid category
diff --git a/src/multi_condition_comparisions/methods/_edger.py b/src/multi_condition_comparisions/methods/_edger.py
index 897102c..a7691c5 100644
--- a/src/multi_condition_comparisions/methods/_edger.py
+++ b/src/multi_condition_comparisions/methods/_edger.py
@@ -50,11 +50,11 @@ def fit(self, **kwargs):  # adata, design, mask, layer
 
         try:
             edger = importr("edgeR")
-        except ImportError:
+        except ImportError as e:
             raise ImportError(
                 "edgeR requires a valid R installation with the following packages:\n"
                 "edgeR, BiocParallel, RhpcBLASctl"
-            ) from None
+            ) from e
 
         # Convert dataframe
         with localconverter(ro.default_converter + numpy2ri.converter):
@@ -121,8 +121,7 @@ def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataF
             ) from None
 
         # Convert vector to R, which drops a category like `self.design_matrix` to use the intercept for the left out.
-        contrast_vec = contrast
-        contrast_vec_r = ro.conversion.py2rpy(np.asarray(contrast_vec))
+        contrast_vec_r = ro.conversion.py2rpy(np.asarray(contrast))
         ro.globalenv["contrast_vec"] = contrast_vec_r
 
         # Test contrast with R
diff --git a/tests/test_edger.py b/tests/test_edger.py
index 521403b..b55fb36 100644
--- a/tests/test_edger.py
+++ b/tests/test_edger.py
@@ -12,7 +12,7 @@ def test_edger_simple(test_adata):
     """
     method = EdgeR(adata=test_adata, design="~condition")
     method.fit()
-    res_df = method.test_contrasts(["condition", "A", "B"])
+    res_df = method.test_contrasts(method.contrast("condition", "A", "B"))
 
     assert len(res_df) == test_adata.n_vars
 
@@ -24,7 +24,7 @@ def test_edger_complex(test_adata):
     test_adata.obs["condition1"] = test_adata.obs["condition"].copy()
     method = EdgeR(adata=test_adata, design="~condition1+group")
     method.fit()
-    res_df = method.test_contrasts(["condition1", "A", "B"])
+    res_df = method.test_contrasts(method.contrast("condition1", "A", "B"))
 
     assert len(res_df) == test_adata.n_vars
     # Check that the index of the result matches the var_names of the AnnData object

From 28faf64547e4c9566380d7d0912fb8bd10c2aec6 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Wed, 3 Apr 2024 14:15:38 +0200
Subject: [PATCH 20/36] Fix model.cond term iteration

---
 src/multi_condition_comparisions/methods/_base.py | 6 ++++--
 tests/test_base.py                                | 8 ++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index 7413a99..7ef0f8b 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -353,8 +353,10 @@ def cond(self, **kwargs) -> np.ndarray:
         # (the one that is usually dropped from the model for being redundant).
         #
         # `model_spec.variable_terms` is a mapping from variable to a set of terms. Unless a variable is used twice in the
-        # same formula (which for don't support for now), it contains exactly one element.
-        for var, term in self.design.model_spec.variable_terms.items():
+        # same formula (which for don't support for now), it contains exactly one element. It also contains
+        # "non-data" variables such as `C` - therefore we use `self.variables` to loop through.
+        for var in self.variables:
+            term = self.design.model_spec.variable_terms[var]
             if len(term) != 1:
                 raise RuntimeError(
                     "Ambiguous variable! Building contrasts with model.cond only works "
diff --git a/tests/test_base.py b/tests/test_base.py
index 09fe93d..05a2438 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -25,6 +25,14 @@ def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFram
     "formula,cond_kwargs,expected_contrast",
     [
         ["~ condition", {"condition": "A"}, [1, 0]],
+        ["~ condition", {"condition": "B"}, [1, 1]],
+        ["~ 0 + condition", {"condition": "A"}, [1, 0]],
+        ["~ 0 + condition", {"condition": "B"}, [0, 1]],
+        ["~ donor", {"donor": "D0"}, [1, 0, 0, 0]],
+        ["~ C(donor)", {"donor": "D0"}, [1, 0, 0, 0]],
+        ["~ C(donor, contr.treatment(base='D2'))", {"donor": "D2"}, [1, 0, 0, 0]],
+        ["~ C(donor, contr.treatment(base='D2'))", {"donor": "D0"}, [1, 1, 0, 0]],
+        # TODO: also include tests for errors that should be caught by .cond
     ],
 )
 def test_model_cond(test_adata_minimal, MockLinearModel, formula, cond_kwargs, expected_contrast):

From 822c6232510c1eec204ed0461f453bf834e39d7a Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Wed, 3 Apr 2024 14:24:59 +0200
Subject: [PATCH 21/36] Get rid of class variable stuff

---
 .../_util/formulaic.py                        | 36 +++++++++++++++----
 .../methods/_base.py                          |  5 +--
 tests/test_formulaic.py                       |  2 +-
 3 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index c10f56a..c1f7281 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -1,6 +1,6 @@
 """Helpers to interact with Formulaic Formulas"""
 
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from dataclasses import dataclass
 from typing import Any
 
@@ -92,8 +92,32 @@ class CustomPandasMaterializer(PandasMaterializer):
         REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)
         REGISTER_OUTPUTS = ("pandas", "numpy", "sparse")
 
-        factor_metadata_storage = factor_storage
-        recording_active = True
+        def __init__(
+            self,
+            data: Any,
+            context: Mapping[str, Any] | None = None,
+            record_factor_metadata: bool = False,
+            **params: Any,
+        ):
+            """
+            Initialize the Materializer
+
+            Parameters
+            ----------
+            data
+                passed to PandasMaterializer
+            context
+                passed to PandasMaterializer
+            record_factor_metadata
+                Flag that tells whether this particular instance of the custom materializer class
+                is supposed to record factor metadata. Only the instance that is used for building the design
+                matrix should record the metadata. All other instances (e.g. used to generate contrast vectors)
+                should not record metadata to not overwrite the specifications from the design matrix.
+            **params
+                passed to PandasMaterializer
+            """
+            self.factor_metadata_storage = factor_storage if record_factor_metadata else None
+            super().__init__(data, context, **params)
 
         def stop_recording(self):
             """
@@ -102,7 +126,7 @@ def stop_recording(self):
             Then the materializer can be reused for creating contrast vectors without modifying the stored values.
             """
             # Use class variable here
-            CustomPandasMaterializer.recording_active = False
+            CustomPandasMaterializer.record_factor_metadata = False
 
         @override
         def _encode_evaled_factor(
@@ -113,7 +137,7 @@ def _encode_evaled_factor(
 
             We can record some metadata, before we call the original function.
             """
-            if self.recording_active:
+            if self.factor_metadata_storage is not None:
                 if factor.expr in self.factor_metadata_storage and not (
                     factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache
                 ):
@@ -138,7 +162,7 @@ def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict])
 
             Here we have access to additional metadata, such as `drop_field`.
             """
-            if self.recording_active:
+            if self.factor_metadata_storage is not None:
                 factor_metadata = self.factor_metadata_storage[name]
                 factor_metadata.drop_field = values.__formulaic_metadata__.drop_field
                 factor_metadata.column_names = values.__formulaic_metadata__.column_names
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index 7ef0f8b..b55a7e7 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -145,16 +145,13 @@ def __init__(
         super().__init__(adata, mask=mask, layer=layer)
         self._check_counts()
 
-        self.materializer = None
         self.factor_storage = None
         """Object to store metadata of formulaic factors which is used for building contrasts later. If a design matrix
         is passed directly, this remains None."""
 
         if isinstance(design, str):
             self.factor_storage, materializer_class = get_factor_storage_and_materializer()
-            self.materializer = materializer_class(adata.obs)
-            self.design = self.materializer.get_model_matrix(design)
-            self.materializer.stop_recording()
+            self.design = materializer_class(adata.obs, record_factor_metadata=True).get_model_matrix(design)
         else:
             self.design = design
 
diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
index e099199..92b261b 100644
--- a/tests/test_formulaic.py
+++ b/tests/test_formulaic.py
@@ -162,7 +162,7 @@ def test_custom_materializer(test_adata_minimal, formula, reorder_categorical, e
         for col, order in reorder_categorical.items():
             test_adata_minimal.obs[col] = pd.Categorical(test_adata_minimal.obs[col], categories=order)
     factor_storage, materializer = get_factor_storage_and_materializer()
-    materializer(test_adata_minimal.obs).get_model_matrix(formula)
+    materializer(test_adata_minimal.obs, record_factor_metadata=True).get_model_matrix(formula)
     for factor, expected_metadata in expected_factor_metadata.items():
         actual_metadata = factor_storage[factor]
         for k in expected_metadata:

From ea027a2f0d28f42f2a292d97e4fa887ebd49d331 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 9 Apr 2024 16:32:41 +0200
Subject: [PATCH 22/36] Account for multiple factors being generated

---
 .../_util/formulaic.py                        | 59 +++++++++----------
 .../methods/_base.py                          | 18 ++++--
 tests/conftest.py                             |  5 ++
 tests/test_base.py                            | 32 +++++++++-
 tests/test_formulaic.py                       | 10 ++++
 5 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index c1f7281..b9f4a8a 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -1,5 +1,6 @@
 """Helpers to interact with Formulaic Formulas"""
 
+from collections import defaultdict
 from collections.abc import Mapping, Sequence
 from dataclasses import dataclass
 from typing import Any
@@ -70,7 +71,7 @@ def base(self) -> str | None:
                 return self.drop_field
 
 
-def get_factor_storage_and_materializer() -> tuple[dict[str, FactorMetadata], type]:
+def get_factor_storage_and_materializer() -> tuple[dict[str, list[FactorMetadata]], type]:
     """
     Keep track of categorical factors used in a model spec.
 
@@ -83,7 +84,9 @@ def get_factor_storage_and_materializer() -> tuple[dict[str, FactorMetadata], ty
     CustomPandasMaterializer
         A materializer class that is tied to the particular instance of `factor_storage`.
     """
-    factor_storage: dict[str, FactorMetadata] = {}
+    # There can be multiple FactorMetadata entries per sample, for instance when formulaic generates an interaction
+    # term, it generates the factor with both full rank and reduced rank.
+    factor_storage: dict[str, list[FactorMetadata]] = defaultdict(list)
 
     class CustomPandasMaterializer(PandasMaterializer):
         """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
@@ -117,17 +120,10 @@ def __init__(
                 passed to PandasMaterializer
             """
             self.factor_metadata_storage = factor_storage if record_factor_metadata else None
+            # temporary pointer to metadata of factor that is currently evaluated
+            self._current_factor: FactorMetadata = None
             super().__init__(data, context, **params)
 
-        def stop_recording(self):
-            """
-            Call this function after the model matrix has been completely initalized.
-
-            Then the materializer can be reused for creating contrast vectors without modifying the stored values.
-            """
-            # Use class variable here
-            CustomPandasMaterializer.record_factor_metadata = False
-
         @override
         def _encode_evaled_factor(
             self, factor: EvaluatedFactor, spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False
@@ -137,22 +133,21 @@ def _encode_evaled_factor(
 
             We can record some metadata, before we call the original function.
             """
+            assert (
+                self._current_factor is None
+            ), "_current_factor should always be None when we start recording metadata"
             if self.factor_metadata_storage is not None:
-                if factor.expr in self.factor_metadata_storage and not (
-                    factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache
-                ):
-                    # the same factor might be referred to multiple times in the same formula -- for instance, when using
-                    # an interaction term such as group*condition. In that case formulaic is reuding a cached encoding.
-                    # However, if it's not just reusing an existing encoding, something unexpected is happening that
-                    # we haven't accounted for yet.
-                    raise AssertionError("Factor already present in metadata storage and not reusing cached encoding")
-                self.factor_metadata_storage[factor.expr] = FactorMetadata(
-                    name=factor.expr,
-                    reduced_rank=reduced_rank,
-                    categories=tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique())),
-                    custom_encoder=factor.metadata.encoder is not None,
-                    kind=factor.metadata.kind,
-                )
+                # Don't store if the factor is cached (then we should already have recorded it)
+                if factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache:
+                    assert factor.expr in self.factor_metadata_storage, "Factor should be there since it's cached"
+                else:
+                    self._current_factor = FactorMetadata(
+                        name=factor.expr,
+                        reduced_rank=reduced_rank,
+                        categories=tuple(sorted(factor.values.drop(index=factor.values.index[drop_rows]).unique())),
+                        custom_encoder=factor.metadata.encoder is not None,
+                        kind=factor.metadata.kind,
+                    )
             return super()._encode_evaled_factor(factor, spec, drop_rows, reduced_rank)
 
         @override
@@ -162,11 +157,13 @@ def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict])
 
             Here we have access to additional metadata, such as `drop_field`.
             """
-            if self.factor_metadata_storage is not None:
-                factor_metadata = self.factor_metadata_storage[name]
-                factor_metadata.drop_field = values.__formulaic_metadata__.drop_field
-                factor_metadata.column_names = values.__formulaic_metadata__.column_names
-                factor_metadata.colname_format = values.__formulaic_metadata__.format
+            if self._current_factor is not None:
+                assert self._current_factor.name == name
+                self._current_factor.drop_field = values.__formulaic_metadata__.drop_field
+                self._current_factor.column_names = values.__formulaic_metadata__.column_names
+                self._current_factor.colname_format = values.__formulaic_metadata__.format
+                self.factor_metadata_storage[name].append(self._current_factor)
+                self._current_factor = None
 
             return super()._flatten_encoded_evaled_factor(name, values)
 
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index b55a7e7..f640cc1 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -353,14 +353,18 @@ def cond(self, **kwargs) -> np.ndarray:
         # same formula (which for don't support for now), it contains exactly one element. It also contains
         # "non-data" variables such as `C` - therefore we use `self.variables` to loop through.
         for var in self.variables:
-            term = self.design.model_spec.variable_terms[var]
-            if len(term) != 1:
-                raise RuntimeError(
+            # A variable can refer to one or multiple terms. Either if a variable is specified
+            # multiple times in the model (e.g. ~ var + C(var); ~ continuous + np.log(continuous))
+            # or when there's an interaction term (e.g. ~ A * B ==> terms `A`, `B`, `A:B`)
+            terms = self.design.model_spec.variable_terms[var]
+            # Only terms tied to a single variable are in the factor storage. Interaction terms (A:B) are not.
+            terms_metadata = [self.factor_storage[term] for term in terms if term in self.factor_storage]
+            if len(terms_metadata) != 1:
+                raise ValueError(
                     "Ambiguous variable! Building contrasts with model.cond only works "
                     "when each variable occurs only once per formula"
                 )
-            term = next(iter(term))
-            term_metadata = self.factor_storage[term]
+            term_metadata = terms_metadata[0]
             if var in cond_dict:
                 # In this case we keep the specified value in the dict, but we verify that it's a valid category
                 if term_metadata.kind == Factor.Kind.CATEGORICAL and cond_dict[var] not in term_metadata.categories:
@@ -370,7 +374,9 @@ def cond(self, **kwargs) -> np.ndarray:
             else:
                 # fill with default values
                 if term_metadata.kind == Factor.Kind.CATEGORICAL:
-                    cond_dict[var] = term_metadata.base
+                    cond_dict[var] = (
+                        term_metadata.base if term_metadata.base is not None else "\0"
+                    )  # can be any other string that is not a category, but not None
                 else:
                     cond_dict[var] = 0
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 936ccf3..5eb3cd0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,3 +1,6 @@
+# TODO: temp workaround for Gregor
+import os
+
 import anndata as ad
 import numpy as np
 import pandas as pd
@@ -5,6 +8,8 @@
 import scipy.sparse as sp
 from pydeseq2.utils import load_example_data
 
+os.environ["R_HOME"] = "/home/sturm/apps/micromamba/envs/multi-condition-comparisons/lib/R"
+
 
 @pytest.fixture
 def test_counts():
diff --git a/tests/test_base.py b/tests/test_base.py
index 05a2438..010585f 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -26,17 +26,43 @@ def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFram
     [
         ["~ condition", {"condition": "A"}, [1, 0]],
         ["~ condition", {"condition": "B"}, [1, 1]],
+        ["~ condition", {"condition": "42"}, ValueError],  # non-existant category
         ["~ 0 + condition", {"condition": "A"}, [1, 0]],
         ["~ 0 + condition", {"condition": "B"}, [0, 1]],
         ["~ donor", {"donor": "D0"}, [1, 0, 0, 0]],
         ["~ C(donor)", {"donor": "D0"}, [1, 0, 0, 0]],
         ["~ C(donor, contr.treatment(base='D2'))", {"donor": "D2"}, [1, 0, 0, 0]],
         ["~ C(donor, contr.treatment(base='D2'))", {"donor": "D0"}, [1, 1, 0, 0]],
+        ["~ donor + continuous", {"donor": "D1"}, [1, 1, 0, 0, 0]],
+        ["~ donor + np.log1p(continuous)", {"donor": "D1"}, [1, 1, 0, 0, 0]],
+        [
+            "~ donor + continuous + np.log(continuous)",
+            {"donor": "D0"},
+            ValueError,
+        ],  # current limitation: variables may only be used once per formula
+        [
+            "~ donor + C(donor)",
+            {"donor": "D0"},
+            ValueError,
+        ],  # current limitation: variables may only be used once per formula
+        ["~ C(donor, contr.sum)", {"donor": "D0"}, [1, 1, 0, 0]],
+        ["~ C(donor, contr.sum)", {"donor": "D3"}, [1, -1, -1, -1]],
+        ["~ condition + donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
+        ["~ 0 + condition + donor", {"donor": "D1"}, [0, 0, 1, 0, 0]],
+        ["~ condition + donor", {"donor": "D2"}, [1, 0, 0, 1, 0]],
+        ["~ condition + donor", {"condition": "B", "donor": "D2"}, [1, 1, 0, 1, 0]],
+        ["~ condition * donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
+        ["~ condition + donor + condition:donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
+        ["~ condition:donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
         # TODO: also include tests for errors that should be caught by .cond
     ],
 )
 def test_model_cond(test_adata_minimal, MockLinearModel, formula, cond_kwargs, expected_contrast):
     mod = MockLinearModel(test_adata_minimal, formula)
-    actual_contrast = mod.cond(**cond_kwargs)
-    assert actual_contrast.tolist() == expected_contrast
-    assert actual_contrast.index.tolist() == mod.design.columns.tolist()
+    if isinstance(expected_contrast, type):
+        with pytest.raises(expected_contrast):
+            mod.cond(**cond_kwargs)
+    else:
+        actual_contrast = mod.cond(**cond_kwargs)
+        assert actual_contrast.tolist() == expected_contrast
+        assert actual_contrast.index.tolist() == mod.design.columns.tolist()
diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
index 92b261b..33a3f19 100644
--- a/tests/test_formulaic.py
+++ b/tests/test_formulaic.py
@@ -142,6 +142,14 @@
                 },
             },
         ],
+        [
+            "~ condition:donor",
+            None,
+            {
+                "condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"},
+                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
+            },
+        ],
     ],
 )
 def test_custom_materializer(test_adata_minimal, formula, reorder_categorical, expected_factor_metadata):
@@ -165,5 +173,7 @@ def test_custom_materializer(test_adata_minimal, formula, reorder_categorical, e
     materializer(test_adata_minimal.obs, record_factor_metadata=True).get_model_matrix(formula)
     for factor, expected_metadata in expected_factor_metadata.items():
         actual_metadata = factor_storage[factor]
+        assert len(actual_metadata) == 1
+        actual_metadata = actual_metadata[0]
         for k in expected_metadata:
             assert getattr(actual_metadata, k) == expected_metadata[k]

From cab97501eb4137ba2da40467f64998659af4afef Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 9 Apr 2024 17:03:13 +0200
Subject: [PATCH 23/36] only fail cond if variable is ambiguous

---
 .../_util/formulaic.py                        | 16 ++++++++++
 .../methods/_base.py                          | 32 +++++++++++--------
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index b9f4a8a..54e8ab3 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -168,3 +168,19 @@ def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict])
             return super()._flatten_encoded_evaled_factor(name, values)
 
     return factor_storage, CustomPandasMaterializer
+
+
+def resolve_ambiguous(objs: Sequence[Any], attr: str) -> Any:
+    """Given a list of objects, return an attribute if it is the same between all object. Otherwise raise an error."""
+    if not objs:
+        raise ValueError("Collection is empty")
+
+    first_obj_attr = getattr(objs[0], attr)
+
+    # Check if the attribute is the same for all objects
+    for obj in objs[1:]:
+        if getattr(obj, attr) != first_obj_attr:
+            raise ValueError(f"Ambiguous attribute '{attr}': values differ between objects")
+
+    # If attribute is the same for all objects, return it
+    return first_obj_attr
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index f640cc1..a00c2e9 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -1,6 +1,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import Mapping, Sequence
 from dataclasses import dataclass
+from itertools import chain
 from types import MappingProxyType
 
 import numpy as np
@@ -8,7 +9,7 @@
 from anndata import AnnData
 
 from multi_condition_comparisions._util import check_is_numeric_matrix
-from multi_condition_comparisions._util.formulaic import Factor, get_factor_storage_and_materializer
+from multi_condition_comparisions._util.formulaic import Factor, get_factor_storage_and_materializer, resolve_ambiguous
 
 
 @dataclass
@@ -350,32 +351,35 @@ def cond(self, **kwargs) -> np.ndarray:
         # (the one that is usually dropped from the model for being redundant).
         #
         # `model_spec.variable_terms` is a mapping from variable to a set of terms. Unless a variable is used twice in the
-        # same formula (which for don't support for now), it contains exactly one element. It also contains
+        # same formula, it contains exactly one element. It also contains
         # "non-data" variables such as `C` - therefore we use `self.variables` to loop through.
         for var in self.variables:
             # A variable can refer to one or multiple terms. Either if a variable is specified
             # multiple times in the model (e.g. ~ var + C(var); ~ continuous + np.log(continuous))
             # or when there's an interaction term (e.g. ~ A * B ==> terms `A`, `B`, `A:B`)
             terms = self.design.model_spec.variable_terms[var]
-            # Only terms tied to a single variable are in the factor storage. Interaction terms (A:B) are not.
-            terms_metadata = [self.factor_storage[term] for term in terms if term in self.factor_storage]
-            if len(terms_metadata) != 1:
-                raise ValueError(
-                    "Ambiguous variable! Building contrasts with model.cond only works "
-                    "when each variable occurs only once per formula"
-                )
-            term_metadata = terms_metadata[0]
+
+            # Get list of all Metadata objects for the associated terms.
+            # Some terms don't have metadata (e.g. instead of an interaction term `A:B`, metadata exists only for the
+            # individual variablesl `A` and `B`). Some terms have multiple metadata objects, because they are specified
+            # multiple times in the formula, or forumlaic resolves them multiple times internally.
+            terms_metadata = list(chain.from_iterable(self.factor_storage[term] for term in terms))
             if var in cond_dict:
                 # In this case we keep the specified value in the dict, but we verify that it's a valid category
-                if term_metadata.kind == Factor.Kind.CATEGORICAL and cond_dict[var] not in term_metadata.categories:
+                tmp_categories = resolve_ambiguous(terms_metadata, "categories")
+                if (
+                    resolve_ambiguous(terms_metadata, "kind") == Factor.Kind.CATEGORICAL
+                    and cond_dict[var] not in tmp_categories
+                ):
                     raise ValueError(
-                        f"You specified a non-existant category for {var}. Possible categories: {', '.join(term_metadata.categories)}"
+                        f"You specified a non-existant category for {var}. Possible categories: {', '.join(tmp_categories)}"
                     )
             else:
                 # fill with default values
-                if term_metadata.kind == Factor.Kind.CATEGORICAL:
+                if resolve_ambiguous(terms_metadata, "kind") == Factor.Kind.CATEGORICAL:
+                    tmp_base = resolve_ambiguous(terms_metadata, "base")
                     cond_dict[var] = (
-                        term_metadata.base if term_metadata.base is not None else "\0"
+                        tmp_base if tmp_base is not None else "\0"
                     )  # can be any other string that is not a category, but not None
                 else:
                     cond_dict[var] = 0

From 4039e2387b522be85d390814f056b21e391d70b8 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 9 Apr 2024 17:17:36 +0200
Subject: [PATCH 24/36] Update comments

---
 .../_util/formulaic.py                        |  6 +++-
 .../methods/_base.py                          | 33 +++++++++++++++----
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 54e8ab3..96eca7e 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -170,6 +170,10 @@ def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict])
     return factor_storage, CustomPandasMaterializer
 
 
+class AmbiguousAttributeError(ValueError):
+    pass
+
+
 def resolve_ambiguous(objs: Sequence[Any], attr: str) -> Any:
     """Given a list of objects, return an attribute if it is the same between all object. Otherwise raise an error."""
     if not objs:
@@ -180,7 +184,7 @@ def resolve_ambiguous(objs: Sequence[Any], attr: str) -> Any:
     # Check if the attribute is the same for all objects
     for obj in objs[1:]:
         if getattr(obj, attr) != first_obj_attr:
-            raise ValueError(f"Ambiguous attribute '{attr}': values differ between objects")
+            raise AmbiguousAttributeError(f"Ambiguous attribute '{attr}': values differ between objects")
 
     # If attribute is the same for all objects, return it
     return first_obj_attr
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index a00c2e9..1379d44 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -9,7 +9,12 @@
 from anndata import AnnData
 
 from multi_condition_comparisions._util import check_is_numeric_matrix
-from multi_condition_comparisions._util.formulaic import Factor, get_factor_storage_and_materializer, resolve_ambiguous
+from multi_condition_comparisions._util.formulaic import (
+    AmbiguousAttributeError,
+    Factor,
+    get_factor_storage_and_materializer,
+    resolve_ambiguous,
+)
 
 
 @dataclass
@@ -364,8 +369,12 @@ def cond(self, **kwargs) -> np.ndarray:
             # individual variablesl `A` and `B`). Some terms have multiple metadata objects, because they are specified
             # multiple times in the formula, or forumlaic resolves them multiple times internally.
             terms_metadata = list(chain.from_iterable(self.factor_storage[term] for term in terms))
+
+            # If the variable is specified in the cond_dict explicitly, we just keep it as is.
+            # We still verify that it's a valid category, otherwise simple typos are not caught and lead to
+            # zeros in the design matrix.
             if var in cond_dict:
-                # In this case we keep the specified value in the dict, but we verify that it's a valid category
+                # it should never be non-ambiguous. If it happens, it's an edge case we don't know about (-> let it fail)
                 tmp_categories = resolve_ambiguous(terms_metadata, "categories")
                 if (
                     resolve_ambiguous(terms_metadata, "kind") == Factor.Kind.CATEGORICAL
@@ -374,14 +383,24 @@ def cond(self, **kwargs) -> np.ndarray:
                     raise ValueError(
                         f"You specified a non-existant category for {var}. Possible categories: {', '.join(tmp_categories)}"
                     )
+
+            # If the variable is not specified, we want to fill it with its default value (i.e. the base category)
             else:
-                # fill with default values
                 if resolve_ambiguous(terms_metadata, "kind") == Factor.Kind.CATEGORICAL:
-                    tmp_base = resolve_ambiguous(terms_metadata, "base")
-                    cond_dict[var] = (
-                        tmp_base if tmp_base is not None else "\0"
-                    )  # can be any other string that is not a category, but not None
+                    # In this case it can be ambiguous for some formulas -> Tell the user to specify the variable explicitly
+                    try:
+                        tmp_base = resolve_ambiguous(terms_metadata, "base")
+                    except AmbiguousAttributeError as e:
+                        raise ValueError(
+                            f"Could not automatically resolve base category for variable {var}. Please specify it explicity in `model.cond`."
+                        ) from e
+
+                    # if tmp_base is None (no-intercept model), set it to the NUL string \0.
+                    # In principle, it could be any string that is not a valid category, but it cannot be None
+                    # because this leads to an error in `get_model_matrix`
+                    cond_dict[var] = tmp_base if tmp_base is not None else "\0"
                 else:
+                    # Set to zero for continuous variables
                     cond_dict[var] = 0
 
         df = pd.DataFrame([kwargs])

From ef99eb4cc3437380b0f1cf060d345885ef19eaef Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Tue, 9 Apr 2024 17:30:51 +0200
Subject: [PATCH 25/36] Add test for resolve ambiguous

---
 tests/test_formulaic.py | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
index 33a3f19..a6d8ae2 100644
--- a/tests/test_formulaic.py
+++ b/tests/test_formulaic.py
@@ -2,7 +2,12 @@
 import pytest
 from formulaic.parser.types import Factor
 
-from multi_condition_comparisions._util.formulaic import get_factor_storage_and_materializer
+from multi_condition_comparisions._util.formulaic import (
+    AmbiguousAttributeError,
+    FactorMetadata,
+    get_factor_storage_and_materializer,
+    resolve_ambiguous,
+)
 
 
 @pytest.mark.parametrize(
@@ -147,7 +152,10 @@
             None,
             {
                 "condition": {"reduced_rank": True, "custom_encoder": False, "base": "A"},
-                "donor": {"reduced_rank": True, "custom_encoder": False, "base": "D0"},
+                "donor": {
+                    "custom_encoder": False,
+                    "drop_field": "D0",
+                },  # `reduced_rank` and `base` will be ambigous here because Formulaic generates both version of the factor internally
             },
         ],
     ],
@@ -173,7 +181,29 @@ def test_custom_materializer(test_adata_minimal, formula, reorder_categorical, e
     materializer(test_adata_minimal.obs, record_factor_metadata=True).get_model_matrix(formula)
     for factor, expected_metadata in expected_factor_metadata.items():
         actual_metadata = factor_storage[factor]
-        assert len(actual_metadata) == 1
-        actual_metadata = actual_metadata[0]
         for k in expected_metadata:
-            assert getattr(actual_metadata, k) == expected_metadata[k]
+            assert resolve_ambiguous(actual_metadata, k) == expected_metadata[k]
+
+
+def test_resolve_ambiguous():
+    obj1 = FactorMetadata("F1", True, True, ["A", "B"], Factor.Kind.CATEGORICAL)
+    obj2 = FactorMetadata("F2", True, False, ["A", "B"], Factor.Kind.CATEGORICAL)
+    obj3 = FactorMetadata("F3", True, False, None, Factor.Kind.NUMERICAL)
+
+    with pytest.raises(ValueError):
+        resolve_ambiguous([], "foo")
+
+    with pytest.raises(AttributeError):
+        resolve_ambiguous([obj1, obj2], "doesntexist")
+
+    with pytest.raises(AmbiguousAttributeError):
+        assert resolve_ambiguous([obj1, obj2], "name")
+
+    assert resolve_ambiguous([obj1, obj2, obj3], "reduced_rank") is True
+    assert resolve_ambiguous([obj1, obj2], "categories") == ["A", "B"]
+
+    with pytest.raises(AmbiguousAttributeError):
+        assert resolve_ambiguous([obj1, obj2, obj3], "categories")
+
+    with pytest.raises(AmbiguousAttributeError):
+        assert resolve_ambiguous([obj1, obj3], "kind")

From e59e760a2e20b80bcc73f56894e3c02d5b045e15 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 11 Apr 2024 12:19:37 +0200
Subject: [PATCH 26/36] Add more test cases and fix others

---
 .../methods/_base.py                          |  2 +-
 tests/test_base.py                            | 37 +++++++++++++------
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index 1379d44..2cd81db 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -374,7 +374,7 @@ def cond(self, **kwargs) -> np.ndarray:
             # We still verify that it's a valid category, otherwise simple typos are not caught and lead to
             # zeros in the design matrix.
             if var in cond_dict:
-                # it should never be non-ambiguous. If it happens, it's an edge case we don't know about (-> let it fail)
+                # Getting the categories should never be non-ambiguous. If it happens, it's an edge case we don't know about (-> let it fail)
                 tmp_categories = resolve_ambiguous(terms_metadata, "categories")
                 if (
                     resolve_ambiguous(terms_metadata, "kind") == Factor.Kind.CATEGORICAL
diff --git a/tests/test_base.py b/tests/test_base.py
index 010585f..7a06ad1 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -24,37 +24,50 @@ def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFram
 @pytest.mark.parametrize(
     "formula,cond_kwargs,expected_contrast",
     [
+        # single variable
+        ["~ condition", {}, [1, 0]],
         ["~ condition", {"condition": "A"}, [1, 0]],
         ["~ condition", {"condition": "B"}, [1, 1]],
         ["~ condition", {"condition": "42"}, ValueError],  # non-existant category
+        # no-intercept models
         ["~ 0 + condition", {"condition": "A"}, [1, 0]],
         ["~ 0 + condition", {"condition": "B"}, [0, 1]],
+        # Different way of specifying dummy coding
         ["~ donor", {"donor": "D0"}, [1, 0, 0, 0]],
         ["~ C(donor)", {"donor": "D0"}, [1, 0, 0, 0]],
         ["~ C(donor, contr.treatment(base='D2'))", {"donor": "D2"}, [1, 0, 0, 0]],
         ["~ C(donor, contr.treatment(base='D2'))", {"donor": "D0"}, [1, 1, 0, 0]],
+        # Handle continuous covariates
         ["~ donor + continuous", {"donor": "D1"}, [1, 1, 0, 0, 0]],
         ["~ donor + np.log1p(continuous)", {"donor": "D1"}, [1, 1, 0, 0, 0]],
+        ["~ donor + continuous + np.log1p(continuous)", {"donor": "D0"}, [1, 0, 0, 0, 0, 0]],
+        # Nonsense models repeating the same variable, which are nonetheless allowed by formulaic
+        ["~ donor + C(donor)", {"donor": "D1"}, [1, 1, 0, 0, 1, 0, 0]],
+        ["~ donor + C(donor, contr.treatment(base='D2'))", {"donor": "D0"}, [1, 0, 0, 0, 1, 0, 0]],
         [
-            "~ donor + continuous + np.log(continuous)",
-            {"donor": "D0"},
+            "~ condition + donor + C(donor, contr.treatment(base='D2'))",
+            {"condition": "A"},
             ValueError,
-        ],  # current limitation: variables may only be used once per formula
-        [
-            "~ donor + C(donor)",
-            {"donor": "D0"},
-            ValueError,
-        ],  # current limitation: variables may only be used once per formula
+        ],  # donor base category can't be resolved because it's ambiguous -> ValueError
+        # Sum2zero coding
         ["~ C(donor, contr.sum)", {"donor": "D0"}, [1, 1, 0, 0]],
         ["~ C(donor, contr.sum)", {"donor": "D3"}, [1, -1, -1, -1]],
+        # Multiple categorical variables
         ["~ condition + donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
-        ["~ 0 + condition + donor", {"donor": "D1"}, [0, 0, 1, 0, 0]],
         ["~ condition + donor", {"donor": "D2"}, [1, 0, 0, 1, 0]],
         ["~ condition + donor", {"condition": "B", "donor": "D2"}, [1, 1, 0, 1, 0]],
-        ["~ condition * donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
-        ["~ condition + donor + condition:donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
+        ["~ 0 + condition + donor", {"donor": "D1"}, [0, 0, 1, 0, 0]],
+        # Interaction terms
+        ["~ condition * donor", {"condition": "A"}, [1, 0, 0, 0, 0, 0, 0, 0]],
+        ["~ condition + donor + condition:donor", {"condition": "A"}, [1, 0, 0, 0, 0, 0, 0, 0]],
+        ["~ condition * donor", {"condition": "B", "donor": "D2"}, [1, 1, 0, 1, 0, 0, 1, 0]],
+        ["~ condition * C(donor, contr.treatment(base='D2'))", {"condition": "A"}, [1, 0, 0, 0, 0, 0, 0, 0]],
+        [
+            "~ condition * C(donor, contr.treatment(base='D2'))",
+            {"condition": "B", "donor": "D0"},
+            [1, 1, 1, 0, 0, 1, 0, 0],
+        ],
         ["~ condition:donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
-        # TODO: also include tests for errors that should be caught by .cond
     ],
 )
 def test_model_cond(test_adata_minimal, MockLinearModel, formula, cond_kwargs, expected_contrast):

From 43f77e18649e04b4459a444db7720fe9a1ddbe24 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 11 Apr 2024 12:51:00 +0200
Subject: [PATCH 27/36] Use mapping variable -> factor instead of variable ->
 term

---
 .../_util/formulaic.py                         | 18 ++++++++++++++++--
 .../methods/_base.py                           | 12 +++++++-----
 tests/test_base.py                             |  4 +++-
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 96eca7e..3125350 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -1,4 +1,11 @@
-"""Helpers to interact with Formulaic Formulas"""
+"""Helpers to interact with Formulaic Formulas
+
+TODO Glossary:
+ * factor
+ * term
+ * variable
+
+"""
 
 from collections import defaultdict
 from collections.abc import Mapping, Sequence
@@ -81,12 +88,16 @@ def get_factor_storage_and_materializer() -> tuple[dict[str, list[FactorMetadata
     -------
     factor_storage
         A dictionary pointing to Metadata for each factor processed by the custom materializer
+    variable_to_factors
+        A dictionary mapping variables to factor names (similar to model_spec.variable_terms), except that it maps
+        to *factors* rather than *terms*
     CustomPandasMaterializer
         A materializer class that is tied to the particular instance of `factor_storage`.
     """
     # There can be multiple FactorMetadata entries per sample, for instance when formulaic generates an interaction
     # term, it generates the factor with both full rank and reduced rank.
     factor_storage: dict[str, list[FactorMetadata]] = defaultdict(list)
+    variable_to_factors: dict[str, list[set]] = defaultdict(set)
 
     class CustomPandasMaterializer(PandasMaterializer):
         """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
@@ -120,6 +131,7 @@ def __init__(
                 passed to PandasMaterializer
             """
             self.factor_metadata_storage = factor_storage if record_factor_metadata else None
+            self.variable_to_factors = variable_to_factors if record_factor_metadata else None
             # temporary pointer to metadata of factor that is currently evaluated
             self._current_factor: FactorMetadata = None
             super().__init__(data, context, **params)
@@ -141,6 +153,8 @@ def _encode_evaled_factor(
                 if factor.expr in self.encoded_cache or (factor.expr, reduced_rank) in self.encoded_cache:
                     assert factor.expr in self.factor_metadata_storage, "Factor should be there since it's cached"
                 else:
+                    for var in factor.variables:
+                        self.variable_to_factors[var].add(factor.expr)
                     self._current_factor = FactorMetadata(
                         name=factor.expr,
                         reduced_rank=reduced_rank,
@@ -167,7 +181,7 @@ def _flatten_encoded_evaled_factor(self, name: str, values: FactorValues[dict])
 
             return super()._flatten_encoded_evaled_factor(name, values)
 
-    return factor_storage, CustomPandasMaterializer
+    return factor_storage, variable_to_factors, CustomPandasMaterializer
 
 
 class AmbiguousAttributeError(ValueError):
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index 2cd81db..666286f 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -152,11 +152,12 @@ def __init__(
         self._check_counts()
 
         self.factor_storage = None
+        self.variable_to_factors = None
         """Object to store metadata of formulaic factors which is used for building contrasts later. If a design matrix
         is passed directly, this remains None."""
 
         if isinstance(design, str):
-            self.factor_storage, materializer_class = get_factor_storage_and_materializer()
+            self.factor_storage, self.variable_to_factors, materializer_class = get_factor_storage_and_materializer()
             self.design = materializer_class(adata.obs, record_factor_metadata=True).get_model_matrix(design)
         else:
             self.design = design
@@ -362,13 +363,14 @@ def cond(self, **kwargs) -> np.ndarray:
             # A variable can refer to one or multiple terms. Either if a variable is specified
             # multiple times in the model (e.g. ~ var + C(var); ~ continuous + np.log(continuous))
             # or when there's an interaction term (e.g. ~ A * B ==> terms `A`, `B`, `A:B`)
-            terms = self.design.model_spec.variable_terms[var]
+            factors = self.variable_to_factors[var]
 
-            # Get list of all Metadata objects for the associated terms.
+            # Get list of all Metadata objects for the associated Factors.
             # Some terms don't have metadata (e.g. instead of an interaction term `A:B`, metadata exists only for the
-            # individual variablesl `A` and `B`). Some terms have multiple metadata objects, because they are specified
+            # individual variables `A` and `B`). Some terms have multiple metadata objects, because they are specified
             # multiple times in the formula, or forumlaic resolves them multiple times internally.
-            terms_metadata = list(chain.from_iterable(self.factor_storage[term] for term in terms))
+            # terms_metadata = list(chain.from_iterable(self.factor_storage[term] for term in terms))
+            terms_metadata = list(chain.from_iterable(self.factor_storage[f] for f in factors))
 
             # If the variable is specified in the cond_dict explicitly, we just keep it as is.
             # We still verify that it's a valid category, otherwise simple typos are not caught and lead to
diff --git a/tests/test_base.py b/tests/test_base.py
index 7a06ad1..ec8caf0 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -67,7 +67,9 @@ def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFram
             {"condition": "B", "donor": "D0"},
             [1, 1, 1, 0, 0, 1, 0, 0],
         ],
-        ["~ condition:donor", {"condition": "A"}, [1, 0, 0, 0, 0]],
+        ["~ condition:donor", {"condition": "A"}, [1, 0, 0, 0]],
+        ["~ condition:C(donor)", {"condition": "A", "donor": "D1"}, [1, 0, 0, 0]],
+        ["~ condition:donor", {"condition": "A", "donor": "D1"}, [1, 0, 0, 0]],
     ],
 )
 def test_model_cond(test_adata_minimal, MockLinearModel, formula, cond_kwargs, expected_contrast):

From 54a0d0b73365899c34699346b477830c23c1bcb6 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 11 Apr 2024 16:09:42 +0200
Subject: [PATCH 28/36] Fix remaining model.cond testcases

---
 tests/test_base.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/test_base.py b/tests/test_base.py
index ec8caf0..cf825d5 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -67,9 +67,13 @@ def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> DataFram
             {"condition": "B", "donor": "D0"},
             [1, 1, 1, 0, 0, 1, 0, 0],
         ],
-        ["~ condition:donor", {"condition": "A"}, [1, 0, 0, 0]],
-        ["~ condition:C(donor)", {"condition": "A", "donor": "D1"}, [1, 0, 0, 0]],
-        ["~ condition:donor", {"condition": "A", "donor": "D1"}, [1, 0, 0, 0]],
+        [
+            "~ condition:donor",
+            {"condition": "A"},
+            ValueError,
+        ],  # Can't automatically resolve base category, because Formulaic builds a reduced-rank and full-rank factor internally
+        ["~ condition:donor", {"condition": "A", "donor": "D1"}, [1, 1, 0, 0, 0, 0, 0, 0]],
+        ["~ condition:C(donor)", {"condition": "A", "donor": "D1"}, [1, 1, 0, 0, 0, 0, 0, 0]],
     ],
 )
 def test_model_cond(test_adata_minimal, MockLinearModel, formula, cond_kwargs, expected_contrast):

From 86f114a2f025a4360bc4048df4c690b3f1bc4217 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 11 Apr 2024 16:12:09 +0200
Subject: [PATCH 29/36] Fix formulaic testcase

---
 tests/test_formulaic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_formulaic.py b/tests/test_formulaic.py
index a6d8ae2..da64be5 100644
--- a/tests/test_formulaic.py
+++ b/tests/test_formulaic.py
@@ -177,7 +177,7 @@ def test_custom_materializer(test_adata_minimal, formula, reorder_categorical, e
     if reorder_categorical is not None:
         for col, order in reorder_categorical.items():
             test_adata_minimal.obs[col] = pd.Categorical(test_adata_minimal.obs[col], categories=order)
-    factor_storage, materializer = get_factor_storage_and_materializer()
+    factor_storage, _, materializer = get_factor_storage_and_materializer()
     materializer(test_adata_minimal.obs, record_factor_metadata=True).get_model_matrix(formula)
     for factor, expected_metadata in expected_factor_metadata.items():
         actual_metadata = factor_storage[factor]

From 2c8df267615b4815c523626ebc23477ae68d0f75 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 11 Apr 2024 16:12:59 +0200
Subject: [PATCH 30/36] Reset example notebook

---
 docs/notebooks/example.ipynb | 132 +++--------------------------------
 1 file changed, 9 insertions(+), 123 deletions(-)

diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
index d80220a..ce7dbad 100644
--- a/docs/notebooks/example.ipynb
+++ b/docs/notebooks/example.ipynb
@@ -9,146 +9,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import anndata as ad\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import pytest\n",
-    "from pydeseq2.utils import load_example_data\n",
-    "from formulaic import model_matrix\n",
-    "\n",
-    "\n",
-    "@pytest.fixture\n",
-    "def test_counts():\n",
-    "    return load_example_data(\n",
-    "        modality=\"raw_counts\",\n",
-    "        dataset=\"synthetic\",\n",
-    "        debug=False,\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "@pytest.fixture\n",
-    "def test_metadata():\n",
-    "    return load_example_data(\n",
-    "        modality=\"metadata\",\n",
-    "        dataset=\"synthetic\",\n",
-    "        debug=False,\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "@pytest.fixture\n",
-    "def test_adata(test_counts, test_metadata):\n",
-    "    return ad.AnnData(X=test_counts, obs=test_metadata)\n",
-    "\n",
-    "\n",
-    "def test_adata_minimal():\n",
-    "    matrix_format = np.array\n",
-    "    n_obs = 80\n",
-    "    n_donors = n_obs // 4\n",
-    "    rng = np.random.default_rng(9)  # make tests deterministic\n",
-    "    obs = pd.DataFrame(\n",
-    "        {\n",
-    "            \"condition\": [\"A\", \"B\"] * (n_obs // 2),\n",
-    "            \"donor\": sum(([f\"D{i}\"] * n_donors for i in range(n_obs // n_donors)), []),\n",
-    "            \"other\": ([\"X\"] * (n_obs // 4)) + ([\"Y\"] * ((3 * n_obs) // 4)),\n",
-    "            \"pairing\": sum(([str(i), str(i)] for i in range(n_obs // 2)), []),\n",
-    "            \"continuous\": [rng.uniform(0, 1) * 4000 for _ in range(n_obs)],\n",
-    "        },\n",
-    "    )\n",
-    "    var = pd.DataFrame(index=[\"gene1\", \"gene2\"])\n",
-    "    group1 = rng.negative_binomial(20, 0.1, n_obs // 2)  # large mean\n",
-    "    group2 = rng.negative_binomial(5, 0.5, n_obs // 2)  # small mean\n",
-    "\n",
-    "    condition_data = np.empty((n_obs,), dtype=group1.dtype)\n",
-    "    condition_data[0::2] = group1\n",
-    "    condition_data[1::2] = group2\n",
-    "\n",
-    "    donor_data = np.empty((n_obs,), dtype=group1.dtype)\n",
-    "    donor_data[0:n_donors] = group2[:n_donors]\n",
-    "    donor_data[n_donors : (2 * n_donors)] = group1[n_donors:]\n",
-    "\n",
-    "    donor_data[(2 * n_donors) : (3 * n_donors)] = group2[:n_donors]\n",
-    "    donor_data[(3 * n_donors) :] = group1[n_donors:]\n",
-    "\n",
-    "    X = matrix_format(np.vstack([condition_data, donor_data]).T)\n",
-    "    return ad.AnnData(X=X, obs=obs, var=var)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/sturm/apps/micromamba/envs/multi-condition-comparisons/lib/python3.10/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.\n",
-      "  warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n"
+      "/home/sturm/apps/micromamba/envs/multi-condition-comparisons/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
      ]
     }
    ],
    "source": [
-    "adata = test_adata_minimal()"
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "from multi_condition_comparisions.tl.de import StatsmodelsDE\n",
+    "import scanpy as sc"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "design = model_matrix(\"~ condition + C(condition) + donor + continuous\", adata.obs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{1: set(),\n",
-       " condition: {'condition'},\n",
-       " C(condition): {'C', 'condition'},\n",
-       " donor: {'donor'},\n",
-       " continuous: {'continuous'}}"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "design.model_spec.term_variables"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'condition': {C(condition), condition},\n",
-       " 'C': {C(condition)},\n",
-       " 'donor': {donor},\n",
-       " 'continuous': {continuous}}"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "design.model_spec.variable_terms"
+    "adata = sc.read_h5ad(\"./airway.h5ad\")"
    ]
   },
   {

From 99992c1565d6ab8d84ecbf6586247dbc27f327b5 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 11 Apr 2024 16:13:37 +0200
Subject: [PATCH 31/36] Restet conftest

---
 tests/conftest.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 5eb3cd0..936ccf3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,3 @@
-# TODO: temp workaround for Gregor
-import os
-
 import anndata as ad
 import numpy as np
 import pandas as pd
@@ -8,8 +5,6 @@
 import scipy.sparse as sp
 from pydeseq2.utils import load_example_data
 
-os.environ["R_HOME"] = "/home/sturm/apps/micromamba/envs/multi-condition-comparisons/lib/R"
-
 
 @pytest.fixture
 def test_counts():

From 6104af082e68177656e64d704351d042d9439a78 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 11 Apr 2024 16:52:31 +0200
Subject: [PATCH 32/36] Refactor

---
 .../_util/formulaic.py                        |   4 +-
 .../methods/_base.py                          | 134 ++++++++++++------
 2 files changed, 94 insertions(+), 44 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 3125350..4a61a44 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -78,7 +78,7 @@ def base(self) -> str | None:
                 return self.drop_field
 
 
-def get_factor_storage_and_materializer() -> tuple[dict[str, list[FactorMetadata]], type]:
+def get_factor_storage_and_materializer() -> tuple[dict[str, list[FactorMetadata]], dict[str, set[str]], type]:
     """
     Keep track of categorical factors used in a model spec.
 
@@ -97,7 +97,7 @@ def get_factor_storage_and_materializer() -> tuple[dict[str, list[FactorMetadata
     # There can be multiple FactorMetadata entries per sample, for instance when formulaic generates an interaction
     # term, it generates the factor with both full rank and reduced rank.
     factor_storage: dict[str, list[FactorMetadata]] = defaultdict(list)
-    variable_to_factors: dict[str, list[set]] = defaultdict(set)
+    variable_to_factors: dict[str, set[str]] = defaultdict(set)
 
     class CustomPandasMaterializer(PandasMaterializer):
         """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index 666286f..1e27d1a 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -3,6 +3,7 @@
 from dataclasses import dataclass
 from itertools import chain
 from types import MappingProxyType
+from typing import Any
 
 import numpy as np
 import pandas as pd
@@ -12,6 +13,7 @@
 from multi_condition_comparisions._util.formulaic import (
     AmbiguousAttributeError,
     Factor,
+    FactorMetadata,
     get_factor_storage_and_materializer,
     resolve_ambiguous,
 )
@@ -151,12 +153,15 @@ def __init__(
         super().__init__(adata, mask=mask, layer=layer)
         self._check_counts()
 
-        self.factor_storage = None
-        self.variable_to_factors = None
+        self.factor_storage: dict[str, list[FactorMetadata]] = None
         """Object to store metadata of formulaic factors which is used for building contrasts later. If a design matrix
         is passed directly, this remains None."""
 
+        self.variable_to_factors: dict[str, set[str]] = None
+        """Stores mapping from variables to formulaic factors"""
+
         if isinstance(design, str):
+            # Use custom formulaic materializer that will record factor metadata while creating the model matrix
             self.factor_storage, self.variable_to_factors, materializer_class = get_factor_storage_and_materializer()
             self.design = materializer_class(adata.obs, record_factor_metadata=True).get_model_matrix(design)
         else:
@@ -356,58 +361,103 @@ def cond(self, **kwargs) -> np.ndarray:
         # To do so, we keep the values that were already specified, and fill all other values with the default category
         # (the one that is usually dropped from the model for being redundant).
         #
-        # `model_spec.variable_terms` is a mapping from variable to a set of terms. Unless a variable is used twice in the
-        # same formula, it contains exactly one element. It also contains
-        # "non-data" variables such as `C` - therefore we use `self.variables` to loop through.
+        # self.variables only contains "data-variables", but no "factor variables", such as `C`
         for var in self.variables:
-            # A variable can refer to one or multiple terms. Either if a variable is specified
-            # multiple times in the model (e.g. ~ var + C(var); ~ continuous + np.log(continuous))
-            # or when there's an interaction term (e.g. ~ A * B ==> terms `A`, `B`, `A:B`)
-            factors = self.variable_to_factors[var]
-
-            # Get list of all Metadata objects for the associated Factors.
-            # Some terms don't have metadata (e.g. instead of an interaction term `A:B`, metadata exists only for the
-            # individual variables `A` and `B`). Some terms have multiple metadata objects, because they are specified
-            # multiple times in the formula, or forumlaic resolves them multiple times internally.
-            # terms_metadata = list(chain.from_iterable(self.factor_storage[term] for term in terms))
-            terms_metadata = list(chain.from_iterable(self.factor_storage[f] for f in factors))
-
             # If the variable is specified in the cond_dict explicitly, we just keep it as is.
             # We still verify that it's a valid category, otherwise simple typos are not caught and lead to
             # zeros in the design matrix.
             if var in cond_dict:
-                # Getting the categories should never be non-ambiguous. If it happens, it's an edge case we don't know about (-> let it fail)
-                tmp_categories = resolve_ambiguous(terms_metadata, "categories")
-                if (
-                    resolve_ambiguous(terms_metadata, "kind") == Factor.Kind.CATEGORICAL
-                    and cond_dict[var] not in tmp_categories
-                ):
-                    raise ValueError(
-                        f"You specified a non-existant category for {var}. Possible categories: {', '.join(tmp_categories)}"
-                    )
+                self._check_category(var, cond_dict[var])
 
             # If the variable is not specified, we want to fill it with its default value (i.e. the base category)
             else:
-                if resolve_ambiguous(terms_metadata, "kind") == Factor.Kind.CATEGORICAL:
-                    # In this case it can be ambiguous for some formulas -> Tell the user to specify the variable explicitly
-                    try:
-                        tmp_base = resolve_ambiguous(terms_metadata, "base")
-                    except AmbiguousAttributeError as e:
-                        raise ValueError(
-                            f"Could not automatically resolve base category for variable {var}. Please specify it explicity in `model.cond`."
-                        ) from e
-
-                    # if tmp_base is None (no-intercept model), set it to the NUL string \0.
-                    # In principle, it could be any string that is not a valid category, but it cannot be None
-                    # because this leads to an error in `get_model_matrix`
-                    cond_dict[var] = tmp_base if tmp_base is not None else "\0"
-                else:
-                    # Set to zero for continuous variables
-                    cond_dict[var] = 0
+                cond_dict[var] = self._get_default_value(var)
 
         df = pd.DataFrame([kwargs])
         return self.design.model_spec.get_model_matrix(df).iloc[0]
 
+    def _get_factor_metadata_for_variable(self, var) -> list[FactorMetadata]:
+        """
+        Get the Metadata objects of all factors defined by a given variable.
+
+        A variable can refer to one or multiple factors. Either if a variable is specified
+        multiple times in the model (e.g. ~ var + C(var); ~ continuous + np.log(continuous))
+        or when there's an interaction term (e.g. ~ A * B ==> terms `A`, `B`, `A:B`).
+
+        Some Factors can have multiple metadata objects, because they are specified
+        multiple times in the formula, or forumlaic resolves them multiple times internally.
+
+        Even if there are multiple factors per variable, they could still contain the same metadata and we
+        can unambiguously retreive metadata for a given variable, e.g. using the `_util.formulaic.resolve_ambiguous`
+        function.
+
+        Returns
+        -------
+        a list of FactorMetadata objects
+        """
+        factors = self.variable_to_factors[var]
+
+        return list(chain.from_iterable(self.factor_storage[f] for f in factors))
+
+    def _get_default_value(self, var) -> Any:
+        r"""
+        Get the default value (base category) for variable `var`.
+
+        Special cases:
+         * If the variable is continuous, return 0.
+         * For a variable without reduced rank (as it happens for the first variable in a no-intercept model `~0 + xxx`)
+           the function returns the null string "\0". This is to ensure that when passed to `formulaic.get_model_matrix`,
+           a row is returned that sets all columns of that variable to 0. It could be any string that is not
+           a valid category. It cannot be `None`, because this results in failure of `get_model_matrix`, therefore the
+           null string was chosen because of clear semantics and it being surely not a valid category.
+
+        Raises
+        ------
+        ValueError
+            If there is no way to unambiguously infer the base category from the formulaic formula.
+
+        Returns
+        -------
+        The default value of the given variable.
+        """
+        factor_metadata = self._get_factor_metadata_for_variable(var)
+        if resolve_ambiguous(factor_metadata, "kind") == Factor.Kind.CATEGORICAL:
+            # In this case it can be ambiguous for some formulas -> Tell the user to specify the variable explicitly
+            try:
+                tmp_base = resolve_ambiguous(factor_metadata, "base")
+            except AmbiguousAttributeError as e:
+                raise ValueError(
+                    f"Could not automatically resolve base category for variable {var}. Please specify it explicity in `model.cond`."
+                ) from e
+
+            # if tmp_base is None (no-intercept model), set it to the NUL string \0.
+            # In principle, it could be any string that is not a valid category, but it cannot be None
+            # because this leads to an error in `get_model_matrix`
+            return tmp_base if tmp_base is not None else "\0"
+        else:
+            # Set to zero for continuous variables
+            return 0
+
+    def _check_category(self, var, value) -> None:
+        """
+        Check if `value` is a valid category of the factor defined by `var`.
+
+        If `var` is a continuous variable this passes silently.
+
+        Raises
+        ------
+        ValueError
+            If `value` is not a valid category.
+        """
+        factor_metadata = self._get_factor_metadata_for_variable(var)
+
+        # Getting the categories should never be non-ambiguous. If it happens, it's an edge case we don't know about (-> let it fail)
+        tmp_categories = resolve_ambiguous(factor_metadata, "categories")
+        if resolve_ambiguous(factor_metadata, "kind") == Factor.Kind.CATEGORICAL and value not in tmp_categories:
+            raise ValueError(
+                f"You specified a non-existant category for {var}. Possible categories: {', '.join(tmp_categories)}"
+            )
+
     def contrast(self, column: str, baseline: str, group_to_compare: str) -> pd.Series:
         """
         Build a simple contrast for pairwise comparisons.

From e180e7ae6e686361ae457a8219d2bd54e91306d4 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Fri, 12 Apr 2024 12:26:59 +0200
Subject: [PATCH 33/36] Add formulaic glossary

---
 src/multi_condition_comparisions/_util/formulaic.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 4a61a44..86e3b3d 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -1,10 +1,9 @@
 """Helpers to interact with Formulaic Formulas
 
-TODO Glossary:
- * factor
- * term
- * variable
-
+Some helpful definitions for working with formulaic formulas (e.g. `~ 0 + C(donor):treatment + np.log1p(continuous)`):
+ * A *term* refers to an expression in the formula, separated by `+`, e.g. `C(donor):treatment`, or `np.log1p(continuous)`.
+ * A *variable* refers to a column of the data frame passed to formulaic, e.g. `donor`.
+ * A *factor* is the specification of how a certain variable is represented in the design matrix, e.g. treatment coding with base level "A" and reduced rank.
 """
 
 from collections import defaultdict

From c2d76877d9065686de6714f2ae3141d383eee7c8 Mon Sep 17 00:00:00 2001
From: Lukas Heumos <lukas.heumos@posteo.net>
Date: Fri, 12 Apr 2024 16:06:15 +0200
Subject: [PATCH 34/36] Fix typo

---
 src/multi_condition_comparisions/methods/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index 1e27d1a..28b46bf 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -356,7 +356,7 @@ def cond(self, **kwargs) -> np.ndarray:
             )
 
         # We now fill `cond_dict` such that it is equivalent to a data row from `adata.obs`.
-        # This data can then be passed to the `get_model_matrix` of formulaic to retreive a correpsonding
+        # This data can then be passed to the `get_model_matrix` of formulaic to retrieve a corresponding
         # contrast vector.
         # To do so, we keep the values that were already specified, and fill all other values with the default category
         # (the one that is usually dropped from the model for being redundant).

From 66f9919c5aa73e28ebcf097bcb3ee7d35beda567 Mon Sep 17 00:00:00 2001
From: Lukas Heumos <lukas.heumos@posteo.net>
Date: Fri, 12 Apr 2024 16:06:24 +0200
Subject: [PATCH 35/36] Fix typo

---
 src/multi_condition_comparisions/_util/formulaic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/multi_condition_comparisions/_util/formulaic.py b/src/multi_condition_comparisions/_util/formulaic.py
index 86e3b3d..11a9d99 100644
--- a/src/multi_condition_comparisions/_util/formulaic.py
+++ b/src/multi_condition_comparisions/_util/formulaic.py
@@ -99,7 +99,7 @@ def get_factor_storage_and_materializer() -> tuple[dict[str, list[FactorMetadata
     variable_to_factors: dict[str, set[str]] = defaultdict(set)
 
     class CustomPandasMaterializer(PandasMaterializer):
-        """An extension of the PandasMaterializer that records all cateogrical variables and their (base) categories."""
+        """An extension of the PandasMaterializer that records all categorical variables and their (base) categories."""
 
         REGISTER_NAME = "custom_pandas"
         REGISTER_INPUTS = ("pandas.core.frame.DataFrame",)

From 4cc220dc39d170e96a3bcd8fa2b86a1187aafe4b Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 18 Apr 2024 20:51:53 +0200
Subject: [PATCH 36/36] Update
 src/multi_condition_comparisions/methods/_base.py

Co-authored-by: Ilan Gold <ilanbassgold@gmail.com>
---
 src/multi_condition_comparisions/methods/_base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/multi_condition_comparisions/methods/_base.py b/src/multi_condition_comparisions/methods/_base.py
index 28b46bf..a8b05ab 100644
--- a/src/multi_condition_comparisions/methods/_base.py
+++ b/src/multi_condition_comparisions/methods/_base.py
@@ -153,11 +153,11 @@ def __init__(
         super().__init__(adata, mask=mask, layer=layer)
         self._check_counts()
 
-        self.factor_storage: dict[str, list[FactorMetadata]] = None
+        self.factor_storage: dict[str, list[FactorMetadata]] | None = None
         """Object to store metadata of formulaic factors which is used for building contrasts later. If a design matrix
         is passed directly, this remains None."""
 
-        self.variable_to_factors: dict[str, set[str]] = None
+        self.variable_to_factors: dict[str, set[str]] | None = None
         """Stores mapping from variables to formulaic factors"""
 
         if isinstance(design, str):