Merge pull request #142 from cnellington/dev

Release v0.2.2
cnellington · Nov 7, 2022 · d821522 · d821522
2 parents c64c353 + 4294ca3
commit d821522
Show file tree

Hide file tree

Showing 13 changed files with 807 additions and 308 deletions.
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
@@ -18,6 +18,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pylint
+        pip install .
     - name: Analysing the code with pylint
       run: |
         pylint $(git ls-files '*.py')
diff --git a/.pylintrc b/.pylintrc
@@ -572,7 +572,8 @@ good-names=i,
            W_train, W_val, W_test,
            beta, mu,
            Y_true, Y_pred,
-           W_true, W_pred
+           W_true, W_pred,
+           X_names, C_names
 
 # Good variable names regexes, separated by a comma. If names match any regex,
 # they will always be accepted

diff --git a/README.md b/README.md
@@ -1,37 +1,52 @@
 ![Preview](contextualized_logo.png)
 #
-![pylint Score](pylint.svg)
+
 ![License](https://img.shields.io/github/license/cnellington/contextualized.svg?style=flat-square)
+![python](https://img.shields.io/badge/python-3.7%20|%203.8%20|%203.9%20|%203.10-blue)
+[![PyPI version](https://badge.fury.io/py/contextualized-ml.svg)](https://badge.fury.io/py/contextualized-ml)
 ![Maintenance](https://img.shields.io/maintenance/yes/2022?style=flat-square)
+[![Downloads](https://pepy.tech/badge/contextualized-ml)](https://pepy.tech/project/contextualized-ml)
+![pylint Score](pylint.svg)
+<a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
+
 
 A statistical machine learning toolbox for estimating models, distributions, and functions with context-specific parameters.
 
-Context-specific parameters are essential for:
-- Finding hidden heterogeneity in data -- are all samples the same?
-- Identifying context-specific predictors -- are there different reasons for outcomes?
-- Domain adaptation -- can our learned models extrapolate to new contexts?
+Context-specific parameters:
+- Find hidden heterogeneity in data -- are all samples the same?
+- Identify context-specific predictors -- are there different reasons for outcomes?
+- Enable domain adaptation -- can learned models extrapolate to new contexts?
 
-## Install and Use Contextualized
+
+## Quick Start
+
+### Installation
 ```
-pip install git+https://github.com/cnellington/Contextualized.git
+pip install contextualized-ml
 ```
 
-Take a look at the [main demo](docs/demos/main_demo.ipynb) for a complete overview with code, or the [easy demo](docs/demos/Easy-demo/easy_demo.ipynb) for a quickstart with sklearn-style wrappers!
-
-### Quick Start
+Take a look at the [easy demo](docs/demos/easy_regression.ipynb) for a quickstart with sklearn-style wrappers.
 
-#### Build a Contextualized Model
+### Build a Contextualized Model
 ```
 from contextualized.easy import ContextualizedRegressor
 model = ContextualizedRegressor()
 model.fit(C, X, Y)
 ```
 
-#### Predict Context-Specific Parameters
+### Predict Context-Specific Parameters
 ```
 model.predict_params(C)
 ```
 
+See the [docs](https://contextualized.ml/docs) for more examples.
+
+### Important links
+
+- [Documentation](https://contextualized.ml/docs)
+- [PyPI package index](https://pypi.python.org/pypi/contextualized-ml)
+
+
 ## Contextualized Family
 Context-dependent modeling is a universal problem, and every domain presents unique challenges and opportunities.
 Here are some layers that others have added on top of Contextualized.
@@ -45,13 +60,18 @@ Feel free to add your own page(s) by sending a PR or request an improvement by c
 </table>
 
 
-## Acknowledgements
+## Thanks to all our contributors
+
+<a href="https://github.com/cnellington/contextualized/graphs/contributors">
+  <img src="https://contributors-img.web.app/image?repo=cnellington/contextualized" />
+</a>
 
-ContextualizedML was originally implemented by [Caleb Ellington](https://calebellington.com/) (CMU) and [Ben Lengerich](http://web.mit.edu/~blengeri/www/index.shtml) (MIT).
+ContextualizedML was originally implemented by [Caleb Ellington](https://calebellington.com/) (CMU) and [Ben Lengerich](http://web.mit.edu/~blengeri/www) (MIT).
 
 Many people have helped. Check out [ACKNOWLEDGEMENTS.md](https://github.com/cnellington/Contextualized/blob/main/ACKNOWLEDGEMENTS.md)!
 
 
+
 ## Related Publications and Pre-prints
 - [Automated Interpretable Discovery of Heterogeneous Treatment Effectiveness: A COVID-19 Case Study](https://www.sciencedirect.com/science/article/pii/S1532046422001022)
 - [NOTMAD: Estimating Bayesian Networks with Sample-Specific Structures and Parameters](http://arxiv.org/abs/2111.01104)

diff --git a/contextualized/__init__.py b/contextualized/__init__.py
@@ -3,4 +3,9 @@
 	For more details, please refer to contextualized.ml.
 """
 
+from contextualized import analysis
+from contextualized import dags
+from contextualized import easy
+from contextualized import regression
+from contextualized import utils
 from contextualized.utils import *
diff --git a/contextualized/analysis/__init__.py b/contextualized/analysis/__init__.py
@@ -0,0 +1,14 @@
+"""
+Utilities for analyzing contextualized models.
+"""
+
+from contextualized.analysis.accuracy_split import print_acc_by_covars
+from contextualized.analysis.embeddings import (
+    plot_lowdim_rep,
+    plot_embedding_for_all_covars,
+)
+from contextualized.analysis.effects import (
+    plot_homogeneous_context_effects,
+    plot_homogeneous_predictor_effects,
+    plot_heterogeneous_predictor_effects,
+)
diff --git a/contextualized/analysis/accuracy_split.py b/contextualized/analysis/accuracy_split.py
@@ -0,0 +1,60 @@
+"""
+Utilities for post-hoc analysis of trained Contextualized models.
+"""
+
+import numpy as np
+from sklearn.metrics import roc_auc_score as roc
+
+
+def get_roc(Y_true, Y_pred):
+    """Return the ROC AUC of Y_pred against Y_true, or np.nan if the scorer raises IndexError or ValueError."""
+    try:
+        return roc(Y_true, Y_pred)  # roc is sklearn.metrics.roc_auc_score
+    except (IndexError, ValueError):
+        return np.nan
+
+
+def print_acc_by_covars(Y_true, Y_pred, covar_df, **kwargs):
+    """
+    Print the ROC AUC (via get_roc) of Y_pred against Y_true within each distinct value of every covar_df column.
+    Y_true and Y_pred are passed through np.squeeze. Optional kwargs: max_classes (default 20), covar_stds,
+    covar_means, covar_encoders (each indexed by column position), train_idx and test_idx (Boolean locators).
+    """
+    Y_true = np.squeeze(Y_true)
+    Y_pred = np.squeeze(Y_pred)
+    for i, covar in enumerate(covar_df.columns):
+        my_labels = covar_df.values[:, i]
+        if len(set(my_labels)) > kwargs.get("max_classes", 20):  # skip columns with too many distinct values
+            continue
+        if kwargs.get("covar_stds", None) is not None:
+            my_labels *= kwargs["covar_stds"][i]  # NOTE(review): in-place on a slice of covar_df.values; may write through to caller data -- confirm
+        if kwargs.get("covar_means", None) is not None:
+            my_labels += kwargs["covar_means"][i]  # presumably undoes standardization together with covar_stds; verify against caller
+        if kwargs.get("covar_encoders", None) is not None:
+            try:
+                my_labels = kwargs["covar_encoders"][i].inverse_transform(
+                    my_labels.astype(int)
+                )
+            except (AttributeError, TypeError, ValueError):  # best effort: keep the undecoded labels if inverse_transform fails
+                pass
+        print("=" * 20)
+        print(covar)
+        print("-" * 10)
+
+        for label in sorted(set(my_labels)):
+            label_idxs = my_labels == label  # Boolean mask of the rows whose label equals this value
+            if (
+                kwargs.get("train_idx", None) is not None
+                and kwargs.get("test_idx", None) is not None
+            ):  # train/test breakdown only when both locators are given
+                my_train_idx = np.logical_and(label_idxs, kwargs["train_idx"])
+                my_test_idx = np.logical_and(label_idxs, kwargs["test_idx"])
+                train_roc = get_roc(Y_true[my_train_idx], Y_pred[my_train_idx])
+                test_roc = get_roc(Y_true[my_test_idx], Y_pred[my_test_idx])
+                print(
+                    f"{label}:\t Train ROC: {train_roc:.2f}, Test ROC: {test_roc:.2f}"
+                )
+            else:  # otherwise report a single ROC AUC over all rows with this label
+                overall_roc = get_roc(Y_true[label_idxs], Y_pred[label_idxs])
+                print(f"{label}:\t ROC: {overall_roc:.2f}")
+        print("=" * 20)