From 340f5d3229ea5fecc331abed86e5b3b1b9366247 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Mon, 16 Jul 2012 23:46:08 -0400 Subject: [PATCH 01/33] MISC: updating .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index d7f1163..1caac9b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ __pycache__ .idea build +*.DS_Store +*~ +.*swp From fa1369250346b9eb3dc8950f302a0a0613503eb2 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 17 Jul 2012 12:20:48 -0400 Subject: [PATCH 02/33] ENH: added d-prime calculation from a confusion matrix --- bangmetric/dprime.py | 80 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index a80fcf2..2b757b9 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -2,13 +2,18 @@ # Authors: Nicolas Pinto # Nicolas Poilvert +# Ha Hong # # License: BSD -__all__ = ['dprime'] +__all__ = ['dprime', 'dprime_ova_from_confusion'] import numpy as np +from scipy.stats import norm +DEFAULT_FUDGE_FACTOR = 0.5 +DEFAULT_FUDGE_MODE = 'correction' +ATOL = 1e-7 def dprime(y_pred, y_true): """Computes the d-prime sensitivity index of the predictions. @@ -61,3 +66,76 @@ def dprime(y_pred, y_true): dp = num / div return dp + + +def dprime_ova_from_confusion(M, fudge_mode=DEFAULT_FUDGE_MODE, \ + fudge_fac=DEFAULT_FUDGE_FACTOR, atol=ATOL): + """Computes the one-vs-all d-prime sensitivity index of the confusion matrix. + + Parameters + ---------- + M: array, shape = [n_classes (true), n_classes (pred)] + Confusion matrix, where the element M_{rc} means the number of + times when the classifier guesses that a test sample in the r-th class + belongs to the c-th class. + + fudge_fac: float, optional + A small factor to avoid non-finite numbers when TPR or FPR becomes 0 or 1. + + fudge_mode: str, optional + Determins how to apply the fudge factor + 'always': always apply the fudge factor + 'correction': apply only when needed + + atol: float, optional + Tolerance to simplify the dp from a 2-way (i.e., 2x2) confusion matrix. + + Returns + ------- + dp: array, shape = [n_classes] + Array of d-primes, each element corresponding to each class + + References + ---------- + http://en.wikipedia.org/wiki/D' + http://en.wikipedia.org/wiki/Confusion_matrix + + XXX: no normalization for unbalanced data + """ + + M = np.array(M) + assert M.ndim == 2 + assert M.shape[0] == M.shape[1] + + P = np.sum(M, axis=1) # number of positives, for each class + N = np.sum(P) - P + + TP = np.diag(M) + FP = np.sum(M, axis=0) - TP + + if fudge_mode == 'always': # always apply fudge factor + TPR = (TP.astype('float') + fudge_fac) / (P + 2.*fudge_fac) + FPR = (FP.astype('float') + fudge_fac) / (N + 2.*fudge_fac) + + elif fudge_mode == 'correction': # apply fudge factor only when needed + TP = TP.astype('float') + FP = FP.astype('float') + + TP[TP == P] = P[TP == P] - fudge_fac # 100% correct + TP[TP == 0] = fudge_fac # 0% correct + FP[FP == N] = N[FP == N] - fudge_fac # always FAR + FP[FP == 0] = fudge_fac # no false alarm + + TPR = TP / P + FPR = FP / N + + else: + assert False, 'Not implemented' + + dp = norm.ppf(TPR) - norm.ppf(FPR) + # if there's only two dp's then, it's must be "A" vs. "~A" task. 
If so, just give one value + if len(dp) == 2 and np.abs(dp[0] - dp[1]) < atol: + dp = np.array([dp[0]]) + + return dp + From 0b00116c8c9d8f756912057b70515fe6f166a9aa Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 17 Jul 2012 16:39:46 -0400 Subject: [PATCH 03/33] MISC: small cosmetics changes and assertions to check positives and negatives --- bangmetric/dprime.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 8400be0..bd15b87 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -15,6 +15,7 @@ DEFAULT_FUDGE_MODE = 'correction' ATOL = 1e-7 + def dprime(y_pred, y_true): """Computes the d-prime sensitivity index of the predictions. @@ -52,6 +53,10 @@ def dprime(y_pred, y_true): # -- actual computation pos = y_true > 0 neg = ~pos + + assert pos.sum() > 1, 'Not enough positives to estimate the variance' + assert neg.sum() > 1, 'Not enough negatives to estimate the variance' + pos_mean = y_pred[pos].mean() neg_mean = y_pred[neg].mean() pos_var = y_pred[pos].var(ddof=1) @@ -67,7 +72,7 @@ def dprime(y_pred, y_true): return dp -def dprime_ova_from_confusion(M, fudge_mode=DEFAULT_FUDGE_MODE, \ +def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ fudge_fac=DEFAULT_FUDGE_FACTOR, atol=ATOL): """Computes the one-vs-all d-prime sensitivity index of the confusion matrix. From 69f09742023a77f717ad30b9ad48362b0422d494 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Wed, 18 Jul 2012 01:40:21 -0400 Subject: [PATCH 04/33] ENH: added d-prime calcualtion function that directly takes sample values (+ minor changes) --- tests should be added --- bangmetric/dprime.py | 89 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 15 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index bd15b87..3e5ac94 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -6,17 +6,17 @@ # # License: BSD -__all__ = ['dprime', 'dprime_ova_from_confusion'] +__all__ = ['dprime', 'dprime_from_confusion_ova'] import numpy as np from scipy.stats import norm DEFAULT_FUDGE_FACTOR = 0.5 DEFAULT_FUDGE_MODE = 'correction' -ATOL = 1e-7 +ATOL = 1e-6 -def dprime(y_pred, y_true): +def dprime(y_pred, y_true, **kwargs): """Computes the d-prime sensitivity index of the predictions. Parameters @@ -29,10 +29,14 @@ def dprime(y_pred, y_true): y_pred: array, shape = [n_samples] Predicted values (real). + kwargs: named arguments, optional + Passed to ``dprime_from_samp()``. + Returns ------- dp: float or None - d-prime, None if d-prime is undefined + d-prime, None if d-prime is undefined and raw d-prime value (``safedp=False``) + is not requested (default). References ---------- @@ -51,23 +55,78 @@ def dprime(y_pred, y_true): assert y_pred.ndim == 1 # -- actual computation - pos = y_true > 0 - neg = ~pos + i_pos = y_true > 0 + i_neg = ~i_pos + + pos = y_pred[i_pos] + neg = y_pred[i_neg] + + dp = dprime_from_samp(pos, neg, bypass_nchk=True, **kwargs) + return dp + + +def dprime_from_samp(pos, neg, maxv=None, minv=None, safedp=True, bypass_nchk=False): + """Computes the d-prime sensitivity index from positive and negative samples. + + Parameters + ---------- + pos: array-like + Positive sample values (e.g., raw projection values of the positive classifier). + + neg: array-like + Negative sample values. + + maxv: float, optional + Maximum possible d-prime value. If None (default), there's no limit on + the maximum value. + + minv: float, optional + Minimum possible d-prime value. 
If None (default), there's no limit. + + safedp: bool, optional + If True (default), this function will return None if the resulting d-prime + value becomes non-finite. + + bypass_nchk: bool, optional + If False (default), do not bypass the test to ensure that enough positive + and negatives samples are there for the variance estimation. - assert pos.sum() > 1, 'Not enough positives to estimate the variance' - assert neg.sum() > 1, 'Not enough negatives to estimate the variance' + Returns + ------- + dp: float or None + d-prime, None if d-prime is undefined and raw d-prime value (``safedp=False``) + is not requested (default). + + References + ---------- + http://en.wikipedia.org/wiki/D' + """ + + pos = np.array(pos) + neg = np.array(neg) - pos_mean = y_pred[pos].mean() - neg_mean = y_pred[neg].mean() - pos_var = y_pred[pos].var(ddof=1) - neg_var = y_pred[neg].var(ddof=1) + if not bypass_nchk: + assert pos.size > 1, 'Not enough positive samples to estimate the variance' + assert neg.size > 1, 'Not enough negative samples to estimate the variance' + + pos_mean = pos.mean() + neg_mean = neg.mean() + pos_var = pos.var(ddof=1) + neg_var = neg.var(ddof=1) num = pos_mean - neg_mean div = np.sqrt((pos_var + neg_var) / 2.) - if div == 0: + + # from Dan's suggestion about clipping d' values... + if maxv is None: + maxv = np.inf + if minv is None: + minv = -np.inf + + dp = np.clip(num / div, minv, maxv) + + if safedp and not np.isfinite(dp): dp = None - else: - dp = num / div return dp From 43cabf5ddeee7880f194cb75cd3f529eee3d3750 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Wed, 18 Jul 2012 12:57:43 -0400 Subject: [PATCH 05/33] MISC: small chanages for 2x2 confusion matrix d' calculation --- bangmetric/dprime.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 3e5ac94..eee5198 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -13,7 +13,6 @@ DEFAULT_FUDGE_FACTOR = 0.5 DEFAULT_FUDGE_MODE = 'correction' -ATOL = 1e-6 def dprime(y_pred, y_true, **kwargs): @@ -132,7 +131,7 @@ def dprime_from_samp(pos, neg, maxv=None, minv=None, safedp=True, bypass_nchk=Fa def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ - fudge_fac=DEFAULT_FUDGE_FACTOR, atol=ATOL): + fudge_fac=DEFAULT_FUDGE_FACTOR): """Computes the one-vs-all d-prime sensitivity index of the confusion matrix. Parameters @@ -150,9 +149,6 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ 'always': always apply the fudge factor 'correction': apply only when needed - atol: float, optional - Tolerance to simplify the dp from a 2-way (i.e., 2x2) confusion matrix. - Returns ------- dp: array, shape = [n_classes] @@ -197,7 +193,7 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ dp = norm.ppf(TPR) - norm.ppf(FPR) # if there's only two dp's then, it's must be "A" vs. "~A" task. 
If so, just give one value - if len(dp) == 2 and np.abs(dp[0] - dp[1]) < atol: + if len(dp) == 2: dp = np.array([dp[0]]) return dp From 5f8f071451d85d7ca17e8000f67913f456501dd4 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Wed, 18 Jul 2012 12:59:39 -0400 Subject: [PATCH 06/33] MISC: no need to "balance" data for d' calculation --- bangmetric/dprime.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index eee5198..32d4799 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -158,8 +158,6 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ ---------- http://en.wikipedia.org/wiki/D' http://en.wikipedia.org/wiki/Confusion_matrix - - XXX: no normalization for unbalanced data """ M = np.array(M) From d6a9cf68ac141dc6dc92b377882317ee075f08eb Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Wed, 18 Jul 2012 23:48:46 -0400 Subject: [PATCH 07/33] MISC: addressing most stuffs in github.com/npinto/bangmetric/pull/8 (thanks @npinto!) --- bangmetric/dprime.py | 98 ++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 32d4799..ee3ff08 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -6,7 +6,7 @@ # # License: BSD -__all__ = ['dprime', 'dprime_from_confusion_ova'] +__all__ = ['dprime', 'dprime_from_samp', 'dprime_from_confusion_ova'] import numpy as np from scipy.stats import norm @@ -33,9 +33,8 @@ def dprime(y_pred, y_true, **kwargs): Returns ------- - dp: float or None - d-prime, None if d-prime is undefined and raw d-prime value (``safedp=False``) - is not requested (default). + dp: float + d-prime References ---------- @@ -60,11 +59,11 @@ def dprime(y_pred, y_true, **kwargs): pos = y_pred[i_pos] neg = y_pred[i_neg] - dp = dprime_from_samp(pos, neg, bypass_nchk=True, **kwargs) + dp = dprime_from_samp(pos, neg, **kwargs) return dp -def dprime_from_samp(pos, neg, maxv=None, minv=None, safedp=True, bypass_nchk=False): +def dprime_from_samp(pos, neg, max_value=np.inf, min_value=-np.inf): """Computes the d-prime sensitivity index from positive and negative samples. Parameters @@ -75,26 +74,16 @@ def dprime_from_samp(pos, neg, maxv=None, minv=None, safedp=True, bypass_nchk=Fa neg: array-like Negative sample values. - maxv: float, optional - Maximum possible d-prime value. If None (default), there's no limit on - the maximum value. + max_value: float, optional + Maximum possible d-prime value. Default is ``np.inf``. - minv: float, optional - Minimum possible d-prime value. If None (default), there's no limit. - - safedp: bool, optional - If True (default), this function will return None if the resulting d-prime - value becomes non-finite. - - bypass_nchk: bool, optional - If False (default), do not bypass the test to ensure that enough positive - and negatives samples are there for the variance estimation. + min_value: float, optional + Minimum possible d-prime value. Default is ``-np.inf``. Returns ------- - dp: float or None - d-prime, None if d-prime is undefined and raw d-prime value (``safedp=False``) - is not requested (default). 
+ dp: float + d-prime References ---------- @@ -104,9 +93,10 @@ def dprime_from_samp(pos, neg, maxv=None, minv=None, safedp=True, bypass_nchk=Fa pos = np.array(pos) neg = np.array(neg) - if not bypass_nchk: - assert pos.size > 1, 'Not enough positive samples to estimate the variance' - assert neg.size > 1, 'Not enough negative samples to estimate the variance' + if pos.size <= 1: + raise ValueError('Not enough positive samples to estimate the variance') + if neg.size <= 1: + raise ValueError('Not enough negative samples to estimate the variance') pos_mean = pos.mean() neg_mean = neg.mean() @@ -117,22 +107,16 @@ def dprime_from_samp(pos, neg, maxv=None, minv=None, safedp=True, bypass_nchk=Fa div = np.sqrt((pos_var + neg_var) / 2.) # from Dan's suggestion about clipping d' values... - if maxv is None: - maxv = np.inf - if minv is None: - minv = -np.inf - - dp = np.clip(num / div, minv, maxv) - - if safedp and not np.isfinite(dp): - dp = None + dp = np.clip(num / div, min_value, max_value) return dp def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ - fudge_fac=DEFAULT_FUDGE_FACTOR): + fudge_factor=DEFAULT_FUDGE_FACTOR, max_value=np.inf, min_value=-np.inf): """Computes the one-vs-all d-prime sensitivity index of the confusion matrix. + This function is mostly for when there is no access to internal representation + and/or decision making (like human data). Parameters ---------- @@ -141,13 +125,21 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ times when the classifier guesses that a test sample in the r-th class belongs to the c-th class. - fudge_fac: float, optional + fudge_factor: float, optional A small factor to avoid non-finite numbers when TPR or FPR becomes 0 or 1. fudge_mode: str, optional - Determins how to apply the fudge factor + Determins how to apply the fudge factor. Can be one of: 'always': always apply the fudge factor 'correction': apply only when needed + 'none': no fudging --- equivalent to ``fudge_factor=0`` + + max_value: float, optional + Maximum possible d-prime value. Default is ``np.inf``. + + min_value: float, optional + Minimum possible d-prime value. Default is ``-np.inf``. + Returns ------- @@ -170,26 +162,36 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ TP = np.diag(M) FP = np.sum(M, axis=0) - TP - if fudge_mode == 'always': # always apply fudge factor - TPR = (TP.astype('float') + fudge_fac) / (P + 2.*fudge_fac) - FPR = (FP.astype('float') + fudge_fac) / (N + 2.*fudge_fac) + + # -- application of fudge factor + + if fudge_mode == 'none': # no fudging + fudge_mode = 'always' + fudge_factor = 0 + + if fudge_mode == 'always': # always apply fudge factor + TPR = (TP.astype('float64') + fudge_factor) / (P + 2.*fudge_factor) + FPR = (FP.astype('float64') + fudge_factor) / (N + 2.*fudge_factor) elif fudge_mode == 'correction': # apply fudge factor only when needed - TP = TP.astype('float') - FP = FP.astype('float') + TP = TP.astype('float64') + FP = FP.astype('float64') - TP[TP == P] = P[TP == P] - fudge_fac # 100% correct - TP[TP == 0] = fudge_fac # 0% correct - FP[FP == N] = N[FP == N] - fudge_fac # always FAR - FP[FP == 0] = fudge_fac # no false alarm + TP[TP == P] = P[TP == P] - fudge_factor # 100% correct + TP[TP == 0] = fudge_factor # 0% correct + FP[FP == N] = N[FP == N] - fudge_factor # always FAR + FP[FP == 0] = fudge_factor # no false alarm TPR = TP / P FPR = FP / N else: - assert False, 'Not implemented' + raise ValueError('Invalid fudge_mode') + + + # -- done. 
compute the d' - dp = norm.ppf(TPR) - norm.ppf(FPR) + dp = np.clip(norm.ppf(TPR) - norm.ppf(FPR), min_value, max_value) # if there's only two dp's then, it's must be "A" vs. "~A" task. If so, just give one value if len(dp) == 2: dp = np.array([dp[0]]) From 6f5cbac75972f0c06ad035f72dbdec67b9cee5f8 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Wed, 18 Jul 2012 23:56:14 -0400 Subject: [PATCH 08/33] DOC: small retouches --- bangmetric/dprime.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index ee3ff08..147429a 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -127,11 +127,12 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ fudge_factor: float, optional A small factor to avoid non-finite numbers when TPR or FPR becomes 0 or 1. + Default is 0.5. fudge_mode: str, optional Determins how to apply the fudge factor. Can be one of: - 'always': always apply the fudge factor - 'correction': apply only when needed + 'correction': apply only when needed (default) + 'always': always apply the fudge factor 'none': no fudging --- equivalent to ``fudge_factor=0`` max_value: float, optional @@ -144,7 +145,7 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ Returns ------- dp: array, shape = [n_classes] - Array of d-primes, each element corresponding to each class + Array of d-primes, where each element corresponds to each class References ---------- From afa86fe0dca8b842bb6c2328a5b05696849377c6 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 00:02:58 -0400 Subject: [PATCH 09/33] COSMIT --- bangmetric/dprime.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 147429a..46fa009 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -162,37 +162,34 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ TP = np.diag(M) FP = np.sum(M, axis=0) - TP - + TP = TP.astype('float64') + FP = FP.astype('float64') # -- application of fudge factor - if fudge_mode == 'none': # no fudging fudge_mode = 'always' fudge_factor = 0 if fudge_mode == 'always': # always apply fudge factor - TPR = (TP.astype('float64') + fudge_factor) / (P + 2.*fudge_factor) - FPR = (FP.astype('float64') + fudge_factor) / (N + 2.*fudge_factor) + TP += fudge_factor + FP += fudge_factor + P += 2.*fudge_factor + N += 2.*fudge_factor elif fudge_mode == 'correction': # apply fudge factor only when needed - TP = TP.astype('float64') - FP = FP.astype('float64') - TP[TP == P] = P[TP == P] - fudge_factor # 100% correct TP[TP == 0] = fudge_factor # 0% correct FP[FP == N] = N[FP == N] - fudge_factor # always FAR FP[FP == 0] = fudge_factor # no false alarm - TPR = TP / P - FPR = FP / N - else: raise ValueError('Invalid fudge_mode') - # -- done. compute the d' - + TPR = TP / P + FPR = FP / N dp = np.clip(norm.ppf(TPR) - norm.ppf(FPR), min_value, max_value) + # if there's only two dp's then, it's must be "A" vs. "~A" task. 
If so, just give one value if len(dp) == 2: dp = np.array([dp[0]]) From 8babb28e085c1da481223581aba5610f54a307ae Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 00:05:47 -0400 Subject: [PATCH 10/33] COSMIT --- bangmetric/dprime.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 46fa009..9568d98 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -167,10 +167,9 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ # -- application of fudge factor if fudge_mode == 'none': # no fudging - fudge_mode = 'always' - fudge_factor = 0 + pass - if fudge_mode == 'always': # always apply fudge factor + elif fudge_mode == 'always': # always apply fudge factor TP += fudge_factor FP += fudge_factor P += 2.*fudge_factor From d6ab20be4e19fff4c8d6e5910de92964c2d5cfa1 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 01:54:31 -0400 Subject: [PATCH 11/33] ENH: more general dprime_from_confusion (thanks, @npinto!) --- bangmetric/dprime.py | 55 +++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 9568d98..00f496e 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -6,7 +6,7 @@ # # License: BSD -__all__ = ['dprime', 'dprime_from_samp', 'dprime_from_confusion_ova'] +__all__ = ['dprime', 'dprime_from_samp', 'dprime_from_confusion'] import numpy as np from scipy.stats import norm @@ -112,7 +112,7 @@ def dprime_from_samp(pos, neg, max_value=np.inf, min_value=-np.inf): return dp -def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ +def dprime_from_confusion(M, collation=None, fudge_mode=DEFAULT_FUDGE_MODE, \ fudge_factor=DEFAULT_FUDGE_FACTOR, max_value=np.inf, min_value=-np.inf): """Computes the one-vs-all d-prime sensitivity index of the confusion matrix. This function is mostly for when there is no access to internal representation @@ -120,11 +120,23 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ Parameters ---------- - M: array, shape = [n_classes (true), n_classes (pred)] + M: array-like, shape = [n_classes (true), n_classes (pred)] Confusion matrix, where the element M_{rc} means the number of times when the classifier guesses that a test sample in the r-th class belongs to the c-th class. + collation: None (default) or array-like with shape = [n_grouping, n_classes] + Defines how to group entries in `M` to compute TPR and FPR. + Entries shoule be {+1, 0, -1}. A row defines one instance of grouping, + where +1, -1, and 0 designate the corresponding class as a + positive, negative, and ignored class, respectively. For example, + the following `collation` defines a 3-way one vs. rest grouping + (given that `M` is a 3x3 matrix): + [[+1, -1, -1], + [-1, +1, -1], + [-1, -1, +1]] + If `None` (default), one vs. rest grouping is assumed. + fudge_factor: float, optional A small factor to avoid non-finite numbers when TPR or FPR becomes 0 or 1. Default is 0.5. @@ -144,8 +156,9 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ Returns ------- - dp: array, shape = [n_classes] - Array of d-primes, where each element corresponds to each class + dp: array, shape = [n_grouping] + Array of d-primes, where each element corresponds to each grouping + defined by `collation`. 
References ---------- @@ -153,17 +166,31 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ http://en.wikipedia.org/wiki/Confusion_matrix """ + # M: confusion matrix, row means true classes, col means predicted classes M = np.array(M) assert M.ndim == 2 assert M.shape[0] == M.shape[1] - - P = np.sum(M, axis=1) # number of positives, for each class - N = np.sum(P) - P + n_classes = M.shape[0] - TP = np.diag(M) - FP = np.sum(M, axis=0) - TP - TP = TP.astype('float64') - FP = FP.astype('float64') + if collation is None: + # make it one vs. rest + collation = -np.ones((n_classes, n_classes), dtype='int8') + collation += 2 * np.eye(n_classes, dtype='int8') + else: + collation = np.array(collation, dtype='int8') + assert collation.ndim == 2 + assert collation.shape[1] == n_classes + + # P0: number of positives, for each class + # P: number of positives, for each grouping + # N: number of negatives, for each grouping + # TP: number of true positives, for each grouping + # FP: number of false positives, for each grouping + P0 = np.sum(M, axis=1) + P = np.array([np.sum(P0[coll == +1]) for coll in collation], dtype='float64') + N = np.array([np.sum(P0[coll == -1]) for coll in collation], dtype='float64') + TP = np.array([np.sum(M[coll == +1][:, coll == +1]) for coll in collation], dtype='float64') + FP = np.array([np.sum(M[coll == -1][:, coll == +1]) for coll in collation], dtype='float64') # -- application of fudge factor if fudge_mode == 'none': # no fudging @@ -189,9 +216,5 @@ def dprime_from_confusion_ova(M, fudge_mode=DEFAULT_FUDGE_MODE, \ FPR = FP / N dp = np.clip(norm.ppf(TPR) - norm.ppf(FPR), min_value, max_value) - # if there's only two dp's then, it's must be "A" vs. "~A" task. If so, just give one value - if len(dp) == 2: - dp = np.array([dp[0]]) - return dp From 60814d8f5c72884a18eab577d4360b06d65cdd58 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 15:17:16 -0400 Subject: [PATCH 12/33] COSMIT --- bangmetric/dprime.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 00f496e..8d8521c 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -114,7 +114,7 @@ def dprime_from_samp(pos, neg, max_value=np.inf, min_value=-np.inf): def dprime_from_confusion(M, collation=None, fudge_mode=DEFAULT_FUDGE_MODE, \ fudge_factor=DEFAULT_FUDGE_FACTOR, max_value=np.inf, min_value=-np.inf): - """Computes the one-vs-all d-prime sensitivity index of the confusion matrix. + """Computes the d-prime sensitivity index of the confusion matrix. This function is mostly for when there is no access to internal representation and/or decision making (like human data). @@ -125,7 +125,7 @@ def dprime_from_confusion(M, collation=None, fudge_mode=DEFAULT_FUDGE_MODE, \ times when the classifier guesses that a test sample in the r-th class belongs to the c-th class. - collation: None (default) or array-like with shape = [n_grouping, n_classes] + collation: None or array-like with shape = [n_groupings, n_classes], optional Defines how to group entries in `M` to compute TPR and FPR. Entries shoule be {+1, 0, -1}. A row defines one instance of grouping, where +1, -1, and 0 designate the corresponding class as a @@ -156,7 +156,7 @@ def dprime_from_confusion(M, collation=None, fudge_mode=DEFAULT_FUDGE_MODE, \ Returns ------- - dp: array, shape = [n_grouping] + dp: array, shape = [n_groupings] Array of d-primes, where each element corresponds to each grouping defined by `collation`. 
From 056aa5e3f3d8918f18890bcb55a667276a85442d Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 16:38:57 -0400 Subject: [PATCH 13/33] ENH: refactoring out a function that computes stats of a confu matrix. --- bangmetric/utils.py | 130 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 bangmetric/utils.py diff --git a/bangmetric/utils.py b/bangmetric/utils.py new file mode 100644 index 0000000..2ae01c3 --- /dev/null +++ b/bangmetric/utils.py @@ -0,0 +1,130 @@ +"""Other utility functions""" + +# Authors: Ha Hong +# +# License: BSD + +__all__ = ['confusion_stats'] + +import numpy as np + +DEFAULT_FUDGE_FACTOR = 0.5 +DEFAULT_FUDGE_MODE = 'correction' + + +def confusion_stats(M, collation=None, \ + fudge_mode=DEFAULT_FUDGE_MODE, fudge_factor=DEFAULT_FUDGE_FACTOR): + """Computes classification statistics of sub-confusion matrices inside + the given original confusion matrix M. If no ``collation`` is given, + statistics for each one vs. rest sub-confusion matrix will be computed. + + Parameters + ---------- + M: array-like, shape = [n_classes (true), n_classes (pred)] + Confusion matrix, where the element M_{rc} means the number of + times when the classifier guesses that a test sample in the r-th class + belongs to the c-th class. + + collation: None or array-like with shape = [n_groupings, n_classes], optional + Defines how to group entries in `M` to compute TPR and FPR. + Entries shoule be {+1, 0, -1}. A row defines one instance of grouping, + where +1, -1, and 0 designate the corresponding class as a + positive, negative, and ignored class, respectively. For example, + the following `collation` defines a 3-way one vs. rest grouping + (given that `M` is a 3x3 matrix): + [[+1, -1, -1], + [-1, +1, -1], + [-1, -1, +1]] + If `None` (default), one vs. rest grouping is assumed. + + fudge_factor: float, optional + A small factor to avoid non-finite numbers when TPR or FPR becomes 0 or 1. + Default is 0.5. + + fudge_mode: str, optional + Determins how to apply the fudge factor. Can be one of: + 'correction': apply only when needed (default) + 'always': always apply the fudge factor + 'none': no fudging --- equivalent to ``fudge_factor=0`` + + + Returns + ------- + P: array, shape = [n_groupings] + Array of the number of positives, where each element corresponds to each + grouping defined by `collation`. + N: array, shape = [n_groupings] + Same as P, except that this is an array of the number of negatives. + TP: array, shape = [n_groupings] + Same as P, except that this is an array of the number of true positives. + TN: array, shape = [n_groupings] + Same as P, except that this is an array of the number of true negatives. + FP: array, shape = [n_groupings] + Same as P, except that this is an array of the number of false positives. + FN: array, shape = [n_groupings] + Same as P, except that this is an array of the number of false negatives. + + + References + ---------- + http://en.wikipedia.org/wiki/Confusion_matrix + http://en.wikipedia.org/wiki/Receiver_operating_characteristic + """ + + # M: confusion matrix, row means true classes, col means predicted classes + M = np.array(M) + assert M.ndim == 2 + assert M.shape[0] == M.shape[1] + n_classes = M.shape[0] + + if collation is None: + # make it one vs. 
rest + collation = -np.ones((n_classes, n_classes), dtype='int8') + collation += 2 * np.eye(n_classes, dtype='int8') + else: + collation = np.array(collation, dtype='int8') + assert collation.ndim == 2 + assert collation.shape[1] == n_classes + + # P0: number of positives, for each class + # P: number of positives, for each grouping + # N: number of negatives, for each grouping + # TP: number of true positives, for each grouping + # FP: number of false positives, for each grouping + P0 = np.sum(M, axis=1) + P = np.array([np.sum(P0[coll == +1]) for coll in collation], dtype='float64') + N = np.array([np.sum(P0[coll == -1]) for coll in collation], dtype='float64') + TP = np.array([np.sum(M[coll == +1][:, coll == +1]) for coll in collation], dtype='float64') + TN = np.array([np.sum(M[coll == -1][:, coll == -1]) for coll in collation], dtype='float64') + FP = np.array([np.sum(M[coll == -1][:, coll == +1]) for coll in collation], dtype='float64') + FN = np.array([np.sum(M[coll == +1][:, coll == -1]) for coll in collation], dtype='float64') + + # -- application of fudge factor + if fudge_mode == 'none': # no fudging + pass + + elif fudge_mode == 'always': # always apply fudge factor + TP += fudge_factor + FP += fudge_factor + TN += fudge_factor + FN += fudge_factor + P += 2.*fudge_factor + N += 2.*fudge_factor + + elif fudge_mode == 'correction': # apply fudge factor only when needed + TP[TP == P] = P[TP == P] - fudge_factor # 100% correct + TP[TP == 0] = fudge_factor # 0% correct + FP[FP == N] = N[FP == N] - fudge_factor # always FAR + FP[FP == 0] = fudge_factor # no false alarm + + TN[TN == N] = N[TN == N] - fudge_factor + TN[TN == 0] = fudge_factor + FN[FN == P] = P[FN == P] - fudge_factor + FN[FN == 0] = fudge_factor + + else: + raise ValueError('Invalid fudge_mode') + + # -- done + return P, N, TP, TN, FP, FN + From 396224ba48336b15e7f8cd6800c86049a040a927 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 16:41:25 -0400 Subject: [PATCH 14/33] COSMIT: refactoring confusion matrix handling part --- bangmetric/dprime.py | 89 +++++++------------------------------------- 1 file changed, 14 insertions(+), 75 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 8d8521c..6b2f44d 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -10,9 +10,7 @@ import numpy as np from scipy.stats import norm - -DEFAULT_FUDGE_FACTOR = 0.5 -DEFAULT_FUDGE_MODE = 'correction' +from .utils import confusion_stats def dprime(y_pred, y_true, **kwargs): @@ -112,11 +110,12 @@ def dprime_from_samp(pos, neg, max_value=np.inf, min_value=-np.inf): return dp -def dprime_from_confusion(M, collation=None, fudge_mode=DEFAULT_FUDGE_MODE, \ - fudge_factor=DEFAULT_FUDGE_FACTOR, max_value=np.inf, min_value=-np.inf): - """Computes the d-prime sensitivity index of the confusion matrix. - This function is mostly for when there is no access to internal representation - and/or decision making (like human data). +def dprime_from_confusion(M, max_value=np.inf, min_value=-np.inf, **kwargs): + """Computes the d-prime sensitivity index of the given confusion matrix. + This function is designed mostly for when there is no access to internal + representations and/or decision making mechanisms (like human data). + If no ``collation`` is defined in ``kwargs`` this function computes + one vs. rest d-prime for each class. 
Parameters ---------- @@ -125,34 +124,17 @@ def dprime_from_confusion(M, collation=None, fudge_mode=DEFAULT_FUDGE_MODE, \ times when the classifier guesses that a test sample in the r-th class belongs to the c-th class. - collation: None or array-like with shape = [n_groupings, n_classes], optional - Defines how to group entries in `M` to compute TPR and FPR. - Entries shoule be {+1, 0, -1}. A row defines one instance of grouping, - where +1, -1, and 0 designate the corresponding class as a - positive, negative, and ignored class, respectively. For example, - the following `collation` defines a 3-way one vs. rest grouping - (given that `M` is a 3x3 matrix): - [[+1, -1, -1], - [-1, +1, -1], - [-1, -1, +1]] - If `None` (default), one vs. rest grouping is assumed. - - fudge_factor: float, optional - A small factor to avoid non-finite numbers when TPR or FPR becomes 0 or 1. - Default is 0.5. - - fudge_mode: str, optional - Determins how to apply the fudge factor. Can be one of: - 'correction': apply only when needed (default) - 'always': always apply the fudge factor - 'none': no fudging --- equivalent to ``fudge_factor=0`` - max_value: float, optional Maximum possible d-prime value. Default is ``np.inf``. min_value: float, optional Minimum possible d-prime value. Default is ``-np.inf``. + kwargs: named arguments, optional + Passed to ``confusion_stats()``. By passing ``collation``, ``fudge_mode``, + ``fudge_factor``, etc. one can change the behavior of d-prime computation + (see ``confusion_stats()`` for details). + Returns ------- @@ -167,51 +149,8 @@ def dprime_from_confusion(M, collation=None, fudge_mode=DEFAULT_FUDGE_MODE, \ """ # M: confusion matrix, row means true classes, col means predicted classes - M = np.array(M) - assert M.ndim == 2 - assert M.shape[0] == M.shape[1] - n_classes = M.shape[0] - - if collation is None: - # make it one vs. rest - collation = -np.ones((n_classes, n_classes), dtype='int8') - collation += 2 * np.eye(n_classes, dtype='int8') - else: - collation = np.array(collation, dtype='int8') - assert collation.ndim == 2 - assert collation.shape[1] == n_classes - - # P0: number of positives, for each class - # P: number of positives, for each grouping - # N: number of negatives, for each grouping - # TP: number of true positives, for each grouping - # FP: number of false positives, for each grouping - P0 = np.sum(M, axis=1) - P = np.array([np.sum(P0[coll == +1]) for coll in collation], dtype='float64') - N = np.array([np.sum(P0[coll == -1]) for coll in collation], dtype='float64') - TP = np.array([np.sum(M[coll == +1][:, coll == +1]) for coll in collation], dtype='float64') - FP = np.array([np.sum(M[coll == -1][:, coll == +1]) for coll in collation], dtype='float64') - - # -- application of fudge factor - if fudge_mode == 'none': # no fudging - pass - - elif fudge_mode == 'always': # always apply fudge factor - TP += fudge_factor - FP += fudge_factor - P += 2.*fudge_factor - N += 2.*fudge_factor - - elif fudge_mode == 'correction': # apply fudge factor only when needed - TP[TP == P] = P[TP == P] - fudge_factor # 100% correct - TP[TP == 0] = fudge_factor # 0% correct - FP[FP == N] = N[FP == N] - fudge_factor # always FAR - FP[FP == 0] = fudge_factor # no false alarm - - else: - raise ValueError('Invalid fudge_mode') - - # -- done. 
compute the d' + P, N, TP, _, FP, _ = confusion_stats(M, **kwargs) + TPR = TP / P FPR = FP / N dp = np.clip(norm.ppf(TPR) - norm.ppf(FPR), min_value, max_value) From ad8e3afae5bf28d07f4fbd1353fc69cd380d6218 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 17:00:08 -0400 Subject: [PATCH 15/33] COSMIT --- bangmetric/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bangmetric/utils.py b/bangmetric/utils.py index 2ae01c3..a687aa6 100644 --- a/bangmetric/utils.py +++ b/bangmetric/utils.py @@ -26,7 +26,7 @@ def confusion_stats(M, collation=None, \ belongs to the c-th class. collation: None or array-like with shape = [n_groupings, n_classes], optional - Defines how to group entries in `M` to compute TPR and FPR. + Defines how to group entries in `M` to make sub-confusion matrices. Entries shoule be {+1, 0, -1}. A row defines one instance of grouping, where +1, -1, and 0 designate the corresponding class as a positive, negative, and ignored class, respectively. For example, @@ -78,7 +78,10 @@ def confusion_stats(M, collation=None, \ n_classes = M.shape[0] if collation is None: - # make it one vs. rest + # make it one vs. rest. E.g., for a 3-classes case: + # [[+1, -1, -1], + # [-1, +1, -1], + # [-1, -1, +1]] collation = -np.ones((n_classes, n_classes), dtype='int8') collation += 2 * np.eye(n_classes, dtype='int8') else: From b1d8b77de7456c837650c91df92793e5caa12b1f Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 17:05:50 -0400 Subject: [PATCH 16/33] DOC: small changes --- bangmetric/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bangmetric/utils.py b/bangmetric/utils.py index a687aa6..482dbf5 100644 --- a/bangmetric/utils.py +++ b/bangmetric/utils.py @@ -38,8 +38,8 @@ def confusion_stats(M, collation=None, \ If `None` (default), one vs. rest grouping is assumed. fudge_factor: float, optional - A small factor to avoid non-finite numbers when TPR or FPR becomes 0 or 1. - Default is 0.5. + A small factor to avoid TPR, FPR, TNR, or FNR becoming 0 or 1. + Mostly intended for d-prime calculation. Default is 0.5. fudge_mode: str, optional Determins how to apply the fudge factor. Can be one of: From b0d58c17871e646199b8397942a89c84c3e6fc9d Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Thu, 19 Jul 2012 17:09:21 -0400 Subject: [PATCH 17/33] DOC: small changes --- bangmetric/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bangmetric/utils.py b/bangmetric/utils.py index 482dbf5..fc75db3 100644 --- a/bangmetric/utils.py +++ b/bangmetric/utils.py @@ -52,7 +52,7 @@ def confusion_stats(M, collation=None, \ ------- P: array, shape = [n_groupings] Array of the number of positives, where each element corresponds to each - grouping defined by `collation`. + grouping (row) defined by `collation`. N: array, shape = [n_groupings] Same as P, except that this is an array of the number of negatives. 
TP: array, shape = [n_groupings] From 15295c56001921730501f6d01f23d3aecf4afffe Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Fri, 20 Jul 2012 23:00:17 -0400 Subject: [PATCH 18/33] COSMIT: combined dprime() and dprime_from_samp() --- bangmetric/dprime.py | 127 ++++++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 56 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 6b2f44d..59c8e94 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -6,70 +6,37 @@ # # License: BSD -__all__ = ['dprime', 'dprime_from_samp', 'dprime_from_confusion'] +__all__ = ['dprime', 'dprime_from_confusion_matrix'] import numpy as np from scipy.stats import norm from .utils import confusion_stats -def dprime(y_pred, y_true, **kwargs): - """Computes the d-prime sensitivity index of the predictions. +def dprime(y_pred=None, y_true=None, pos=None, neg=None, max_value=np.inf, min_value=-np.inf): + """Computes the d-prime sensitivity index. + One must provide either y_pred and y_true or pos and neg. + This function computes the d-prime of predictions given by + y_pred and y_true by default. If pos and neg are provided + both y_pred and y_true are ignored and this function + computes the d-prime from positive and negative samples + given by pos and neg. Parameters ---------- - y_true: array, shape = [n_samples] + y_true: array, shape = [n_samples], optional True values, interpreted as strictly positive or not (i.e. converted to binary). Could be in {-1, +1} or {0, 1} or {False, True}. - y_pred: array, shape = [n_samples] + y_pred: array, shape = [n_samples], optional Predicted values (real). - kwargs: named arguments, optional - Passed to ``dprime_from_samp()``. - - Returns - ------- - dp: float - d-prime - - References - ---------- - http://en.wikipedia.org/wiki/D' - """ - - # -- basic checks and conversion - assert len(y_true) == len(y_pred) - assert np.isfinite(y_true).all() - assert np.isfinite(y_pred).all() + pos: array-like, optional + Positive sample values (e.g., raw projection values of + the positive classifier). - y_true = np.array(y_true) - assert y_true.ndim == 1 - - y_pred = np.array(y_pred) - assert y_pred.ndim == 1 - - # -- actual computation - i_pos = y_true > 0 - i_neg = ~i_pos - - pos = y_pred[i_pos] - neg = y_pred[i_neg] - - dp = dprime_from_samp(pos, neg, **kwargs) - return dp - - -def dprime_from_samp(pos, neg, max_value=np.inf, min_value=-np.inf): - """Computes the d-prime sensitivity index from positive and negative samples. - - Parameters - ---------- - pos: array-like - Positive sample values (e.g., raw projection values of the positive classifier). - - neg: array-like + neg: array-like, optional Negative sample values. 
max_value: float, optional @@ -88,14 +55,37 @@ def dprime_from_samp(pos, neg, max_value=np.inf, min_value=-np.inf): http://en.wikipedia.org/wiki/D' """ - pos = np.array(pos) - neg = np.array(neg) + # -- basic checks and conversion + if pos is not None and neg is not None: + pos = np.array(pos) + neg = np.array(neg) + + else: + assert len(y_true) == len(y_pred) + assert np.isfinite(y_true).all() + + y_true = np.array(y_true) + assert y_true.ndim == 1 + + y_pred = np.array(y_pred) + assert y_pred.ndim == 1 + + # -- actual computation + i_pos = y_true > 0 + i_neg = ~i_pos + + pos = y_pred[i_pos] + neg = y_pred[i_neg] + + assert np.isfinite(pos).all() + assert np.isfinite(neg).all() if pos.size <= 1: raise ValueError('Not enough positive samples to estimate the variance') if neg.size <= 1: raise ValueError('Not enough negative samples to estimate the variance') + # -- compute d' pos_mean = pos.mean() neg_mean = neg.mean() pos_var = pos.var(ddof=1) @@ -110,8 +100,8 @@ def dprime_from_samp(pos, neg, max_value=np.inf, min_value=-np.inf): return dp -def dprime_from_confusion(M, max_value=np.inf, min_value=-np.inf, **kwargs): - """Computes the d-prime sensitivity index of the given confusion matrix. +def dprime_from_confusion_matrix(M, max_value=np.inf, min_value=-np.inf, **kwargs): + """Computes the d-prime sensitivity indices of the given confusion matrix. This function is designed mostly for when there is no access to internal representations and/or decision making mechanisms (like human data). If no ``collation`` is defined in ``kwargs`` this function computes @@ -121,8 +111,8 @@ def dprime_from_confusion(M, max_value=np.inf, min_value=-np.inf, **kwargs): ---------- M: array-like, shape = [n_classes (true), n_classes (pred)] Confusion matrix, where the element M_{rc} means the number of - times when the classifier guesses that a test sample in the r-th class - belongs to the c-th class. + times when the classifier/subject guesses that a test sample in + the r-th class belongs to the c-th class. max_value: float, optional Maximum possible d-prime value. Default is ``np.inf``. @@ -135,12 +125,11 @@ def dprime_from_confusion(M, max_value=np.inf, min_value=-np.inf, **kwargs): ``fudge_factor``, etc. one can change the behavior of d-prime computation (see ``confusion_stats()`` for details). - Returns ------- dp: array, shape = [n_groupings] Array of d-primes, where each element corresponds to each grouping - defined by `collation`. + defined by `collation` (see ``confusion_stats()`` for details). References ---------- @@ -157,3 +146,29 @@ def dprime_from_confusion(M, max_value=np.inf, min_value=-np.inf, **kwargs): return dp + + """Computes the population d-primes from the given set of confusion matrices. + Note: it is advised to read the documentation of ``dprime_from_confusion()`` + for understanding of ``kwargs``. + + Parameters + ---------- + M: array-like, shape = [n_individuals, n_classes (true), n_classes (pred)] + Set of confusion matrices, where the element M_{irc} means the number of + times when the i-th individual guesses that a test sample in the r-th class + belongs to the c-th class. + + kwargs: named arguments, optional + Passed to ``dprime_from_confusion()``. + + Returns + ------- + dp: array, shape = [n_groupings] + Array of population d-primes, where each element corresponds to each + grouping defined by `collation` (see ``confusion_stats()`` for details). 
+ + References + ---------- + http://en.wikipedia.org/wiki/D' + http://en.wikipedia.org/wiki/Confusion_matrix + """ From 69f89ec3730619e8d37c54a5a1ec77f4f43406cd Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Fri, 20 Jul 2012 23:02:19 -0400 Subject: [PATCH 19/33] COSMIT --- bangmetric/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bangmetric/utils.py b/bangmetric/utils.py index fc75db3..adfbdb9 100644 --- a/bangmetric/utils.py +++ b/bangmetric/utils.py @@ -4,7 +4,7 @@ # # License: BSD -__all__ = ['confusion_stats'] +__all__ = ['confusion_matrix_stats'] import numpy as np @@ -12,7 +12,7 @@ DEFAULT_FUDGE_MODE = 'correction' -def confusion_stats(M, collation=None, \ +def confusion_matrix_stats(M, collation=None, \ fudge_mode=DEFAULT_FUDGE_MODE, fudge_factor=DEFAULT_FUDGE_FACTOR): """Computes classification statistics of sub-confusion matrices inside the given original confusion matrix M. If no ``collation`` is given, From 341d29a158e7a9caf11ebdadd68c73595ff2f774 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Sat, 21 Jul 2012 00:36:57 -0400 Subject: [PATCH 20/33] COSMIT --- bangmetric/dprime.py | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 59c8e94..73757ec 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -17,8 +17,8 @@ def dprime(y_pred=None, y_true=None, pos=None, neg=None, max_value=np.inf, min_v """Computes the d-prime sensitivity index. One must provide either y_pred and y_true or pos and neg. This function computes the d-prime of predictions given by - y_pred and y_true by default. If pos and neg are provided - both y_pred and y_true are ignored and this function + y_pred and y_true by default. If pos and neg are provided, + both y_pred and y_true are ignored, and this function computes the d-prime from positive and negative samples given by pos and neg. @@ -37,7 +37,8 @@ def dprime(y_pred=None, y_true=None, pos=None, neg=None, max_value=np.inf, min_v the positive classifier). neg: array-like, optional - Negative sample values. + Negative sample values. If both pos and neg are + provided, y_true and y_pred are ignored. max_value: float, optional Maximum possible d-prime value. Default is ``np.inf``. @@ -138,7 +139,7 @@ def dprime_from_confusion_matrix(M, max_value=np.inf, min_value=-np.inf, **kwarg """ # M: confusion matrix, row means true classes, col means predicted classes - P, N, TP, _, FP, _ = confusion_stats(M, **kwargs) + P, N, TP, _, FP, _ = confusion_matrix_stats(M, **kwargs) TPR = TP / P FPR = FP / N @@ -146,29 +147,3 @@ def dprime_from_confusion_matrix(M, max_value=np.inf, min_value=-np.inf, **kwarg return dp - - """Computes the population d-primes from the given set of confusion matrices. - Note: it is advised to read the documentation of ``dprime_from_confusion()`` - for understanding of ``kwargs``. - - Parameters - ---------- - M: array-like, shape = [n_individuals, n_classes (true), n_classes (pred)] - Set of confusion matrices, where the element M_{irc} means the number of - times when the i-th individual guesses that a test sample in the r-th class - belongs to the c-th class. - - kwargs: named arguments, optional - Passed to ``dprime_from_confusion()``. - - Returns - ------- - dp: array, shape = [n_groupings] - Array of population d-primes, where each element corresponds to each - grouping defined by `collation` (see ``confusion_stats()`` for details). 
- - References - ---------- - http://en.wikipedia.org/wiki/D' - http://en.wikipedia.org/wiki/Confusion_matrix - """ From de48e4696ed7fab11003c1eee8a04755a5ecf6a1 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 11:36:40 -0400 Subject: [PATCH 21/33] MISC: small errors and cosmetic changes --- bangmetric/dprime.py | 139 +++++++++++++++++++++++++++---------------- bangmetric/utils.py | 63 ++++++++++---------- 2 files changed, 120 insertions(+), 82 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index 73757ec..b85b075 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -10,35 +10,53 @@ import numpy as np from scipy.stats import norm -from .utils import confusion_stats +from .utils import confusion_matrix_stats +DEFAULT_DPRIME_MODE = 'binary' -def dprime(y_pred=None, y_true=None, pos=None, neg=None, max_value=np.inf, min_value=-np.inf): - """Computes the d-prime sensitivity index. - One must provide either y_pred and y_true or pos and neg. - This function computes the d-prime of predictions given by - y_pred and y_true by default. If pos and neg are provided, - both y_pred and y_true are ignored, and this function - computes the d-prime from positive and negative samples - given by pos and neg. + +def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ + min_value=-np.inf): + """Computes the d-prime sensitivity index of predictions + from various data formats. Depending on the choice of + `mode`, this function can take one of the following format: + + * Binary classification outputs (`mode='binary'`; default) + * Positive and negative samples (`mode='sample'`) + * True positive and false positive rate (`mode='rate'`) Parameters ---------- - y_true: array, shape = [n_samples], optional - True values, interpreted as strictly positive or not - (i.e. converted to binary). - Could be in {-1, +1} or {0, 1} or {False, True}. + A, B: + If `mode` is 'binary': + + A: array, shape = [n_samples], + True values, interpreted as strictly positive or not + (i.e. converted to binary). + Could be in {-1, +1} or {0, 1} or {False, True}. + + B: array, shape = [n_samples], + Predicted values (real). + + If `mode` is 'sample': - y_pred: array, shape = [n_samples], optional - Predicted values (real). + A: array-like, + Positive sample values (e.g., raw projection values + of the positive classifier). - pos: array-like, optional - Positive sample values (e.g., raw projection values of - the positive classifier). + B: array-like, + Negative sample values. - neg: array-like, optional - Negative sample values. If both pos and neg are - provided, y_true and y_pred are ignored. + If `mode` is 'rate': + + A: array-like, shape = [n_groupings] + True positive rates + + B: array-like, shape = [n_groupings] + False positive rates + + mode: {'binary', 'sample', 'rate'}, optional + Directs the interpretation of A and B max_value: float, optional Maximum possible d-prime value. Default is ``np.inf``. 
@@ -57,11 +75,12 @@ def dprime(y_pred=None, y_true=None, pos=None, neg=None, max_value=np.inf, min_v """ # -- basic checks and conversion - if pos is not None and neg is not None: - pos = np.array(pos) - neg = np.array(neg) + if mode == 'sample': + pos, neg = np.array(A), np.array(B) + + elif mode == 'binary': + y_true, y_pred = A, B - else: assert len(y_true) == len(y_pred) assert np.isfinite(y_true).all() @@ -71,48 +90,64 @@ def dprime(y_pred=None, y_true=None, pos=None, neg=None, max_value=np.inf, min_v y_pred = np.array(y_pred) assert y_pred.ndim == 1 - # -- actual computation i_pos = y_true > 0 i_neg = ~i_pos pos = y_pred[i_pos] neg = y_pred[i_neg] - assert np.isfinite(pos).all() - assert np.isfinite(neg).all() + elif mode == 'rate': + TPR, FPR = np.array(A), np.array(B) + assert TPR.shape == FPR.shape - if pos.size <= 1: - raise ValueError('Not enough positive samples to estimate the variance') - if neg.size <= 1: - raise ValueError('Not enough negative samples to estimate the variance') + else: + raise ValueError('Invalid mode') # -- compute d' - pos_mean = pos.mean() - neg_mean = neg.mean() - pos_var = pos.var(ddof=1) - neg_var = neg.var(ddof=1) + if mode == 'sample' or mode == 'binary': + assert np.isfinite(pos).all() + assert np.isfinite(neg).all() + + if pos.size <= 1: + raise ValueError('Not enough positive samples'\ + 'to estimate the variance') + if neg.size <= 1: + raise ValueError('Not enough negative samples'\ + 'to estimate the variance') + + pos_mean = pos.mean() + neg_mean = neg.mean() + pos_var = pos.var(ddof=1) + neg_var = neg.var(ddof=1) - num = pos_mean - neg_mean - div = np.sqrt((pos_var + neg_var) / 2.) + num = pos_mean - neg_mean + div = np.sqrt((pos_var + neg_var) / 2.) + + dp = num / div + + else: # mode == 'rate' + dp = norm.ppf(TPR) - norm.ppf(FPR) # from Dan's suggestion about clipping d' values... - dp = np.clip(num / div, min_value, max_value) + dp = np.clip(dp, min_value, max_value) return dp -def dprime_from_confusion_matrix(M, max_value=np.inf, min_value=-np.inf, **kwargs): - """Computes the d-prime sensitivity indices of the given confusion matrix. - This function is designed mostly for when there is no access to internal - representations and/or decision making mechanisms (like human data). - If no ``collation`` is defined in ``kwargs`` this function computes +def dprime_from_confusion_matrix(M, max_value=np.inf, \ + min_value=-np.inf, **kwargs): + """Computes the d-prime sensitivity indices of predictions from + the given confusion matrix. This function is designed mostly for + when there is no access to internal representations and/or + decision making mechanisms (like human data). If no ``collation`` + is defined in ``kwargs`` this function computes one vs. rest d-prime for each class. Parameters ---------- - M: array-like, shape = [n_classes (true), n_classes (pred)] + M: array-like, shape = [n_classes (true), n_classes (pred)] Confusion matrix, where the element M_{rc} means the number of - times when the classifier/subject guesses that a test sample in + times when the classifier/subject guesses that a test sample in the r-th class belongs to the c-th class. max_value: float, optional @@ -122,9 +157,10 @@ def dprime_from_confusion_matrix(M, max_value=np.inf, min_value=-np.inf, **kwarg Minimum possible d-prime value. Default is ``-np.inf``. kwargs: named arguments, optional - Passed to ``confusion_stats()``. By passing ``collation``, ``fudge_mode``, - ``fudge_factor``, etc. 
one can change the behavior of d-prime computation - (see ``confusion_stats()`` for details). + Passed to ``confusion_stats()``. By assigning ``collation``, + ``fudge_mode``, ``fudge_factor``, etc. one can change the + behavior of d-prime computation + (see ``confusion_stats()`` for details). Returns ------- @@ -143,7 +179,6 @@ def dprime_from_confusion_matrix(M, max_value=np.inf, min_value=-np.inf, **kwarg TPR = TP / P FPR = FP / N - dp = np.clip(norm.ppf(TPR) - norm.ppf(FPR), min_value, max_value) - - return dp + return dprime(TPR, FPR, mode='rate', \ + max_value=max_value, min_value=min_value) diff --git a/bangmetric/utils.py b/bangmetric/utils.py index adfbdb9..979de10 100644 --- a/bangmetric/utils.py +++ b/bangmetric/utils.py @@ -14,23 +14,23 @@ def confusion_matrix_stats(M, collation=None, \ fudge_mode=DEFAULT_FUDGE_MODE, fudge_factor=DEFAULT_FUDGE_FACTOR): - """Computes classification statistics of sub-confusion matrices inside + """Computes classification statistics of sub-confusion matrices inside the given original confusion matrix M. If no ``collation`` is given, statistics for each one vs. rest sub-confusion matrix will be computed. Parameters ---------- - M: array-like, shape = [n_classes (true), n_classes (pred)] + M: array-like, shape = [n_classes (true), n_classes (pred)] Confusion matrix, where the element M_{rc} means the number of times when the classifier guesses that a test sample in the r-th class belongs to the c-th class. - collation: None or array-like with shape = [n_groupings, n_classes], optional - Defines how to group entries in `M` to make sub-confusion matrices. + collation: None or array-like of shape = [n_groupings, n_classes], optional + Defines how to group entries in `M` to make sub-confusion matrices. Entries shoule be {+1, 0, -1}. A row defines one instance of grouping, where +1, -1, and 0 designate the corresponding class as a - positive, negative, and ignored class, respectively. For example, - the following `collation` defines a 3-way one vs. rest grouping + positive, negative, and ignored class, respectively. For example, + the following `collation` defines a 3-way one vs. rest grouping (given that `M` is a 3x3 matrix): [[+1, -1, -1], [-1, +1, -1], @@ -47,23 +47,21 @@ def confusion_matrix_stats(M, collation=None, \ 'always': always apply the fudge factor 'none': no fudging --- equivalent to ``fudge_factor=0`` - Returns ------- P: array, shape = [n_groupings] - Array of the number of positives, where each element corresponds to each - grouping (row) defined by `collation`. + Array of the number of positives, where each element corresponds to + each grouping (row) defined by `collation`. N: array, shape = [n_groupings] Same as P, except that this is an array of the number of negatives. TP: array, shape = [n_groupings] - Same as P, except that this is an array of the number of true positives. + Same as P, except an array of the number of true positives. TN: array, shape = [n_groupings] - Same as P, except that this is an array of the number of true negatives. + Same as P, except an array of the number of true negatives. FP: array, shape = [n_groupings] - Same as P, except that this is an array of the number of false positives. + Same as P, except an array of the number of false positives. FN: array, shape = [n_groupings] - Same as P, except that this is an array of the number of false negatives. - + Same as P, except an array of the number of false negatives. 
References ---------- @@ -77,7 +75,7 @@ def confusion_matrix_stats(M, collation=None, \ assert M.shape[0] == M.shape[1] n_classes = M.shape[0] - if collation is None: + if collation is None: # make it one vs. rest. E.g., for a 3-classes case: # [[+1, -1, -1], # [-1, +1, -1], @@ -88,19 +86,25 @@ def confusion_matrix_stats(M, collation=None, \ collation = np.array(collation, dtype='int8') assert collation.ndim == 2 assert collation.shape[1] == n_classes - + # P0: number of positives, for each class # P: number of positives, for each grouping # N: number of negatives, for each grouping # TP: number of true positives, for each grouping # FP: number of false positives, for each grouping - P0 = np.sum(M, axis=1) - P = np.array([np.sum(P0[coll == +1]) for coll in collation], dtype='float64') - N = np.array([np.sum(P0[coll == -1]) for coll in collation], dtype='float64') - TP = np.array([np.sum(M[coll == +1][:, coll == +1]) for coll in collation], dtype='float64') - TN = np.array([np.sum(M[coll == -1][:, coll == -1]) for coll in collation], dtype='float64') - FP = np.array([np.sum(M[coll == -1][:, coll == +1]) for coll in collation], dtype='float64') - FN = np.array([np.sum(M[coll == +1][:, coll == -1]) for coll in collation], dtype='float64') + P0 = np.sum(M, axis=1) + P = np.array([np.sum(P0[coll == +1]) \ + for coll in collation], dtype='float64') + N = np.array([np.sum(P0[coll == -1]) \ + for coll in collation], dtype='float64') + TP = np.array([np.sum(M[coll == +1][:, coll == +1]) \ + for coll in collation], dtype='float64') + TN = np.array([np.sum(M[coll == -1][:, coll == -1]) \ + for coll in collation], dtype='float64') + FP = np.array([np.sum(M[coll == -1][:, coll == +1]) \ + for coll in collation], dtype='float64') + FN = np.array([np.sum(M[coll == +1][:, coll == -1]) \ + for coll in collation], dtype='float64') # -- application of fudge factor if fudge_mode == 'none': # no fudging @@ -111,8 +115,8 @@ def confusion_matrix_stats(M, collation=None, \ FP += fudge_factor TN += fudge_factor FN += fudge_factor - P += 2.*fudge_factor - N += 2.*fudge_factor + P += 2. * fudge_factor + N += 2. 
* fudge_factor elif fudge_mode == 'correction': # apply fudge factor only when needed TP[TP == P] = P[TP == P] - fudge_factor # 100% correct @@ -120,14 +124,13 @@ def confusion_matrix_stats(M, collation=None, \ FP[FP == N] = N[FP == N] - fudge_factor # always FAR FP[FP == 0] = fudge_factor # no false alarm - TN[TN == N] = N[TN == N] - fudge_factor - TN[TN == 0] = fudge_factor - FN[FN == P] = P[FN == P] - fudge_factor - FN[FN == 0] = fudge_factor + TN[TN == N] = N[TN == N] - fudge_factor + TN[TN == 0] = fudge_factor + FN[FN == P] = P[FN == P] - fudge_factor + FN[FN == 0] = fudge_factor else: raise ValueError('Invalid fudge_mode') # -- done return P, N, TP, TN, FP, FN - From cad170b30fd6162acc6e8819a0de3f6e70c44e0f Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 11:53:52 -0400 Subject: [PATCH 22/33] MISC: merge dprime_from_confusion_matrix and dprime --- bangmetric/dprime.py | 95 +++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 58 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index b85b075..a5b65de 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -6,7 +6,7 @@ # # License: BSD -__all__ = ['dprime', 'dprime_from_confusion_matrix'] +__all__ = ['dprime'] import numpy as np from scipy.stats import norm @@ -16,7 +16,7 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ - min_value=-np.inf): + min_value=-np.inf, **kwargs): """Computes the d-prime sensitivity index of predictions from various data formats. Depending on the choice of `mode`, this function can take one of the following format: @@ -24,11 +24,12 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ * Binary classification outputs (`mode='binary'`; default) * Positive and negative samples (`mode='sample'`) * True positive and false positive rate (`mode='rate'`) + * Confusion matrix (`mode='confusionmat'`) Parameters ---------- A, B: - If `mode` is 'binary': + If `mode` is 'binary' (default): A: array, shape = [n_samples], True values, interpreted as strictly positive or not @@ -55,8 +56,18 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ B: array-like, shape = [n_groupings] False positive rates + if `mode` is 'confusionmat': + + A: array-like, shape = [n_classes (true), n_classes (pred)] + Confusion matrix, where the element M_{rc} means + the number of times when the classifier or subject + guesses that a test sample in the r-th class + belongs to the c-th class. + + B: ignored + mode: {'binary', 'sample', 'rate'}, optional - Directs the interpretation of A and B + Directs the interpretation of A and B. Default is 'binary'. max_value: float, optional Maximum possible d-prime value. Default is ``np.inf``. @@ -64,14 +75,24 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ min_value: float, optional Minimum possible d-prime value. Default is ``-np.inf``. + kwargs: named arguments, optional + Passed to ``confusion_stats()`` and used only when `mode` + is 'confusionmat'. By assigning ``collation``, + ``fudge_mode``, ``fudge_factor``, etc. one can + change the behavior of d-prime computation + (see ``confusion_stats()`` for details). 
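+
+    A sketch of the four call patterns (argument names follow the
+    descriptions above; the data contents are hypothetical):
+
+        dprime(y_true, y_pred)            # mode='binary' (default)
+        dprime(pos, neg, mode='sample')
+        dprime(TPR, FPR, mode='rate')
+        dprime(M, mode='confusionmat')    # one vs. rest by default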
+ Returns ------- - dp: float - d-prime + dp: float or array of shape = [n_groupings] + A d-prime value or array of d-primes, where each element + corresponds to each grouping of positives and negatives + (when `mode` is 'rate' or 'confusionmat') References ---------- http://en.wikipedia.org/wiki/D' + http://en.wikipedia.org/wiki/Confusion_matrix """ # -- basic checks and conversion @@ -100,11 +121,19 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ TPR, FPR = np.array(A), np.array(B) assert TPR.shape == FPR.shape + elif mode == 'confusionmat': + # A: confusion mat + # row means true classes, col means predicted classes + P, N, TP, _, FP, _ = confusion_matrix_stats(A, **kwargs) + + TPR = TP / P + FPR = FP / N + else: raise ValueError('Invalid mode') # -- compute d' - if mode == 'sample' or mode == 'binary': + if mode in ['sample', 'binary']: assert np.isfinite(pos).all() assert np.isfinite(neg).all() @@ -125,60 +154,10 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ dp = num / div - else: # mode == 'rate' + else: # mode is rate or confusionmat dp = norm.ppf(TPR) - norm.ppf(FPR) # from Dan's suggestion about clipping d' values... dp = np.clip(dp, min_value, max_value) return dp - - -def dprime_from_confusion_matrix(M, max_value=np.inf, \ - min_value=-np.inf, **kwargs): - """Computes the d-prime sensitivity indices of predictions from - the given confusion matrix. This function is designed mostly for - when there is no access to internal representations and/or - decision making mechanisms (like human data). If no ``collation`` - is defined in ``kwargs`` this function computes - one vs. rest d-prime for each class. - - Parameters - ---------- - M: array-like, shape = [n_classes (true), n_classes (pred)] - Confusion matrix, where the element M_{rc} means the number of - times when the classifier/subject guesses that a test sample in - the r-th class belongs to the c-th class. - - max_value: float, optional - Maximum possible d-prime value. Default is ``np.inf``. - - min_value: float, optional - Minimum possible d-prime value. Default is ``-np.inf``. - - kwargs: named arguments, optional - Passed to ``confusion_stats()``. By assigning ``collation``, - ``fudge_mode``, ``fudge_factor``, etc. one can change the - behavior of d-prime computation - (see ``confusion_stats()`` for details). - - Returns - ------- - dp: array, shape = [n_groupings] - Array of d-primes, where each element corresponds to each grouping - defined by `collation` (see ``confusion_stats()`` for details). - - References - ---------- - http://en.wikipedia.org/wiki/D' - http://en.wikipedia.org/wiki/Confusion_matrix - """ - - # M: confusion matrix, row means true classes, col means predicted classes - P, N, TP, _, FP, _ = confusion_matrix_stats(M, **kwargs) - - TPR = TP / P - FPR = FP / N - - return dprime(TPR, FPR, mode='rate', \ - max_value=max_value, min_value=min_value) From f0a4f1b0be473aaa6cb0fcf7f8d25ab57041bddb Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 11:55:41 -0400 Subject: [PATCH 23/33] DOC: small changes --- bangmetric/accuracy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bangmetric/accuracy.py b/bangmetric/accuracy.py index f34b06d..6a119a3 100644 --- a/bangmetric/accuracy.py +++ b/bangmetric/accuracy.py @@ -11,7 +11,7 @@ def accuracy(y_true, y_pred, balanced=False): - """Computes the Accuracy of the predictions (also known as the + """Computes the accuracy of the predictions (also known as the zero-one score). 
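+
+    With ``balanced=True`` the score is the mean of the accuracies over
+    the positive and the negative samples, e.g. (hypothetical values):
+
+        accuracy([1, 1, 1, 0], [1, 1, 0, 0], balanced=True)  # (2/3 + 1)/2
+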
Parameters From 5b07e4ca6fa310fa0a4fa561f0425c8383f67d96 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 13:12:26 -0400 Subject: [PATCH 24/33] COSMIT --- bangmetric/utils.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/bangmetric/utils.py b/bangmetric/utils.py index 979de10..1c30c46 100644 --- a/bangmetric/utils.py +++ b/bangmetric/utils.py @@ -10,6 +10,7 @@ DEFAULT_FUDGE_FACTOR = 0.5 DEFAULT_FUDGE_MODE = 'correction' +DTYPE = np.float64 def confusion_matrix_stats(M, collation=None, \ @@ -25,7 +26,8 @@ def confusion_matrix_stats(M, collation=None, \ times when the classifier guesses that a test sample in the r-th class belongs to the c-th class. - collation: None or array-like of shape = [n_groupings, n_classes], optional + collation: None or array-like of shape = [n_groupings, + n_classes], optional (default=None) Defines how to group entries in `M` to make sub-confusion matrices. Entries shoule be {+1, 0, -1}. A row defines one instance of grouping, where +1, -1, and 0 designate the corresponding class as a @@ -37,13 +39,13 @@ def confusion_matrix_stats(M, collation=None, \ [-1, -1, +1]] If `None` (default), one vs. rest grouping is assumed. - fudge_factor: float, optional + fudge_factor: float, optional (default=0.5) A small factor to avoid TPR, FPR, TNR, or FNR becoming 0 or 1. - Mostly intended for d-prime calculation. Default is 0.5. + Mostly intended for d-prime calculation. - fudge_mode: str, optional + fudge_mode: str, optional (default='correction') Determins how to apply the fudge factor. Can be one of: - 'correction': apply only when needed (default) + 'correction': apply only when needed 'always': always apply the fudge factor 'none': no fudging --- equivalent to ``fudge_factor=0`` @@ -94,17 +96,17 @@ def confusion_matrix_stats(M, collation=None, \ # FP: number of false positives, for each grouping P0 = np.sum(M, axis=1) P = np.array([np.sum(P0[coll == +1]) \ - for coll in collation], dtype='float64') + for coll in collation], dtype=DTYPE) N = np.array([np.sum(P0[coll == -1]) \ - for coll in collation], dtype='float64') + for coll in collation], dtype=DTYPE) TP = np.array([np.sum(M[coll == +1][:, coll == +1]) \ - for coll in collation], dtype='float64') + for coll in collation], dtype=DTYPE) TN = np.array([np.sum(M[coll == -1][:, coll == -1]) \ - for coll in collation], dtype='float64') + for coll in collation], dtype=DTYPE) FP = np.array([np.sum(M[coll == -1][:, coll == +1]) \ - for coll in collation], dtype='float64') + for coll in collation], dtype=DTYPE) FN = np.array([np.sum(M[coll == +1][:, coll == -1]) \ - for coll in collation], dtype='float64') + for coll in collation], dtype=DTYPE) # -- application of fudge factor if fudge_mode == 'none': # no fudging From 0748c3a5bd84fb67b73cd5b5b2d501172284953e Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 14:06:17 -0400 Subject: [PATCH 25/33] ENH: added metrics for human data --- bangmetric/__init__.py | 2 + bangmetric/human_metric.py | 86 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 bangmetric/human_metric.py diff --git a/bangmetric/__init__.py b/bangmetric/__init__.py index e4b0765..26ae973 100644 --- a/bangmetric/__init__.py +++ b/bangmetric/__init__.py @@ -5,3 +5,5 @@ from rmse import * # pyflakes.ignore from kernel_analysis import * # pyflakes.ignore from nk import * # pyflakes.ignore +from utils import * # pyflakes.ignore +from human_metric import * # pyflakes.ignore diff --git 
a/bangmetric/human_metric.py b/bangmetric/human_metric.py new file mode 100644 index 0000000..906a28a --- /dev/null +++ b/bangmetric/human_metric.py @@ -0,0 +1,86 @@ +"""Metrics designed to compute the similarity to human data""" + +# Authors: Ha Hong +# +# License: BSD + +__all__ = ['central_ratio', 'consistency'] + +import numpy as np +from .correlation import spearman + +DTYPE = np.float64 + + +def central_ratio(num, dnm, center=np.median, finite=True): + """Computes the central tendency (median, by default) of the ratios + between `num` and `dnm`. By default, this function gives the + "Turing ratio" used in the paper by Majaj, Hong, Solomon, and DiCarlo. + + Parameters + ---------- + num: array-like + Numerators of ratios + + dnm: array-lie, shape = `num.shape()` + Denominators of ratios. `num` and `dnm` must have the same shape. + + center: function, optional (default=np.median) + Function to compute the central tendency. + + finite: boolean, optional (default=True) + If True, only finite numbers in `num` and `dnm` will be used for + the computation of the central tendency. + """ + + num = np.array(num, dtype=DTYPE) + dnm = np.array(dnm, dtype=DTYPE) + assert num.shape == dnm.shape + + num = num.ravel() + dnm = dnm.ravel() + + if finite: + fi = np.isfinite(dnm) & np.isfinite(num) + num = num[fi] + dnm = dnm[fi] + + return center(num / dnm) + + +def consistency(A, B, corrcoef=spearman, finite=True): + """Computes the consistency (Spearman rank correlation coefficient, + by default) between two sets of data points (e.g., d' scores) `A` + and `B`. By default, this function gives the "consistency" + used in the paper by Majaj, Hong, Solomon, and DiCarlo. + + Parameters + ---------- + A: array-like + A set of data points + + B: array-lie, shape = `A.shape()` + Another set of data points to compare with `A`. + `A` and `B` must have the same shape. + + corrcoef: function, optional (default=bangmetric.spearman) + Function to compute the "consistency." + + finite: boolean, optional (default=True) + If True, only finite numbers in `A` and `B` will be used for + the computation of the consistency. + """ + + A = np.array(A, dtype=DTYPE) + B = np.array(B, dtype=DTYPE) + assert A.shape == B.shape + + A = A.ravel() + B = B.ravel() + + if finite: + fi = np.isfinite(B) & np.isfinite(A) + A = A[fi] + B = B[fi] + + return corrcoef(A, B) From 95ed1fc0b3972159e69e01c6537f76ddb9a671b2 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 14:27:29 -0400 Subject: [PATCH 26/33] COSMIT --- bangmetric/human_metric.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bangmetric/human_metric.py b/bangmetric/human_metric.py index 906a28a..866c1bb 100644 --- a/bangmetric/human_metric.py +++ b/bangmetric/human_metric.py @@ -12,7 +12,7 @@ DTYPE = np.float64 -def central_ratio(num, dnm, center=np.median, finite=True): +def central_ratio(num, dnm, centerfn=np.median, finite=True): """Computes the central tendency (median, by default) of the ratios between `num` and `dnm`. By default, this function gives the "Turing ratio" used in the paper by Majaj, Hong, Solomon, and DiCarlo. @@ -25,7 +25,7 @@ def central_ratio(num, dnm, center=np.median, finite=True): dnm: array-lie, shape = `num.shape()` Denominators of ratios. `num` and `dnm` must have the same shape. - center: function, optional (default=np.median) + centerfn: function, optional (default=np.median) Function to compute the central tendency. 
finite: boolean, optional (default=True) @@ -45,10 +45,10 @@ def central_ratio(num, dnm, center=np.median, finite=True): num = num[fi] dnm = dnm[fi] - return center(num / dnm) + return centerfn(num / dnm) -def consistency(A, B, corrcoef=spearman, finite=True): +def consistency(A, B, consistencyfn=spearman, finite=True): """Computes the consistency (Spearman rank correlation coefficient, by default) between two sets of data points (e.g., d' scores) `A` and `B`. By default, this function gives the "consistency" @@ -63,7 +63,7 @@ def consistency(A, B, corrcoef=spearman, finite=True): Another set of data points to compare with `A`. `A` and `B` must have the same shape. - corrcoef: function, optional (default=bangmetric.spearman) + consistencyfn: function, optional (default=bangmetric.spearman) Function to compute the "consistency." finite: boolean, optional (default=True) @@ -83,4 +83,4 @@ def consistency(A, B, corrcoef=spearman, finite=True): A = A[fi] B = B[fi] - return corrcoef(A, B) + return consistencyfn(A, B) From ab6df56decdf2059a97e419f938dc8210285e505 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 15:14:01 -0400 Subject: [PATCH 27/33] ENH: added confusion matrix support to accuracy() --- bangmetric/accuracy.py | 99 ++++++++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 24 deletions(-) diff --git a/bangmetric/accuracy.py b/bangmetric/accuracy.py index 6a119a3..6d67d12 100644 --- a/bangmetric/accuracy.py +++ b/bangmetric/accuracy.py @@ -8,49 +8,100 @@ __all__ = ['accuracy'] import numpy as np +from .utils import confusion_matrix_stats +DEFAULT_ACCURACY_MODE = 'binary' -def accuracy(y_true, y_pred, balanced=False): + +def accuracy(A, B=None, mode=DEFAULT_ACCURACY_MODE, \ + balanced=False, collation=None): """Computes the accuracy of the predictions (also known as the - zero-one score). + zero-one score). Depending on the choice of `mode`, this + function can take one of the following data format: + + * Binary classification outputs (`mode='binary'`; default) + * Confusion matrix (`mode='confusionmat'`) Parameters ---------- - y_true: array, shape = [n_samples] - True values, interpreted as strictly positive or not - (i.e. converted to binary). + A, B: + If `mode` is 'binary' (default): + + A: array, shape = [n_samples] + True values, interpreted as strictly positive or not + (i.e. converted to binary). + + B: array, shape = [n_samples] + Predicted values, interpreted as strictly positive or not + (i.e. converted to binary). - y_pred: array, shape = [n_samples] - Predicted values, interpreted as strictly positive or not - (i.e. converted to binary). + if `mode` is 'confusionmat': + + A: array-like, shape = [n_classes (true), n_classes (pred)] + Confusion matrix, where the element M_{rc} means + the number of times when the classifier or subject + guesses that a test sample in the r-th class + belongs to the c-th class. + + B: ignored balanced: bool, optional (default=False) Returns the balanced accuracy (equal weight for positive and negative values). + collation: None or array-like of shape = [n_groupings, + n_classes], optional (default=None) + Defines how to group entries in `M` to make sub-confusion matrices + when `mode` is 'confusionmat'. See `confusion_matrix_stats()` + for details. + Returns ------- - acc: float - Accuracy (zero-one score). 
+ acc: float or array of shape = [n_groupings] + An accuracy score (zero-one score) or array of accuracies, + where each element corresponds to each grouping of + positives and negatives (when `mode` is 'confusionmat'). + + References + ---------- + http://en.wikipedia.org/wiki/Accuracy """ - assert len(y_true) == len(y_pred) - assert np.isfinite(y_true).all() - assert np.isfinite(y_pred).all() - # -- "binarize" the arguments - y_true = np.array(y_true) > 0 - assert y_true.ndim == 1 + if mode == 'binary': + y_true, y_pred = A, B + assert len(y_true) == len(y_pred) + assert np.isfinite(y_true).all() + assert np.isfinite(y_pred).all() + + # -- "binarize" the arguments + y_true = np.array(y_true) > 0 + assert y_true.ndim == 1 + + y_pred = np.array(y_pred) > 0 + assert y_pred.ndim == 1 + + i_pos = y_true > 0 + i_neg = ~i_pos - y_pred = np.array(y_pred) > 0 - assert y_pred.ndim == 1 + P = float(i_pos.sum()) + N = float(i_neg.sum()) + TP = float((y_true[i_pos] == y_pred[i_pos]).sum()) + TN = float((y_true[i_neg] == y_pred[i_neg]).sum()) + + elif mode == 'confusionmat': + # A: confusion mat + # row means true classes, col means predicted classes + P, N, TP, TN, _, _ = confusion_matrix_stats(A, \ + collation=collation, fudge_mode='none') + + else: + raise ValueError('Invalid mode') if balanced: - pos = y_true > 0 - neg = ~pos - pos_acc = (y_true[pos] == y_pred[pos]).mean() - neg_acc = (y_true[neg] == y_pred[neg]).mean() - acc = (pos_acc + neg_acc) / 2. + sensitivity = TP / P + specificity = TN / N + acc = (sensitivity + specificity) / 2. else: - acc = (y_true == y_pred).mean() + acc = (TP + TN) / (P + N) return acc From b1dedff7dd75dde00375f50efc88a69d5d4f1256 Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Tue, 24 Jul 2012 15:15:42 -0400 Subject: [PATCH 28/33] DOC: misc changes --- bangmetric/dprime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py index a5b65de..f2be4fa 100644 --- a/bangmetric/dprime.py +++ b/bangmetric/dprime.py @@ -76,11 +76,11 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\ Minimum possible d-prime value. Default is ``-np.inf``. kwargs: named arguments, optional - Passed to ``confusion_stats()`` and used only when `mode` + Passed to ``confusion_matrix_stats()`` and used only when `mode` is 'confusionmat'. By assigning ``collation``, ``fudge_mode``, ``fudge_factor``, etc. one can change the behavior of d-prime computation - (see ``confusion_stats()`` for details). + (see ``confusion_matrix_stats()`` for details). 
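+
+    For instance, a hypothetical two-group collation over a 4-class
+    confusion matrix M could be requested as:
+
+        collation = [[+1, +1, -1, -1],
+                     [-1, -1, +1, +1]]
+        dprime(M, mode='confusionmat', collation=collation)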
 
     Returns
     -------

From 2e34b76ab6f6abc4e6286c5b4b49398224b07ed8 Mon Sep 17 00:00:00 2001
From: Ha Hong
Date: Tue, 24 Jul 2012 15:38:30 -0400
Subject: [PATCH 29/33] TST: fixed bugs in reference values

---
 bangmetric/tests/test_dprime.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bangmetric/tests/test_dprime.py b/bangmetric/tests/test_dprime.py
index 374db1a..002f512 100644
--- a/bangmetric/tests/test_dprime.py
+++ b/bangmetric/tests/test_dprime.py
@@ -18,7 +18,7 @@ def test_basic():
     y_true = np.array([False, True, True, True, False, False, False, True])
     y_pred = np.array([0.491, -0.1, 0.64, 1.52, -0.23, -0.23, 1.579, 0.76])
     dp = dprime(y_true, y_pred)
-    reference = 0.47387910220727386
+    reference = 0.39541092958803298
     assert abs(dp - reference) < ATOL
 
 
@@ -27,7 +27,7 @@ def test_basic100():
     y_true = rng.binomial(1, 0.5, size=100)
     y_pred = rng.randn(y_true.size)
     dp = dprime(y_true, y_pred)
-    reference = -0.39852816153409176
+    reference = -0.20652941441924857
     assert abs(dp - reference) < ATOL

From f3fd043a6a227292bdc6541339caa2d270b6c718 Mon Sep 17 00:00:00 2001
From: Ha Hong
Date: Tue, 24 Jul 2012 22:12:47 -0400
Subject: [PATCH 30/33] MISC: small changes to clip ppf values in dprime()

---
 bangmetric/dprime.py | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/bangmetric/dprime.py b/bangmetric/dprime.py
index f2be4fa..658381d 100644
--- a/bangmetric/dprime.py
+++ b/bangmetric/dprime.py
@@ -15,8 +15,10 @@
 DEFAULT_DPRIME_MODE = 'binary'
 
 
-def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\
-        min_value=-np.inf, **kwargs):
+def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE,\
+        max_value=np.inf, min_value=-np.inf,\
+        max_ppf_value=np.inf, min_ppf_value=-np.inf,\
+        **kwargs):
     """Computes the d-prime sensitivity index of predictions from various
     data formats. Depending on the choice of `mode`, this function
     can take one of the following format:
@@ -66,14 +68,22 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\
 
         B: ignored
 
-    mode: {'binary', 'sample', 'rate'}, optional
-        Directs the interpretation of A and B. Default is 'binary'.
+    mode: {'binary', 'sample', 'rate', 'confusionmat'}, optional (default='binary')
+        Directs the interpretation of A and B.
 
-    max_value: float, optional
-        Maximum possible d-prime value. Default is ``np.inf``.
+    max_value: float, optional (default=np.inf)
+        Maximum possible d-prime value.
 
-    min_value: float, optional
-        Minimum possible d-prime value. Default is ``-np.inf``.
+    min_value: float, optional (default=-np.inf)
+        Minimum possible d-prime value.
+
+    max_ppf_value: float, optional (default=np.inf)
+        Maximum possible ppf value.
+        Used only when mode is 'rate' or 'confusionmat'.
+
+    min_ppf_value: float, optional (default=-np.inf)
+        Minimum possible ppf value.
+        Used only when mode is 'rate' or 'confusionmat'.
 
     kwargs: named arguments, optional
         Passed to ``confusion_matrix_stats()`` and used only when `mode`
@@ -155,7 +165,11 @@ def dprime(A, B=None, mode=DEFAULT_DPRIME_MODE, max_value=np.inf,\
 
         dp = num / div
 
     else: # mode is rate or confusionmat
-        dp = norm.ppf(TPR) - norm.ppf(FPR)
+        ppfTPR = norm.ppf(TPR)
+        ppfFPR = norm.ppf(FPR)
+        ppfTPR = np.clip(ppfTPR, min_ppf_value, max_ppf_value)
+        ppfFPR = np.clip(ppfFPR, min_ppf_value, max_ppf_value)
+        dp = ppfTPR - ppfFPR
 
     # from Dan's suggestion about clipping d' values...
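     # (min/max_ppf_value above bound each z-score before subtraction;
     # the clip below bounds the resulting d' itself)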
dp = np.clip(dp, min_value, max_value) From b58a0fe265fb1c71a42e2dd1b26a494851cc095a Mon Sep 17 00:00:00 2001 From: Ha Hong Date: Wed, 25 Jul 2012 00:45:22 -0400 Subject: [PATCH 31/33] DOC: small typos.. --- bangmetric/human_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bangmetric/human_metric.py b/bangmetric/human_metric.py index 866c1bb..9045226 100644 --- a/bangmetric/human_metric.py +++ b/bangmetric/human_metric.py @@ -22,7 +22,7 @@ def central_ratio(num, dnm, centerfn=np.median, finite=True): num: array-like Numerators of ratios - dnm: array-lie, shape = `num.shape()` + dnm: array-like, shape = `num.shape()` Denominators of ratios. `num` and `dnm` must have the same shape. centerfn: function, optional (default=np.median) @@ -59,7 +59,7 @@ def consistency(A, B, consistencyfn=spearman, finite=True): A: array-like A set of data points - B: array-lie, shape = `A.shape()` + B: array-like, shape = `A.shape()` Another set of data points to compare with `A`. `A` and `B` must have the same shape. From 9f0cbd75131504dd5d3c1a7881a1b3f1c41ab4f6 Mon Sep 17 00:00:00 2001 From: Charles Cadieu Date: Fri, 26 Oct 2012 15:33:20 -0400 Subject: [PATCH 32/33] fixed a bug: np.sort makes a copy while array.sort is inplace --- bangmetric/kernel_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bangmetric/kernel_analysis.py b/bangmetric/kernel_analysis.py index 2094e27..c45ef13 100644 --- a/bangmetric/kernel_analysis.py +++ b/bangmetric/kernel_analysis.py @@ -106,7 +106,7 @@ def kanalysis(X, Y_true, n_components='all', quantiles=DEFAULT_QUANTILES): # Sort them l2_squared_sorted = l2_squared.ravel() - np.sort(l2_squared_sorted) + l2_squared_sorted.sort() # ------------------------------------------------------------------------ # -- Compute Kernel Analysis for each quantile From d575111d2af839baddaf29c9509f3152badbd851 Mon Sep 17 00:00:00 2001 From: Jonas Kubilius Date: Wed, 7 Dec 2016 16:43:17 -0500 Subject: [PATCH 33/33] updated installation --- README.md | 5 + bangmetric/__init__.py | 2 + requirements.txt | 2 - setup.py | 358 ++++++++++++----------------------------- 4 files changed, 113 insertions(+), 254 deletions(-) create mode 100644 README.md delete mode 100644 requirements.txt mode change 100755 => 100644 setup.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..bac209a --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# bangmetric + +# License + +New BSD \ No newline at end of file diff --git a/bangmetric/__init__.py b/bangmetric/__init__.py index 26ae973..2f7a683 100644 --- a/bangmetric/__init__.py +++ b/bangmetric/__init__.py @@ -7,3 +7,5 @@ from nk import * # pyflakes.ignore from utils import * # pyflakes.ignore from human_metric import * # pyflakes.ignore + +__version__ = '0.0.1' \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 7f2ecdb..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy>=1.6.1 -scikit-learn>=0.10 diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index 8802ad3..e989deb --- a/setup.py +++ b/setup.py @@ -1,252 +1,106 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" distribute- and pip-enabled setup.py """ - -import logging -import os -import re - -# ----- overrides ----- - -# set these to anything but None to override the automatic defaults -packages = None -package_name = None -package_data = None -scripts = None -requirements_file = None -requirements = None -dependency_links = None -use_numpy = True - 
-# --------------------- - - -# ----- control flags ----- - -# fallback to setuptools if distribute isn't found -setup_tools_fallback = False - -# don't include subdir named 'tests' in package_data -skip_tests = True - -# print some extra debugging info -debug = True - -# ------------------------- - -if debug: - logging.basicConfig(level=logging.DEBUG) -# distribute import and testing -try: - import distribute_setup - distribute_setup.use_setuptools() - logging.debug("distribute_setup.py imported and used") -except ImportError: - # fallback to setuptools? - # distribute_setup.py was not in this directory - if not (setup_tools_fallback): - import setuptools - if not (hasattr(setuptools, '_distribute') and \ - setuptools._distribute): - raise ImportError(\ - "distribute was not found and fallback " \ - "to setuptools was not allowed") - else: - logging.debug("distribute_setup.py not found, \ - defaulted to system distribute") - else: - logging.debug("distribute_setup.py not found, " \ - "defaulting to system setuptools") - -import setuptools - - -def find_scripts(): - return [s for s in setuptools.findall('scripts/') \ - if os.path.splitext(s)[1] != '.pyc'] - - -def package_to_path(package): - """ - Convert a package (as found by setuptools.find_packages) - e.g. "foo.bar" to usable path - e.g. "foo/bar" - - No idea if this works on windows - """ - return package.replace('.', '/') - - -def find_subdirectories(package): - """ - Get the subdirectories within a package - This will include resources (non-submodules) and submodules - """ - try: - subdirectories = os.walk(package_to_path(package)).next()[1] - except StopIteration: - subdirectories = [] - return subdirectories - - -def subdir_findall(dir, subdir): - """ - Find all files in a subdirectory and return paths relative to dir - - This is similar to (and uses) setuptools.findall - However, the paths returned are in the form needed for package_data - """ - strip_n = len(dir.split('/')) - path = '/'.join((dir, subdir)) - return ['/'.join(s.split('/')[strip_n:]) for s in setuptools.findall(path)] - - -def find_package_data(packages): - """ - For a list of packages, find the package_data - - This function scans the subdirectories of a package and considers all - non-submodule subdirectories as resources, including them in - the package_data - - Returns a dictionary suitable for setup(package_data=) - """ - package_data = {} - for package in packages: - package_data[package] = [] - for subdir in find_subdirectories(package): - if '.'.join((package, subdir)) in packages: # skip submodules - logging.debug("skipping submodule %s/%s" % (package, subdir)) - continue - if skip_tests and (subdir == 'tests'): # skip tests - logging.debug("skipping tests %s/%s" % (package, subdir)) - continue - package_data[package] += \ - subdir_findall(package_to_path(package), subdir) - return package_data - - -def parse_requirements(file_name): - """ - from: - http://cburgmer.posterous.com/pip-requirementstxt-and-setuppy - """ - requirements = [] - with open(file_name, 'r') as f: - for line in f: - if re.match(r'(\s*#)|(\s*$)', line): - continue - if re.match(r'\s*-e\s+', line): - requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$',\ - r'\1', line).strip()) - elif re.match(r'\s*-f\s+', line): - pass - else: - requirements.append(line.strip()) - return requirements - - -def parse_dependency_links(file_name): - """ - from: - http://cburgmer.posterous.com/pip-requirementstxt-and-setuppy - """ - dependency_links = [] - with open(file_name) as f: - for line in f: - if 
re.match(r'\s*-[ef]\s+', line): - dependency_links.append(re.sub(r'\s*-[ef]\s+',\ - '', line)) - return dependency_links - -# ----------- Override defaults here ---------------- -if packages is None: - packages = setuptools.find_packages() - -if len(packages) == 0: - raise Exception("No valid packages found") - -if package_name is None: - package_name = packages[0] - -if package_data is None: - package_data = find_package_data(packages) - -if scripts is None: - scripts = find_scripts() - -if requirements_file is None: - requirements_file = 'requirements.txt' - -if os.path.exists(requirements_file): - if requirements is None: - requirements = parse_requirements(requirements_file) - if dependency_links is None: - dependency_links = parse_dependency_links(requirements_file) -else: - if requirements is None: - requirements = [] - if dependency_links is None: - dependency_links = [] - -if debug: - logging.debug("Module name: %s" % package_name) - for package in packages: - logging.debug("Package: %s" % package) - logging.debug("\tData: %s" % str(package_data[package])) - logging.debug("Scripts:") - for script in scripts: - logging.debug("\tScript: %s" % script) - logging.debug("Requirements:") - for req in requirements: - logging.debug("\t%s" % req) - logging.debug("Dependency links:") - for dl in dependency_links: - logging.debug("\t%s" % dl) - -from distutils.core import Command -class PyTest(Command): - user_options = [] - def initialize_options(self): - pass - def finalize_options(self): - pass - def run(self): - import sys,subprocess - errno = subprocess.call([sys.executable, 'runtests.py']) - raise SystemExit(errno) - - -if __name__ == '__main__': - - sub_packages = packages - - if use_numpy: - from numpy.distutils.misc_util import Configuration - config = Configuration(package_name, '', None) - - for sub_package in sub_packages: - print 'adding', sub_package - config.add_subpackage(sub_package) - - from numpy.distutils.core import setup - kwargs = config.todict() - kwargs['cmdclass'] = dict(test=PyTest) - setup(**kwargs) - - else: - setuptools.setup( - name=package_name, - version='dev', - packages=packages, - scripts=scripts, - - package_data=package_data, - include_package_data=True, - - install_requires=requirements, - dependency_links=dependency_links, - - cmdclass=dict(test=PyTest), - ) +"""A setuptools based setup module. + +See: +https://packaging.python.org/en/latest/distributing.html +https://github.com/pypa/sampleproject +""" + +# Always prefer setuptools over distutils +from setuptools import setup, find_packages +# To use a consistent encoding +from codecs import open +from os import path + +import bangmetric + + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the README file +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name='bangmetric', + + # Versions should comply with PEP440. For a discussion on single-sourcing + # the version across setup.py and the project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version=bangmetric.__version__, + + description='', + long_description=long_description, + + # The project's main homepage. + url='https://github.com/dicarlolab/bangmetric', + + # Author details + author='DiCarlo Lab', + + # Choose your license + license='New BSD', + + # See https://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ + # How mature is this project? 
+    # Common values are
+    #   3 - Alpha
+    #   4 - Beta
+    #   5 - Production/Stable
+
+    # Pick your license as you wish (should match "license" above)
+    'License :: OSI Approved :: BSD License',
+
+    # Specify the Python versions you support here. In particular, ensure
+    # that you indicate whether you support Python 2, Python 3 or both.
+    'Programming Language :: Python :: 2',
+    'Programming Language :: Python :: 2.7',
+    ],
+
+    # What does your project relate to?
+    keywords='machine learning metrics',
+
+    # You can just specify the packages manually here if your project is
+    # simple. Or you can use find_packages().
+    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
+
+    # Alternatively, if you want to distribute just a my_module.py, uncomment
+    # this:
+    # py_modules=["my_module"],
+
+    # List run-time dependencies here. These will be installed by pip when
+    # your project is installed. For an analysis of "install_requires" vs pip's
+    # requirements files see:
+    # https://packaging.python.org/en/latest/requirements.html
+    install_requires=['numpy', 'scipy', 'scikit-learn'],
+
+    # List additional groups of dependencies here (e.g. development
+    # dependencies). You can install these using the following syntax,
+    # for example:
+    # $ pip install -e .[dev,test]
+    # extras_require={
+    #     'dev': ['check-manifest'],
+    #     'test': ['coverage'],
+    # },
+
+    # If there are data files included in your packages that need to be
+    # installed, specify them here. If using Python 2.6 or less, then these
+    # have to be included in MANIFEST.in as well.
+    # package_data={
+    #     'sample': ['package_data.dat'],
+    # },
+
+    # Although 'package_data' is the preferred approach, in some cases you may
+    # need to place data files outside of your packages. See:
+    # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
+    # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
+    # data_files=[('my_data', ['data/data_file'])],
+
+    # To provide executable scripts, use entry points in preference to the
+    # "scripts" keyword. Entry points provide cross-platform support and allow
+    # pip to create the appropriate form of executable for the target platform.
+    # entry_points={
+    #     'console_scripts': [
+    #         'sample=sample:main',
+    #     ],
+    # },
+)
\ No newline at end of file
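
With this last patch applied, a quick sanity check of the packaging (a
sketch; assumes an in-place install such as ``pip install -e .`` from the
repository root):

    import bangmetric
    print(bangmetric.__version__)   # '0.0.1', the version set above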