From e40efb73cb262c59b2bb484b4d6dea2082e01979 Mon Sep 17 00:00:00 2001
From: Vanya Belyaev <ivan.belyaev@cern.ch>
Date: Fri, 18 Oct 2024 15:34:16 +0200
Subject: [PATCH]   1. add more methods to `Ostap::Math::ECDF` got get the
 ranking   1. optimise `Ostap::Math::ECDF` and skip sorting when possible   1.
 add `ostap.stats.twosamples.py` to make Two Sampel Tests   1. add the test
 for Two Sample Test   1. reduce code duplication between  GoF-1D and
 Two-Sample Tests

---
 ReleaseNotes/release_notes.md            |   8 +-
 ostap/core/ostap_types.py                |  12 +-
 ostap/fitting/fithelpers.py              |   9 +-
 ostap/stats/gof1d.py                     | 369 ++++---------
 ostap/stats/gof_utils.py                 | 221 +++++++-
 ostap/stats/tests/test_stats_2samples.py | 108 ++++
 ostap/stats/twosamples.py                | 625 +++++++++++++++++++++++
 source/include/Ostap/ECDF.h              | 100 ++--
 source/src/ECDF.cpp                      |  48 +-
 9 files changed, 1144 insertions(+), 356 deletions(-)
 create mode 100644 ostap/stats/tests/test_stats_2samples.py
 create mode 100644 ostap/stats/twosamples.py

diff --git a/ReleaseNotes/release_notes.md b/ReleaseNotes/release_notes.md
index f4462fe1..00c685ff 100644
--- a/ReleaseNotes/release_notes.md
+++ b/ReleaseNotes/release_notes.md
@@ -1,5 +1,11 @@
 ## New features
-
+    
+  1. add more methods to `Ostap::Math::ECDF` got get the ranking
+  1. optimise `Ostap::Math::ECDF` and skip sorting when possible
+  1. add `ostap.stats.twosamples.py` to make Two Sampel Tests
+  1. add the test for Two Sample Test
+  1. reduce code duplication between  GoF-1D and Two-Sample Tests
+         
 ## Backward incompatible
 
 ## Bug fixes
diff --git a/ostap/core/ostap_types.py b/ostap/core/ostap_types.py
index 87c511b8..50ddbd7b 100644
--- a/ostap/core/ostap_types.py
+++ b/ostap/core/ostap_types.py
@@ -95,10 +95,13 @@
 if hasattr ( std , 'string'      ) : string_types += ( std.string      , )
 if hasattr ( std , 'string_view' ) : string_types += ( std.string_view , )
 # =============================================================================
-try :
+try : # =======================================================================
+    # =========================================================================
     import numpy as np
-    listlike_types  = listlike_types + ( np.ndarray , )    
-except ImportError :
+    listlike_types  = listlike_types + ( np.ndarray , )
+    # =========================================================================
+except ImportError : # ========================================================
+    # =========================================================================
     pass 
 # =============================================================================
 dict_types      = dict ,
@@ -106,11 +109,12 @@
 sequence_types  = listlike_types + ( Sequence , Collection , Iterable , Generator )
 sized_types     = Sized ,
 path_types      = string_types
+# =============================================================================
 if ( 3 , 6 ) <= python_version :
     path_types = string_types + ( os.PathLike , )
 # =============================================================================
 ## sometimes we need to ensure that dictionary is ordered 
-ordered_dict = dict 
+ordered_dict = dict
 if python_version < ( 3 , 7 ) :
     from collections import OrderedDict as ordered_dict 
     dictlike_types += ( ordered_dict, )
diff --git a/ostap/fitting/fithelpers.py b/ostap/fitting/fithelpers.py
index fe0532b1..8597afc6 100644
--- a/ostap/fitting/fithelpers.py
+++ b/ostap/fitting/fithelpers.py
@@ -617,7 +617,7 @@ def binning ( self , axis , name = '' ) :
 args_timer         =  'timer'     , 'timing'
 args_warnings      =  'warnings'  , 'warning'
 args_sumw2         =  'sumw2'  , 'sumw2err' , 'sumw2errs' , 'sumw2error' , 'sumw2errors'
-args_asymptotic    = ( 'asymptotic'     , 'asymptorocerr'   , 'asymptoticerrs' ,
+args_asymptotic    = ( 'asymptotic'     , 'asymptoticcerr'  , 'asymptoticerrs' ,
                        'asymptoticerror , asymptoticerrors' ) 
 args_batch         = 'batch' , 'batchmode' , 'modebatch'
 args_backend       = 'evalbackend' , 'backendeval' , 'backend'
@@ -777,12 +777,7 @@ def parse_args ( self ,  dataset = None , *args , **kwargs ) :
                 
                 _args.append   (  ROOT.RooFit.Extended ( a ) )
                 
-            elif key in ( 'cpu'              ,
-                          'cpus'             ,
-                          'ncpu'             ,
-                          'ncpus'            ,
-                          'numcpu'           ,
-                          'numcpus'          ) and isinstance ( a , int ) and 1<= a :
+            elif key in args_numcpu and isinstance ( a , int ) and 1<= a :
                 
                 _args.append   (  ROOT.RooFit.NumCPU( a  ) )
                 
diff --git a/ostap/stats/gof1d.py b/ostap/stats/gof1d.py
index 1d4b2b0c..841420e2 100644
--- a/ostap/stats/gof1d.py
+++ b/ostap/stats/gof1d.py
@@ -33,14 +33,17 @@
 from   ostap.core.core        import SE, VE, Ostap, cidict_fun 
 from   ostap.math.base        import doubles, axis_range  
 from   ostap.math.models      import f1_draw 
-from   ostap.math.math_ve     import significance
+from   ostap.stats.gof_utils  import Estimators,Summary
 import ostap.fitting.ds2numpy 
 import ostap.fitting.roofit
 import ROOT, math  
 # =============================================================================
-try :    
+try : # =======================================================================
+    # =========================================================================
     import numpy as np
-except ImportError :
+    # =========================================================================
+except ImportError : # ========================================================
+    # =========================================================================
     np = None
 # =============================================================================
 # logging 
@@ -68,7 +71,7 @@ def kolmogorov_smirnov ( cdf_data ) :
     >>> ks2  = kolmogorov_smirnov ( cdf_data )
     """
     n      = len ( cdf_data ) 
-    result = max ( max ( ( i + 1.0 ) / n - Fi , Fi - float ( i ) / n ) for ( i, Fi )  in enumerate ( cdf_data )  ) 
+    result = max ( max ( ( i + 1.0 ) / n - Fi , Fi - float ( i ) / n ) for ( i, Fi )  in enumerate ( cdf_data )  ) ** 2  
     return math.sqrt ( result )
 # =============================================================================
 ## Get Anderson-Darling  statistiscs AD^2
@@ -187,13 +190,11 @@ def ZC  ( cdf_data ) :
     result = sum ( ( flog ( ( 1.0 / Fi - 1 ) / ( ( n - 0.5 ) / ( i + 0.25 ) - 1 ) ) ) ** 2 for ( i , Fi ) in enumerate ( cdf_data ) )
     return result 
 # ==============================================================================
-    
-# =============================================================================
 ## @class GoF1D
 #  Goodness of 1D-fits 
 #  @see https://doi.org/10.1111/1467-9868.00337
-class GoF1D(object) :
-    """ Goodness of 1D-fits 
+class GoF1D(Estimators) :
+    """ Goodness-of-fit 1D-case 
     - see https://doi.org/10.1111/1467-9868.00337
     """
     def __init__ ( self    ,
@@ -237,15 +238,22 @@ def cdf ( x ) : return fun.cdf ( x )
 
         ## evalute CDF for sorted data 
         self.__cdf_data = self.__vct_cdf ( self.__data )
-        
-        self.__KS_val   = kolmogorov_smirnov ( self.__cdf_data )
-        self.__AD_val   = anderson_darling   ( self.__cdf_data )
-        self.__CM_val   = cramer_von_mises   ( self.__cdf_data )
-        self.__ZK_val   = ZK                 ( self.__cdf_data )
-        self.__ZA_val   = ZA                 ( self.__cdf_data )
-        self.__ZC_val   = ZC                 ( self.__cdf_data )
-        self.__K_val    = kuiper             ( self.__cdf_data )
-               
+
+        self.__estimators = {
+            'KS'  : kolmogorov_smirnov ( self.__cdf_data ) , 
+            'K'   : kuiper             ( self.__cdf_data ) , 
+            'AD'  : anderson_darling   ( self.__cdf_data ) , 
+            'CM'  : cramer_von_mises   ( self.__cdf_data ) , 
+            'ZK'  : ZK                 ( self.__cdf_data ) , 
+            'ZA'  : ZA                 ( self.__cdf_data ) , 
+            'ZC'  : ZC                 ( self.__cdf_data ) , 
+        }
+    # =========================================================================
+    @property
+    def estimators ( self ) :
+        """`estimators` : get a dictionary of all estimators"""
+        return self.__estimators
+    
     # =========================================================================
     ## size of dataset
     @property 
@@ -289,15 +297,23 @@ def cdf_data ( self ) :
     def kolmogorov_smirnov_estimator ( self ) :
         """ Get Kolmogorov-Smirnov statistiscs KS
         """
-        return self.__KS_val 
+        return self.__estimators.get('KS',None) 
 
-    # ===============================================
+    # =========================================================================
+    ## Get Kuiper  statististics 
+    @property 
+    def kuiper_estimator ( self ) :
+        """ Get Kuiperstatistics
+        """        
+        return self.__estimators.get('K', None ) 
+                
+    # =========================================================================
     ## Get Anderson-Darling  statistiscs 
     @property 
     def anderson_darling_estimator ( self ) :
         """ Get Anderson-Darling statistiscs 
         """
-        return self.__AD_val 
+        return self.__estimators.get ( 'AD' , None ) 
 
     # =========================================================================
     ## Get Cramer-von Mises statistics 
@@ -305,7 +321,7 @@ def anderson_darling_estimator ( self ) :
     def cramer_von_mises_estimator ( self ) :
         """ Get Cramer-von Mises statistics 
         """
-        return self.__CM_val 
+        return self.estimators.get ( 'CM' , None ) 
     
     # =========================================================================
     ## Get ZK statististics 
@@ -313,7 +329,7 @@ def cramer_von_mises_estimator ( self ) :
     def ZK_estimator  ( self ) :
         """ Get ZK statistics
         """
-        return self.__ZK_val 
+        return self.estimators.get ( 'ZK' , None ) 
         
     # =========================================================================
     ## Get ZA statististics 
@@ -321,7 +337,7 @@ def ZK_estimator  ( self ) :
     def ZA_estimator  ( self ) :
         """ Get ZA statistics
         """        
-        return self.__ZA_val
+        return self.estimators['ZK'] 
     
     # =========================================================================
     ## Get ZC statististics 
@@ -329,65 +345,10 @@ def ZA_estimator  ( self ) :
     def ZC_estimator ( self ) :
         """ Get ZC statistics
         """        
-        return self.__ZC_val
+        return self.__estimators['ZK'] 
     
-    # =========================================================================
-    ## Get Kuiper  statististics 
-    @property 
-    def kuiper_estimator ( self ) :
-        """ Get Kuiperstatistics
-        """        
-        return self.__K_val 
-                
-    # ==========================================================================
-    ## Print the summary as Table
-    def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) :
-        """ Print the summary as Table """
-        rows = [ ( 'Statistics' , 'Value' ) ] 
-        from   ostap.logger.pretty import pretty_float
-        import ostap.logger.table  as     T 
-
-        ks , expo = pretty_float ( self.__KS_val , width = width , precision = precision )
-        if expo : row = 'Kolmogorov-Smirnov' , ks , '10^%+d' % expo
-        else    : row = 'Kolmogorov-Smirnov' , ks              
-        rows.append ( row )
-        
-        ad , expo = pretty_float ( self.__AD_val , width = width , precision = precision )
-        if expo : row = 'Anderson-Darling' , ad , '10^%+d' % expo
-        else    : row = 'Anderson-Darling' , ad              
-        rows.append ( row )
-
-        cm , expo = pretty_float ( self.__CM_val  , width = width , precision = precision )
-        if expo : row = 'Cramer-von Mises' , cm , '10^%+d' % expo
-        else    : row = 'Cramer-von Mises' , cm              
-        rows.append ( row )
-
-        k , expo = pretty_float ( self.__K_val , width = width , precision = precision )
-        if expo : row = 'Kuiper' , k , '10^%+d' % expo
-        else    : row = 'Kuiper' , k              
-        rows.append ( row )
-
-        zk , expo = pretty_float ( self.__ZK_val , width = width , precision = precision )
-        if expo : row = 'ZK' , zk, '10^%+d' % expo
-        else    : row = 'ZK' , zk             
-        rows.append ( row )
-
-        za , expo = pretty_float ( self.__ZA_val , width = width , precision = precision )
-        if expo : row = 'ZA' , za, '10^%+d' % expo
-        else    : row = 'ZA' , za             
-        rows.append ( row )
-
-        zc , expo = pretty_float ( self.__ZC_val , width = width , precision = precision )
-        if expo : row = 'ZC' , zc, '10^%+d' % expo
-        else    : row = 'ZC' , zc             
-        rows.append ( row )
-
-        
-        title = title if title else 'Goodness of 1D-fit' 
-        return T.table ( rows , title = title , prefix = prefix , alignment = 'lcl' )
-
-    __repr__ = table
-    __str__  = table
+    __repr__ = Estimators.table
+    __str__  = Estimators.table
     
     # =========================================================================
     ## Draw fit CDF & empirical ECDF 
@@ -407,7 +368,7 @@ def draw  ( self , opts = '' , *args , **kwargs ) :
             if hasattr ( cdf , 'xmin' ) : xmin = min ( xmin , cdf.xmin () )
             if hasattr ( cdf , 'xmax' ) : xmax = max ( xmax , cdf.xmax () )
 
-        xmin , xmax = axis_range ( xmin , xmax , delta = 0.05 )
+        xmin , xmax = axis_range ( xmin , xmax , delta = 0.10 )
         
         xmin = kwargs.pop ( 'xmin' , xmin )
         xmax = kwargs.pop ( 'xmax' , xmax )
@@ -421,21 +382,13 @@ def draw  ( self , opts = '' , *args , **kwargs ) :
 
         return ecdf.draw ( '%s %s' % ( 'same' , opts ) , color = 2 , linewidth = 3 , xmin = xmin , xmax = xmax , **kwargs )
 
-# =============================================================================
-KS_keys = 'ks' , 'kolmogorov' , 'kolmogorovsmirnov' 
-AD_keys = 'ad' , 'andersen'   , 'andersendarling' 
-CM_keys = 'cm' , 'cramer'     , 'cramervonmises' 
-ZK_keys = 'zk' ,
-ZA_keys = 'za' ,
-ZC_keys = 'zc' ,
-K_keys  = 'k'  , 'kuiper'  
 # =============================================================================
 ## @class GoF1DToys
 #  Check Goodness of 1D-fits using toys 
-class GoF1DToys(GoF1D) :
-    """ Check Goodness of 1D-fits ysing toys 
+class GoF1DToys(GoF1D,Summary) :
+    """ Check Goodness-of-fit with toys (1D-case) 
     """
-    ## result of GoGptoys 
+    ## result of GoF-toys 
     Result = namedtuple ( 'Result' , 'statistics counter pvalue nsigma' )
     # =========================================================================
     def __init__ ( self           ,
@@ -446,21 +399,14 @@ def __init__ ( self           ,
 
         assert isinstance ( nToys , int ) and 0 < nToys , "Invalid `nToys` argument!"
 
-        ## initializat the base
+        ## initialize the base
         GoF1D.__init__ ( self , pdf , dataset ) 
 
-        self.__pdf    = pdf
+        self.__pdf      = pdf
+        
+        self.__counters = defaultdict(SE) 
+        self.__ecdfs    = {}
         
-        self.__KS_cnt = SE ()
-        self.__AD_cnt = SE ()
-        self.__CM_cnt = SE ()
-        self.__ZK_cnt = SE ()
-        self.__ZA_cnt = SE ()
-        self.__ZC_cnt = SE ()
-        self.__K_cnt  = SE ()
-
-        self.__ecdfs  = {}
-
         self.__nToys  = 0 
         if 0 < nToys : self.run ( nToys , silent = silent ) 
 
@@ -473,8 +419,9 @@ def run ( self , nToys = 1000 , silent = False ) :
 
         varname = self.__pdf.xvar.name
         
-        results = defaultdict(list)
-        vct_cdf = self.vcdf
+        results  = defaultdict(list)
+        counters = self.counters 
+        vct_cdf  = self.vcdf
         
         from ostap.utils.progress_bar import progress_bar 
         for i in progress_bar ( nToys , silent = silent , description = 'Toys:') :
@@ -485,29 +432,29 @@ def run ( self , nToys = 1000 , silent = False ) :
             cdf_data = vct_cdf ( data )
             
             ks       = kolmogorov_smirnov ( cdf_data )
+            k        = kuiper             ( cdf_data )
             ad       = anderson_darling   ( cdf_data )
             cm       = cramer_von_mises   ( cdf_data )
             zk       = ZK                 ( cdf_data )
             za       = ZA                 ( cdf_data )
             zc       = ZC                 ( cdf_data )
-            k        = kuiper             ( cdf_data )
-            
-            self.__KS_cnt += ks 
-            self.__AD_cnt += ad 
-            self.__CM_cnt += cm 
-            self.__ZK_cnt += zk 
-            self.__ZA_cnt += za 
-            self.__ZC_cnt += zc 
-            self.__K_cnt  += k 
             
-            results [ 'KS'  ].append ( ks )    
-            results [ 'AD'  ].append ( ad ) 
-            results [ 'CM'  ].append ( cm ) 
-            results [ 'ZK'  ].append ( zk ) 
-            results [ 'ZA'  ].append ( za ) 
-            results [ 'ZC'  ].append ( zc ) 
-            results [ 'K'   ].append ( k  ) 
+            counters [ 'KS' ] += ks
+            counters [ 'K'  ] += k
+            counters [ 'AD' ] += ad
+            counters [ 'CM' ] += cm
+            counters [ 'ZK' ] += zk
+            counters [ 'ZA' ] += za
+            counters [ 'ZC' ] += zc
             
+            results  [ 'KS' ].append ( ks )    
+            results  [ 'K'  ].append ( k  )
+            results  [ 'AD' ].append ( ad ) 
+            results  [ 'CM' ].append ( cm ) 
+            results  [ 'ZK' ].append ( zk ) 
+            results  [ 'ZA' ].append ( za ) 
+            results  [ 'ZC' ].append ( zc ) 
+
             ## delete data
             if isinstance ( dset , ROOT.RooDataSet ) : 
                 dset = Ostap.MoreRooFit.delete_data ( dset )
@@ -541,31 +488,20 @@ def nToys ( self ) :
     def ecdfs ( self ) :
         """`ecdfs` : toy results as empirical cumulative distribution functions"""
         return self.__ecdfs
-
     # =========================================================================
-    ## Helper methof to get result
-    def result ( self , value , counter , label ) :
-        """ Helper method to get result 
-        """
-        if self.__ecdfs and label in self.__ecdfs :
-            pvalue = self.__ecdfs [ label ] . estimate ( value )
-            nsigma = significance ( pvalue )            
-        else : 
-            pvalue = VE ( -1 , 0 )
-            nsigma = VE ( -1 , 0 )
-        ## 
-        return self.Result ( value   ,
-                             counter ,
-                             pvalue  ,
-                             nsigma  )
-            
+    ## Counters  
+    @property 
+    def counters ( self ) :
+        """`counters` : toy results as counters"""
+        return self.__counters
+
     # =========================================================================
     ## Get Kolmogorov-Smirnov statistiscs 
     @property 
     def kolmogorov_smirnov ( self ) :
         """ Get Kolmogorov-Smirnov statistiscs KS
         """
-        return self.result ( self.kolmogorov_smirnov_estimator , self.__KS_cnt , 'KS' ) 
+        return self.result ( 'KS' ) 
     
     # ===============================================
     ## Get Anderson-Darling  statistiscs 
@@ -573,7 +509,7 @@ def kolmogorov_smirnov ( self ) :
     def anderson_darling ( self ) :
         """ Get Anderson-Darling statistiscs 
         """
-        return self.result ( self.anderson_darling_estimator , self.__AD_cnt , 'AD' ) 
+        return self.result ( 'AD' ) 
         
     # =========================================================================
     ## Get Cramer-von Mises statistics 
@@ -581,7 +517,7 @@ def anderson_darling ( self ) :
     def cramer_von_mises ( self ) :
         """ Get Cramer-von Mises statistics 
         """
-        return self.result ( self.cramer_von_mises_estimator , self.__CM_cnt , 'CM' ) 
+        return self.result ( 'CM' ) 
         
     # =========================================================================
     ## Get ZK statististics 
@@ -589,7 +525,7 @@ def cramer_von_mises ( self ) :
     def ZK  ( self ) :
         """ Get ZK statistics 
         """
-        return self.result ( self.ZK_estimator , self.__ZK_cnt , 'ZK' ) 
+        return self.result ( 'ZK' ) 
         
     # =========================================================================
     ## Get ZA statististics 
@@ -597,7 +533,7 @@ def ZK  ( self ) :
     def ZA  ( self ) :
         """ Get ZA statistics
         """        
-        return self.result ( self.ZA_estimator , self.__ZA_cnt , 'ZA' ) 
+        return self.result ( 'ZA' ) 
     
     # =========================================================================
     ## Get ZC statististics 
@@ -605,7 +541,7 @@ def ZA  ( self ) :
     def ZC  ( self ) :
         """ Get ZC statistics 
         """        
-        return self.result ( self.ZC_estimator , self.__ZC_cnt , 'ZC' ) 
+        return self.result ( 'ZC' ) 
 
     # =========================================================================
     ## Get Kuiper statististics 
@@ -613,144 +549,11 @@ def ZC  ( self ) :
     def kuiper ( self ) :
         """ Get Kuiper statistics 
         """        
-        return self.result ( self.kuiper_estimator , self.__K_cnt , 'K' ) 
-    
-    # =========================================================================
-    ## format a row in the table
-    def _row  ( self , what , result , width = 5 , precision = 3 ) :
-        """ Format a row in the table
-        """
-        
-        value      = result.statistics
-        counter    = result.counter
-        pvalue     = result.pvalue
-        nsigma     = result.nsigma
-        
-        mean       = counter.mean   ()
-        rms        = counter.rms    () 
-        vmin, vmax = counter.minmax () 
-        
-        mxv        = max ( value , mean.value() , mean.error() , rms , vmin , vmax )
-        
-        from   ostap.logger.pretty import fmt_pretty_ve 
-        
-        fmt, fmtv , fmte , expo = fmt_pretty_ve ( VE ( mxv ,  mean.cov2() ) ,
-                                                  width       = width       ,
-                                                  precision   = precision   , 
-                                                  parentheses = False       )
-        
-        if expo : scale = 10**expo
-        else    : scale = 1
-        fmt2 = '%s/%s' % ( fmtv , fmtv ) 
-        
-        return ( what  ,
-                 fmtv  %  ( value / scale )                    ,
-                 ( mean / scale ).toString ( fmt )             ,
-                 fmtv  %  ( rms  / scale )                     ,
-                 fmt2  %  ( vmin / scale , vmax / scale )      ,
-                 ( '10^%+d' % expo  if expo else '' )          ,                  
-                 ( 100 * pvalue ) .toString ( '%.2f +/- %-.2f' ) , 
-                 ( nsigma       ) .toString ( '%.1f +/- %-.1f' ) ) 
-    
-    # =========================================================================
-    ## Make a summary table
-    def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) :
-        """ Make a summary table
-        """
-        
-        import ostap.logger.table  as     T 
-                
-        rows = [ ( 'Statistics' , 'value' , 'mean' , 'rms' , 'min/max' , 'factor' , 'p-value [%]' , '#sigma' ) ] 
-
-        rows.append ( self._row ( 'Kolmogorov-Smirnov' , self.kolmogorov_smirnov , width = width , precision = precision ) )
-        rows.append ( self._row ( 'Anderson-Darling'   , self.anderson_darling   , width = width , precision = precision ) )
-        rows.append ( self._row ( 'Cramer-von Mises'   , self.cramer_von_mises   , width = width , precision = precision ) )
-        rows.append ( self._row ( 'Kuiper'             , self.kuiper             , width = width , precision = precision ) )
-        rows.append ( self._row ( 'ZK'                 , self.ZK                 , width = width , precision = precision ) )
-        rows.append ( self._row ( 'ZA'                 , self.ZA                 , width = width , precision = precision ) )
-        rows.append ( self._row ( 'ZC'                 , self.ZC                 , width = width , precision = precision ) )
-
-        ## skip empty columns        
-        has_expo = False 
-        for row in rows[1:] :
-            r = list ( row )
-            if r[-3] :
-                has_expo = True
-                break
-
-        if not has_expo :
-            new_rows = []
-            for row in rows :
-                r = list ( row )
-                del r [ -3 ]
-                new_rows.append ( r ) 
-            rows = new_rows 
-
-            
-        if   not title and self.nToys :
-            title = 'Goodness of 1D-fit with #%d toys' % self.nToys  
-        elif not title :
-            title = 'Goodness of 1D-fit'
-        
-        return T.table ( rows , title = title , prefix = prefix , alignment = 'lccccccccccc' )
-
-    __repr__ = table
-    __str__  = table
-
-    # =========================================================================
-    ## Draw fit CDF & empirical ECDF 
-    def draw  ( self , what , opts = '' , *args , **kwargs ) :
-        """ Draw fit CDF & empirical CDF
-        """
-        key = cidict_fun ( what ) 
-        if   key in KS_keys : 
-            result = self.kolmogorov_smirnov 
-            ecdf   = self.__ecdfs [ 'KS' ]
-            logger.info ( 'Toy resuls for Kolmogorov-Smirnov estimate' ) 
-        elif key in AD_keys : 
-            result = self.anderson_darling  
-            ecdf   = self.__ecdfs [ 'AD' ]
-            logger.info ( 'Toy resuls for Anderson-Darling estimate' ) 
-        elif key in CM_keys : 
-            result = self.cramer_von_mises 
-            ecdf   = self.__ecdfs [ 'CM' ]
-            logger.info ( 'Toy resuls for Cramer-von Mises  estimate' ) 
-        elif key in ZK_keys : 
-            result = self.ZK    
-            ecdf   = self.__ecdfs [ 'ZK' ]
-            logger.info ( 'Toy resuls for ZK estimate' ) 
-        elif key in ZA_keys : 
-            result = self.ZA   
-            ecdf   = self.__ecdfs [ 'ZA' ]
-            logger.info ( 'Toy resuls for ZK estimate' ) 
-        elif key in ZC_keys : 
-            result = self.ZC   
-            ecdf   = self.__ecdfs [ 'ZC' ]
-            logger.info ( 'Toy resuls for ZK estimate' ) 
-        elif key in K_keys : 
-            result = self.kuiper   
-            ecdf   = self.__ecdfs [ 'K' ]
-            logger.info ( 'Toy resuls for Kuiper estimate' ) 
-        else :
-            raise KeyError (  "draw: Invalid `what`:%s" % what )
-            
-        xmin,xmax = ecdf.xmin () , ecdf.xmax ()
-        value     = result.statistics
-        xmin      = min ( xmin , value )
-        xmax      = max ( xmax , value )
-        xmin , xmax  = axis_range ( xmin , xmax , delta = 0.10 )
-        xmin      = xmin if 0 < xmin else 0 
-        kwargs [ 'xmin' ] = kwargs.get ( 'xmin' , xmin ) 
-        kwargs [ 'xmax' ] = kwargs.get ( 'xmax' , xmax )
-        result    = ecdf.draw  ( opts , *args , **kwargs ) 
-        line      = ROOT.TLine ( value , 0 , value , 1 )
-        ## 
-        line.SetLineWidth ( 4 ) 
-        line.SetLineColor ( 8 ) 
-        line.draw()
-        ## 
-        return result, line  
+        return self.result ( 'K' ) 
 
+    table    = Summary.table 
+    __repr__ = Summary.table
+    __str__  = Summary.table
 
 # =============================================================================
 if '__main__' == __name__ :
diff --git a/ostap/stats/gof_utils.py b/ostap/stats/gof_utils.py
index bb65a603..e8023f16 100644
--- a/ostap/stats/gof_utils.py
+++ b/ostap/stats/gof_utils.py
@@ -13,17 +13,22 @@
 __author__  = "Vanya BELYAEV Ivan.Belyaev@cern.ch"
 __date__    = "2023-12-06"
 __all__     = (
-    'mean_var'  , ## mean and variance for (weighted) arrays
-    'nEff'      , ## get number of effective entries
-    'normalize' , ## "normalize" variables in dataset/structured array
-    )
+    'mean_var'   , ## mean and variance for (weighted) arrays
+    'nEff'       , ## get number of effective entries
+    'normalize'  , ## "normalize" variables in dataset/structured array
+    'Estimators' , ## helper mixin class to print statistical estimators 
+    'Summary'    , ## helper mixin class to print statistical estimators 
+)
 # =============================================================================
+from   collections              import namedtuple
 from   ostap.core.meta_info     import python_info 
 from   ostap.core.core          import VE, Ostap
 from   ostap.core.ostap_types   import string_types
+from   ostap.math.base          import axis_range  
+from   ostap.math.math_ve       import significance
 from   ostap.stats.counters     import EffCounter
-from   ostap.utils.basic        import numcpu
-from   ostap.utils.utils        import splitter 
+from   ostap.utils.basic        import numcpu, loop_items 
+from   ostap.utils.utils        import splitter
 from   ostap.utils.progress_bar import progress_bar
 import ROOT, sys, warnings  
 # =============================================================================
@@ -374,8 +379,210 @@ def run ( self , NN , silent = False ) :
                 counter += result 
         # 
         return counter
+
+# =============================================================================
+## Short labels for various statitical estimators 
+Labels = {
+    'KS' : 'Kolmogorov-Smirnov' ,
+    'K'  : 'Kuiper'             ,
+    'AD' : 'Anderson-Darling'   ,
+    'CM' : 'Cramers-von Mises'  ,
+    'ZK' : 'Zhang-ZK'           ,
+    'ZA' : 'Zhang-ZA'           ,
+    'ZC' : 'Zhang-ZC'           ,        
+}
+# ============================================================================
+## @class Estimators
+#  Helper mixin class to format the table of estimators 
+class Estimators(object) :
+    """ Helper mixin class to format the table of estimators 
+    """
+    # ==========================================================================
+    ## Print the summary as Table
+    def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) :
+        """ Print the summary as Table
+        """
+        import ostap.logger.table  as     T 
+        from   ostap.logger.pretty import pretty_float
+        ## 
+        rows = [ ( 'Statistics' , 'Value' ) ]
+        for label , value  in loop_items ( self.estimators ) :
+            
+            the_label = Labels.get ( label , label )
+
+            result , expo = pretty_float ( value , width = width , precision = precision )
+
+            if expo : row = the_label , result , '10^%+d' % expo
+            else    : row = the_label , result 
+        
+        title = title if title else 'Goodness of 1D-fit' 
+        return T.table ( rows , title = title , prefix = prefix , alignment = 'lcl' )
+
+# ============================================================================
+## @class Summary
+#  Helper mixin class to keep (and format) the statistics
+class Summary(object) :
+    """ Helper mixin class to keep (and format) the statistics
+    """
+    # =============================================================================
+    KS_keys = 'ks' , 'kolmogorov' , 'kolmogorovsmirnov' 
+    K_keys  = 'k'  , 'kuiper'  
+    AD_keys = 'ad' , 'andersen'   , 'andersendarling' 
+    CM_keys = 'cm' , 'cramer'     , 'cramervonmises' 
+    ZK_keys = 'zk' , 'zhangk'     , 'zhangzk'
+    ZA_keys = 'za' , 'zhanga'     , 'zhangza'
+    ZC_keys = 'zc' , 'zhangc'     , 'zhangzc'
+    # =========================================================================
+    ## result of toys/permutations 
+    Result = namedtuple ( 'Result' , 'statistics counter pvalue nsigma' )
+    # =========================================================================
+    ## Helper method to get result
+    def result ( self , label ) :
+        """ Helper method to get result 
+        """
+        if not label in self.estimators : return None
+        if not label in self.ecdfs      : return None
+        if not label in self.counters   : return None
+        ##
+        value   = self.estimators   [ label ]
+        ecdfs   = self.ecdfs        [ label ] 
+        counter = self.counters     [ label ] 
+        ##
+        pvalue = ecdfs. estimate ( value )
+        nsigma = significance ( pvalue )
+        return self.Result ( value   ,
+                             counter ,
+                             pvalue  ,
+                             nsigma  )
+    # =========================================================================
+    ## format a row in the summary table
+    def row  ( self , what , result , width = 5 , precision = 3 ) :
+        """ Format a row in the sumamry table
+        """
+        value      = result.statistics
+        counter    = result.counter
+        pvalue     = result.pvalue
+        nsigma     = result.nsigma
+        
+        mean       = counter.mean   ()
+        rms        = counter.rms    () 
+        vmin, vmax = counter.minmax () 
+        
+        mxv = max ( abs ( value ) , abs ( mean.value() ) , mean.error() , rms , abs ( vmin )  , abs ( vmax ) ) 
+        
+        from   ostap.logger.pretty import fmt_pretty_ve         
+        fmt, fmtv , fmte , expo = fmt_pretty_ve ( VE ( mxv ,  mean.cov2() ) ,
+                                                  width       = width       ,
+                                                  precision   = precision   , 
+                                                  parentheses = False       )
+        
+        if expo : scale = 10**expo
+        else    : scale = 1
+        fmt2 = '%s/%s' % ( fmtv , fmtv ) 
+        
+        return ( what  ,
+                 fmtv  %  ( value / scale )                    ,
+                 ( mean / scale ).toString ( fmt )             ,
+                 fmtv  %  ( rms  / scale )                     ,
+                 fmt2  %  ( vmin / scale , vmax / scale )      ,
+                 ( '10^%+d' % expo  if expo else '' )          ,                  
+                 ( 100 * pvalue ) .toString ( '%.2f +/- %-.2f' ) , 
+                 ( nsigma       ) .toString ( '%.1f +/- %-.1f' ) )
     
-    
+    # =========================================================================
+    ## Make a summary table
+    def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) :
+        """ Make a summary table
+        """
+        import ostap.logger.table  as     T                 
+        rows = [ ( 'Statistics' , 'value' , 'mean' , 'rms' , 'min/max' , 'factor' , 'p-value [%]' , '#sigma' ) ]
+        
+        for label in self.ecdfs :
+            
+            result  = self.result ( label )
+            if not result : continue
+
+            the_label = Labels.get ( label , label )
+            row = self.row ( the_label , result , width = width , precision = precision )
+            rows.append ( row ) 
+
+        ## skip empty columns        
+        has_expo = False 
+        for row in rows[1:] :
+            r = list ( row )
+            if r[-3] :
+                has_expo = True
+                break
+
+        if not has_expo :
+            new_rows = []
+            for row in rows :
+                r = list ( row )
+                del r [ -3 ]
+                new_rows.append ( r ) 
+            rows = new_rows 
+            
+        if   not title and self.nToys :
+            title = 'Goodness of 1D-fit with #%d toys' % self.nToys  
+        elif not title :
+            title = 'Goodness of 1D-fit'
+        
+        return T.table ( rows , title = title , prefix = prefix , alignment = 'lccccccccccc' )
+
+    # =========================================================================
+    ## Draw fit CDF & empirical ECDF 
+    def draw  ( self , what , opts = '' , *args , **kwargs ) :
+        """ Draw fit CDF & empirical CDF
+        """
+        key = cidict_fun ( what ) 
+        if   key in self.KS_keys and 'KS' in self.ecdfs :             
+            result = self.result ( 'KS' )
+            ecdf   = self.ecdfs  [ 'KS' ]
+            logger.info ( 'Toy resuls for Kolmogorov-Smirnov estimate' ) 
+        elif key in self.K_keys  and 'K'  in self.ecdfs : 
+            result = self.result ( 'K' )
+            ecdf   = self.ecdfs  [ 'K' ]
+            logger.info ( 'Toy resuls for Kuiper estimate' ) 
+        elif key in self.AD_keys and 'AD' in self.ecdfs :             
+            result = self.result ( 'AD' )
+            ecdf   = self.ecdfs  [ 'AD' ]
+            logger.info ( 'Toy resuls for Anderson-Darling estimate' ) 
+        elif key in self.CM_keys  and 'CM' in self.ecdfs : 
+            result = self.result  ( 'CM' )
+            ecdf   = self.ecdfs   [ 'CM' ]
+            logger.info ( 'Toy resuls for Cramer-von Mises  estimate' ) 
+        elif key in self.ZK_keys  and 'ZK' in self.ecdfs : 
+            result = self.result  ( 'ZK' )
+            ecdf   = self.ecdfs   [ 'ZK' ]
+            logger.info ( 'Toy resuls for ZK estimate' ) 
+        elif key in self.ZA_keys  and 'ZA' in self.ecdfs :  
+            result = self.result  ( 'ZA' )
+            ecdf   = self.ecdfs   [ 'ZA' ]
+            logger.info ( 'Toy resuls for ZK estimate' ) 
+        elif key in self.ZC_keys and 'ZC' in self.ecdfs : 
+            result = self.result  ( 'ZC' )
+            ecdf   = self.ecdfs   [ 'ZC' ]
+            logger.info ( 'Toy resuls for ZK estimate' ) 
+        else :
+            raise KeyError (  "draw: Invalid `what`:%s" % what )
+            
+        xmin , xmax = ecdf.xmin () , ecdf.xmax ()
+        value     = result.statistics
+        xmin      = min ( xmin , value )
+        xmax      = max ( xmax , value )
+        xmin , xmax  = axis_range ( xmin , xmax , delta = 0.10 )
+
+        kwargs [ 'xmin' ] = kwargs.get ( 'xmin' , xmin ) 
+        kwargs [ 'xmax' ] = kwargs.get ( 'xmax' , xmax )
+        result    = ecdf.draw  ( opts , *args , **kwargs ) 
+        line      = ROOT.TLine ( value , 0 , value , 1 )
+        ## 
+        line.SetLineWidth ( 4 ) 
+        line.SetLineColor ( 8 ) 
+        line.draw()
+        ## 
+        return result, line  
+
 # =============================================================================
 if '__main__' == __name__ :
     
diff --git a/ostap/stats/tests/test_stats_2samples.py b/ostap/stats/tests/test_stats_2samples.py
new file mode 100644
index 00000000..4decac03
--- /dev/null
+++ b/ostap/stats/tests/test_stats_2samples.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# ==============================================================================
+# @file ostap/stats/tests/test_stats_gof1d.py 
+# Test Goddness-of-fits for 1D fits 
+# Copyright (c) Ostap developpers.
+# ==============================================================================
+""" Test Goddness-of-fits for 1D fits 
+"""
+# ==============================================================================
+from   ostap.stats.twosamples import TSTest, TSToys 
+from   ostap.core.meta_info   import python_info 
+from   ostap.plotting.canvas  import use_canvas
+from   ostap.logger.pretty    import pretty_float
+from   ostap.utils.utils      import vrange 
+from   ostap.math.math_ve     import significance
+import ostap.logger.table     as     T
+import ROOT, random, array    
+# ==============================================================================
+from   ostap.logger.logger import getLogger 
+if '__main__' ==  __name__ : logger = getLogger ( 'tests_stats_2samples' )
+else                       : logger = getLogger ( __name__            )
+# ==============================================================================
+N1  = 200
+N2  = 200
+
+ds1 = array.array ( 'd' , ( random.gauss ( 0   , 1   ) for i in range ( N1 ) ) ) ## Gaussian 
+ds2 = array.array ( 'd' , ( random.gauss ( 0   , 1   ) for i in range ( N2 ) ) ) ## the same  Gaussian 
+ds3 = array.array ( 'd' , ( random.gauss ( 0.5 , 1   ) for i in range ( N2 ) ) ) ## different Gaussian 
+ds4 = array.array ( 'd' , ( random.gauss ( 0   , 1.3 ) for i in range ( N2 ) ) ) ## different Gaussian 
+
+# =============================================================================
+def test_2samples_good () :
+    
+    logger = getLogger ( 'test_2samples_good' ) 
+
+    test   = TSTest ( ds1 , ds2 )
+    title  = 'Test good samples'
+    logger.info ( '%s:\n%s' % ( title , test.table ( title = title , prefix = '# ' ) ) )
+
+    toys   = TSToys ( ds1 , ds2 )
+    title  = 'Toys good samples'
+    logger.info ( '%s:\n%s' % ( title , toys.table ( title = title , prefix = '# ' ) ) )
+
+    with use_canvas ( 'test_2samples_good/test' , wait = 3 ) : test.draw() 
+    with use_canvas ( 'test_2samples_good/toys' , wait = 3 ) : toys.draw() 
+
+    
+# =============================================================================
+def test_2samples_same () :
+    
+    logger = getLogger ( 'test_2samples_same' ) 
+    
+    test   = TSTest ( ds1 , ds1 )                   
+    title  = 'Test same samples'
+    logger.info ( '%s:\n%s' % ( title , test.table ( title = title , prefix = '# ' ) ) )
+
+    toys   = TSToys ( ds1 , ds1 )
+    title  = 'Toys same samples'
+    logger.info ( '%s:\n%s' % ( title , toys.table ( title = title , prefix = '# ' ) ) )
+
+    with use_canvas ( 'test_2samples_same/test' , wait = 3 ) : test.draw() 
+    with use_canvas ( 'test_2samples_same/toys' , wait = 3 ) : toys.draw() 
+    
+# =============================================================================
+def test_2samples_bad1 () :
+    
+    logger = getLogger ( 'test_2samples_bad1'  ) 
+    
+    test   = TSTest ( ds1 , ds3 )
+    title  = 'Test bad/1 samples'
+    logger.info ( '%s:\n%s' % ( title , test.table ( title = title , prefix = '# ' ) ) )
+
+    toys   = TSToys ( ds1 , ds3 )
+    title  = 'Toys bad/1 samples'
+    logger.info ( '%s:\n%s' % ( title , toys.table ( title = title , prefix = '# ' ) ) )
+    
+    with use_canvas ( 'test_2samples_bad1/test' , wait = 3 ) : test.draw() 
+    with use_canvas ( 'test_2samples_bad1/toys' , wait = 3 ) : toys.draw() 
+    
+# =============================================================================
+def test_2samples_bad2 () :
+    
+    logger = getLogger ( 'test_2samples_bad2'  ) 
+    
+    test   = TSTest ( ds1 , ds4  )
+    title  = 'Test bad/2 samples'
+    logger.info ( '%s:\n%s' % ( title , test.table ( title = title , prefix = '# ' ) ) )
+
+    toys   = TSToys ( ds1 , ds4 )
+    title  = 'Toys bad/2 samples'
+    logger.info ( '%s:\n%s' % ( title , toys.table ( title = title , prefix = '# ' ) ) )
+    
+    with use_canvas ( 'test_2samples_bad2/test' , wait = 3 ) : test.draw() 
+    with use_canvas ( 'test_2samples_bad2/toys' , wait = 3 ) : toys.draw() 
+
+# ===============================================================================
+if '__main__' == __name__ :
+
+    test_2samples_good ()
+    test_2samples_same ()
+    test_2samples_bad1 ()
+    test_2samples_bad2 ()
+    
+# ===============================================================================
+##                                                                        The END 
+# ===============================================================================
+                   
diff --git a/ostap/stats/twosamples.py b/ostap/stats/twosamples.py
new file mode 100644
index 00000000..11c543b5
--- /dev/null
+++ b/ostap/stats/twosamples.py
@@ -0,0 +1,625 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# =============================================================================
+## @file ostap/stats/twosamples.py
+#  Two-Sample Tests
+#  @see https://www.jstor.org/stable/25471118
+#  @author Vanya BELYAEV Ivan.Belyaev@cern.ch
+#  @date   2024-10-16
+# =============================================================================
+""" Two-Sample Tests
+ - see https://www.jstor.org/stable/25471118
+"""
+# =============================================================================
+__version__ = "$Revision$"
+__author__  = "Vanya BELYAEV Ivan.Belyaev@cern.ch"
+__date__    = "2024-09-16"
+__all__     = (
+)
+# =============================================================================
+from   ostap.core.meta_info   import root_info 
+from   ostap.core.ostap_types import listlike_types 
+from   collections            import defaultdict
+from   ostap.core.core        import SE, VE, Ostap
+from   ostap.math.base        import doubles, axis_range  
+from   ostap.math.models      import f1_draw 
+from   ostap.stats.gof_utils  import Estimators,Summary
+import ostap.fitting.ds2numpy 
+import ostap.fitting.roofit
+import ROOT, math, array   
+# =============================================================================
+# logging 
+# =============================================================================
+from ostap.logger.logger import getLogger 
+if '__main__' ==  __name__ : logger = getLogger( 'ostap.stats.twosamples' )
+else                       : logger = getLogger( __name__ )
+# =============================================================================
+logger.debug ( 'Two-sample test' )
+# =============================================================================
+try : # =======================================================================
+    # =========================================================================
+    import numpy as np
+    # =========================================================================
+except ImportError : # ========================================================
+    # =========================================================================
+    np = None
+# =============================================================================
+if  ( 6 , 32 ) <= root_info : data2vct = lambda s : s
+else                        : data2vct = lambda s : doubles ( s )
+# =============================================================================
+## transform data into empirical CDF
+#  @code
+#  data = ...
+#  cdf  = ecdf_from_data ( data ) 
+#  @endcode 
+#  @param data input dataset 
+#  @return empirical cumulative distribution function 
+def ecdf_from_data  ( data ) :
+    """ Transform data into empirical CDF 
+    - data : input dataset 
+    >>> data = ...
+    >>> cdf  = ecdf_from_data ( data ) 
+    """
+    if   isinstance  ( data , Ostap.Math.ECDF  ) : return data 
+    elif np and isinstance ( data , np.ndarray ) : return Ostap.Math.ECDF ( data2vct ( data ) ) 
+    elif isinstance  ( data , array.array      ) : return Ostap.Math.ECDF ( data2vct ( data ) ) 
+    elif isinstance  ( data1 , listlike_types  ) : return Ostap.Math.ECDF ( doubles  ( data ) ) 
+    ## 
+    raise TypeError ( "ecdf_from_data: Unsupported `data' type: %s" % type ( data1 ).__name__ )
+# ===============================================================================
+## Prepare data
+#  @code
+#  data1 = ...
+#  data2 = ...
+#  ecdf1 , ecdf2 , ecdf = prepare_data1 ( data1 , data2 ) 
+#  @endcode
+#  @param data1 the 1st    dataset
+#  @param data2 the second dataset
+#  @param pooled (optional) the CDF for pooled data 
+#  @return triplet of empiricla CDFs for the 1st, 2nd and pooled datasets 
+def prepare_data1 ( data1 , data2 , pooled = None ) :
+    """ Prepare data
+    >>> data1 = ...
+    >>> data2 = ...
+    >>> ecdf1 , ecdf2 , ecdf = prepare_data1 ( data1 , data2 ) 
+    - data1  : the 1st    dataset
+    - data2  : the second dataset
+    - pooled : (optiomal) the CDF for pooled data 
+    - return a triplet of empirical CDFs for the 1st, 2nd and pooled datasets 
+    """
+    ecdf1 = ecdf_from_data ( data1 )
+    ecdf2 = ecdf_from_data ( data2 )
+    ##
+    n1    = len ( ecdf1 )
+    n2    = len ( ecdf2 )
+    ##
+    ecdf  = pooled if ( isinstance ( pooled , Ostap.Math.ECDF ) and n1 + n2 == len ( pooled ) ) else ecdf1 + ecdf2
+    ## 
+    return ecdf1 , ecdf2 , ecdf
+# =============================================================================
+## Prepare data
+#  @code
+#  data1 = ...
+#  data2 = ...
+#  ecdf1 , ecdf2 , ecdf , ranks1 , ranks2 = prepare_data2 ( data1 , data2 ) 
+#  @endcode
+#  @param data1 the 1st    dataset
+#  @param data2 the second dataset
+#  @param pooled (optional) the CDF for pooled data
+#  @param ranks1 (optional) array of ranks of elements from the pooled sample in the 1st dataset
+#  @param ranks2 (optional) array of ranks of elements from the pooled sample in the 2nd dataset
+#  @return quintet of empirical CDFs for the 1st, 2nd and pooled datasets and two ransk arrays 
+def prepare_data2 ( data1 , data2 , pooled = None , ranks1 = None , ranks2 = None ) :
+    """ Prepare data
+    >>> data1 = ...
+    >>> data2 = ...
+    >>> ecdf1 , ecdf2 , ecdf , ranks1 , ranks2 = prepare_data2 ( data1 , data2 ) 
+    - data1  : the 1st    dataset
+    - data2  : the second dataset
+    - pooled : (optional) the CDF for pooled data
+    - ranks1 : (optional) array of ranks of elements from the pooled sample in the 1st dataset
+    - ranks2 : (optional) array of ranks of elements from the pooled sample in the 2nd dataset
+    - return quintet of empirical CDFs for the 1st, 2nd and pooled datasets and two ransk arrays 
+    """
+    ##
+    ecdf1 , ecdf2 , ecdf = prepare_data1 ( data1 , data2 , pooled )
+    ##
+    n1 = len ( ecdf1 )
+    n2 = len ( ecdf2 )
+    ## 
+    ok1 = isinstance ( ranks1 , listlike_types ) and n1 + n2 == len ( ranks1 )
+    if not ok1 :
+        r1     = ecdf1.ranks ( ecdf )            
+        ranks1 = np.asarray  ( r1 , dtype = np.dtype ( 'uint16' ) ) if np else array.array ( 'I' , r1 )
+        ## 
+    ok2 = isinstance ( ranks2 , listlike_types ) and n1 + n2 == len ( ranks2 )
+    if not ok2 :
+        r2     = ecdf2.ranks ( ecdf )            
+        ranks2 = np.asarray  ( r2 , dtype = np.dtype ( 'uint16' ) ) if np else array.array ( 'I' , r2 )
+    ## 
+    return ecdf1, ecdf2, ecdf, ranks1 , ranks2
+
+# =============================================================================
+## Get Kolmogorov-Smirnov statistics KS
+#  @code
+#  ecdf1 =...
+#  ecdf2 =...
+#  ks = kolmogorov_smirnov ( ecdf1, ecdf2 )
+#  @endcode
+#  @param data1  the 1st dataset or empirical CDF
+#  @param data2  the 2nd dataset or empirical CDF 
+#  @param pooled (optional) pooled dataset or empirical CDF
+#  @return Kolmogorov-Smirnov statistics KS
+def kolmogorov_smirnov ( data1 , data2 , pooled = None ) :
+    """ Get Kolmogorov-Smirnov statistics  KS
+    - data1  : the 1st dataset or empirical CDF 
+    - data2  : the 2nd dataset or empirical CDF 
+    - pooled : (optional) pooled dataset or CDF
+    """
+    ecdf1, ecdf2, ecdf = prepare_data1 ( data1 , data2 , pooled ) 
+    return max ( abs ( ecdf1 ( x ) - ecdf2 ( x ) ) for x in ecdf )
+# ================================================================================
+## Get Kuiper statistics K
+#  @code
+#  ecdf1 =...
+#  ecdf2 =...
+#  k  = kuiper ( ecdf1, ecdf2 )
+#  @endcode
+#  @param data1  the 1st dataset or empirical CDF
+#  @param data2  the 2nd dataset or empirical CDF 
+#  @param pooled (optional) pooled dataset or empirical CDF
+#  @return Kuiper statistics K
+def kuiper ( data1 , data2 , pooled = None  ) :
+    """ Get kuiper statistics K
+    - data1  : the 1st dataset or empirical CDF 
+    - data2  : the 2nd dataset or empirical CDF 
+    - pooled : (optional) pooled dataset or CDF
+    """
+    ecdf1, ecdf2, ecdf = prepare_data1 ( data1 , data2 , pooled ) 
+    return max ( ecdf1 ( x ) - ecdf2 ( x ) for x in ecdf ) - min ( ecdf1 ( x ) - ecdf2 ( x ) for x in ecdf ) 
+# =============================================================================
+## Get ZK statistics ZK
+#  @see https://www.jstor.org/stable/25471118
+#  @code
+#  ecdf1 =...
+#  ecdf2 =...
+#  zk  = ZK ( ecdf1, ecdf2 )
+#  @endcode
+#  @param data1  the 1st dataset or empirical CDF
+#  @param data2  the 2nd dataset or empirical CDF 
+#  @param pooled (optional) pooled dataset or empirical CDF
+#  @param ranks1 (optional) ranks of the elements from the pooled data in the 1st dataset
+#  @param ranks2 (optional) ranks of the elements from the pooled data in the 2nd dataset
+#  @return ZK statistics ZK 
+def ZK  ( data1 , data2 , pooled = None , ranks1 = None  , ranks2 = None  ) :
+    """ Get ZK statistics 
+    - data1  : the 1st dataset or empirical CDF 
+    - data2  : the 2nd dataset or empirical CDF 
+    - pooled : (optional) pooled dataset or CDF
+    - ranks1 : (optional) ranks of the elements from the pooled data in the 1st dataset
+    - ranks2 : (optional) ranks of the elements from the pooled data in the 2nd dataset
+    - see https://www.jstor.org/stable/25471118
+    """
+    ## transform input (if needed) 
+    ecdf1, ecdf2, ecdf, ranks1, ranks2 = prepare_data2 ( data1 , data2 , pooled , ranks1 , ranks2 ) 
+    
+    n1   = len ( ecdf1 )
+    n2   = len ( ecdf2 )
+    n    = n1 + n2
+
+    flog = math.log 
+    def term ( k ) :
+
+        r1  = ranks1 [ k ]
+        r2  = ranks2 [ k ]
+        
+        r1  = max ( 0.5 , min ( n1 - 0.5 , r1 ) ) ## r1 + 0.5 ??
+        r2  = max ( 0.5 , min ( n2 - 0.5 , r2 ) ) ## r2 + 0.5 ?? 
+        
+        fk  = 0.5 / n if not k else float ( k ) / n  
+        f1k = float ( r1 ) / n1
+        f2k = float ( r2 ) / n2
+        
+        rr  = n1 * ( f1k * flog ( f1k / fk ) + ( 1 - f1k ) * flog ( ( 1. - f1k ) / ( 1. - fk ) ) )
+        rr += n2 * ( f2k * flog ( f2k / fk ) + ( 1 - f2k ) * flog ( ( 1. - f2k ) / ( 1. - fk ) ) ) 
+        
+        return rr
+    
+    return max ( term ( k ) for k in range ( n ) )
+
+# =============================================================================
+## Get ZA statistics ZA
+#  @see https://www.jstor.org/stable/25471118
+#  @code
+#  ecdf1 =...
+#  ecdf2 =...
+#  za  = ZA ( ecdf1, ecdf2 )
+#  @endcode
+#  @param data1  the 1st dataset or empirical CDF
+#  @param data2  the 2nd dataset or empirical CDF 
+#  @param pooled (optional) pooled dataset or empirical CDF
+#  @param ranks1 (optional) ranks of the elements from the pooled data in the 1st dataset
+#  @param ranks2 (optional) ranks of the elements from the pooled data in the 2nd dataset
+#  @return ZA statistics ZA 
+def ZA ( data1 , data2 , pooled = None , ranks1 = None , ranks2 = None ) :
+    """ Get ZA statistics 
+    - data1  : the 1st dataset or empirical CDF 
+    - data2  : the 2nd dataset or empirical CDF 
+    - pooled : (optional) pooled dataset or CDF
+    - ranks1 : (optional) ranks of the elements from the pooled data in the 1st dataset
+    - ranks2 : (optional) ranks of the elements from the pooled data in the 2nd dataset
+    - see https://www.jstor.org/stable/25471118
+    """
+    ## transform input (if needed) 
+    ecdf1, ecdf2, ecdf, ranks1, ranks2 = prepare_data2 ( data1 , data2 , pooled , ranks1 , ranks2 ) 
+
+    n1   = len ( ecdf1 )
+    n2   = len ( ecdf2 )
+    n    = n1 + n2
+    
+    flog = math.log 
+    def term ( k ) :
+
+        r1  = ranks1 [ k ]
+        r2  = ranks2 [ k ]
+        
+        r1  = max ( 0.5 , min ( n1 - 0.5 , r1 ) ) ## r1 + 0.5 ??
+        r2  = max ( 0.5 , min ( n2 - 0.5 , r2 ) ) ## r2 + 0.5 ?? 
+        
+        fk  = 0.5 / n if not k else float ( k ) / n  
+        f1k = float ( r1 ) / n1
+        f2k = float ( r2 ) / n2
+
+        rr  = n1 * ( f1k * flog ( f1k ) + ( 1 - f1k ) * flog ( 1. - f1k ) ) / ( ( k + 0.5 ) * ( n - k - 0.5 ) )
+        rr += n2 * ( f2k * flog ( f2k ) + ( 1 - f2k ) * flog ( 1. - f2k ) ) / ( ( k + 0.5 ) * ( n - k - 0.5 ) )
+        
+        return rr
+    
+    return sum ( term ( k ) for k in range ( n ) )
+
+# =============================================================================
+## Get ZC statistics ZC
+#  @see https://www.jstor.org/stable/25471118
+#  @code
+#  ecdf1 =...
+#  ecdf2 =...
+#  zc  = ZC ( ecdf1, ecdf2 )
+#  @endcode
+#  @param data1  the 1st dataset or empirical CDF
+#  @param data2  the 2nd dataset or empirical CDF 
+#  @param pooled (optional) pooled dataset or empirical CDF
+#  @param ranks1 (optional) ranks of the elements from the pooled data in the 1st dataset
+#  @param ranks2 (optional) ranks of the elements from the pooled data in the 2nd dataset
+#  @return ZA statistics ZA 
+def ZC ( data1 , data2 , pooled = None , ranks1 = None , ranks2 = None ) :
+    """ Get ZC statistics 
+    - data1  : the 1st dataset or empirical CDF 
+    - data2  : the 2nd dataset or empirical CDF 
+    - pooled : (optional) pooled dataset or CDF
+    - ranks1 : (optional) ranks of the elements from the pooled data in the 1st dataset
+    - ranks2 : (optional) ranks of the elements from the pooled data in the 2nd dataset
+    - see https://www.jstor.org/stable/25471118
+    """
+    ## transform input (if needed) 
+    ecdf1, ecdf2, ecdf, ranks1, ranks2 = prepare_data2 ( data1 , data2 , pooled , ranks1 , ranks2 ) 
+
+    n1  = len ( ecdf1 )
+    n2  = len ( ecdf2 )
+    n   = n1 + n2
+    
+    flog = math.log 
+    def term1 ( j ) :
+        r1  = ranks1 [ j ]
+        return flog ( n1 / ( j + 0.5 ) - 1 ) * flog ( n / ( r1 + 0.5 ) - 1 )
+    
+    def term2 ( j ) :
+        r2  = ranks2 [ j ]
+        return flog ( n2 / ( j + 0.5 ) - 1 ) * flog ( n / ( r2 + 0.5 ) - 1 )
+    
+    t1 = sum ( term1 ( j ) for j in range ( n1 ) )
+    t2 = sum ( term2 ( j ) for j in range ( n2 ) )
+
+    return ( t1 + t2 ) / n 
+
+# =============================================================================
+## @class TSTest
+#  Two-Sample Test
+class TSTest(Estimators):
+    """ Two-sample test 
+    """
+    def __init__ ( self  ,
+                   data1 ,
+                   data2 ) :
+        
+        ## transform input data 
+        ecdf1, ecdf2, ecdf, ranks1, ranks2 = prepare_data2 ( data1 , data2 ) 
+        
+        self.__ecdf1  = ecdf1 
+        self.__ecdf2  = ecdf2
+        self.__ecdf   = ecdf 
+        self.__ranks1 = ranks1 
+        self.__ranks2 = ranks2 
+        
+        self.__estimators = {            
+            'KS' : kolmogorov_smirnov ( self.ecdf1 , self.ecdf2 , self.ecdf ) , 
+            'K'  : kuiper             ( self.ecdf1 , self.ecdf2 , self.ecdf ) , 
+            'ZK' : ZK                 ( self.ecdf1 , self.ecdf2 , self.ecdf , self.ranks1 , self.ranks2 ) , 
+            'ZA' : ZA                 ( self.ecdf1 , self.ecdf2 , self.ecdf , self.ranks1 , self.ranks2 ) , 
+            'ZC' : ZC                 ( self.ecdf1 , self.ecdf2 , self.ecdf , self.ranks1 , self.ranks2 ) , 
+        }
+        
+    @property
+    def ecdf1 ( self ) :
+        """`ecdf1` : empirical CDF for the 1st dataset
+        """
+        return self.__ecdf1 
+    @property
+    def ecdf2 ( self ) :
+        """`ecdf2` : empirical CDF for the 2nd dataset
+        """
+        return self.__ecdf2 
+    @property
+    def ecdf  ( self ) :
+        """`ecdf` : empirical CDF for combined dataset
+        """
+        return self.__ecdf
+    @property
+    def ranks1 ( self ) :
+        """ ranks of elements from pooled sample in the 1st dataset"""
+        return self.__ranks1
+    @property
+    def ranks2 ( self ) :
+        """ ranks of elements from pooled sample in the 2nd dataset"""
+        return self.__ranks2
+
+    # =========================================================================
+    @property
+    def estimators ( self ) :
+        """`estimators` : get a dictionary of all estimators"""
+        return self.__estimators
+
+    # =========================================================================
+    ## Get Kolmogorov-Smirnov statistiscs 
+    @property 
+    def kolmogorov_smirnov_estimator ( self ) :
+        """ Get Kolmogorov-Smirnov statistiscs KS
+        """
+        return self.estimators.get ( 'KS' , None ) 
+
+    # =========================================================================
+    ## Get Kuiper  statististics 
+    @property 
+    def kuiper_estimator ( self ) :
+        """ Get Kuiperstatistics
+        """        
+        return self.estimators.get ( 'K' , None ) 
+
+    # =========================================================================
+    ## Get ZK statististics 
+    @property 
+    def ZK_estimator  ( self ) :
+        """ Get ZK statistics
+        """
+        return self.estimators.get ( 'ZK' , None ) 
+        
+    # =========================================================================
+    ## Get ZA statististics 
+    @property 
+    def ZA_estimator  ( self ) :
+        """ Get ZA statistics
+        """        
+        return self.estimators.get ( 'ZA' , None ) 
+    
+    # =========================================================================
+    ## Get ZC statististics 
+    @property 
+    def ZC_estimator ( self ) :
+        """ Get ZC statistics
+        """        
+        return self.estimators.get ( 'ZC' , None ) 
+    
+    __repr__ = Estimators.table
+    __str__  = Estimators.table
+    
+    # =========================================================================
+    def ks_acccept ( self , alpha ) :
+
+        assert 0 < alpha < 1 , 'Invalid "alpha"%s' % alpha
+        c_alpha = math.sqrt ( -1 * math.log ( alpha / 2. ) / 2. ) 
+        n   = len ( self.ecdf1 )
+        m   = len ( self.ecdf2 )
+        d   = self.__KS_val
+        return d > c_alpha * math.sqrt ( ( n + m ) / ( n * m ) ) 
+
+    # =========================================================================
+    def ks_alpha ( self ) :
+        
+        n = len ( self.ecdf1 )
+        m = len ( self.ecdf2 )
+        d = self.__KS_val
+        c = d / math.sqrt ( ( n + m ) / ( n * m ) ) 
+        return 2 * math.exp ( -2 * c * c )
+
+    # =========================================================================
+    ## draw all involved empirical CDFS
+    def draw  ( self , opts = '' , *args , **kwargs ) :
+        """ Draw fit CDF & empirical CDF
+        """
+
+        ecdf1 = self.ecdf1
+        ecdf2 = self.ecdf2
+        ecdf  = self.ecdf
+
+        xmin  , xmax  = ecdf .xmin (), ecdf .xmax()
+        xmin1 , xmax1 = ecdf1.xmin (), ecdf1.xmax()
+        xmin2 , xmax2 = ecdf1.xmin (), ecdf1.xmax()
+
+        xmin = min ( xmin , xmin1 , xmin2 )
+        xmax = max ( xmax , xmax1 , xmax2 )
+
+        xmin , xmax = axis_range ( xmin , xmax , delta = 0.05 )
+
+        xmin = kwargs.pop ( 'xmin' , xmin )
+        xmax = kwargs.pop ( 'xmax' , xmax )
+        
+        r1 = ecdf.draw ( opts                        , color = 2 , linewidth = 3 , xmin = xmin , xmax = xmax , **kwargs )
+        r2 = ecdf.draw ( '%s %s' % ( 'same' , opts ) , color = 4 , linewidth = 3 , xmin = xmin , xmax = xmax , **kwargs )
+        r  = ecdf.draw ( '%s %s' % ( 'same' , opts ) , color = 8 , linewidth = 3 , xmin = xmin , xmax = xmax , **kwargs )
+        
+        return r
+
+# =============================================================================
+## @class TSToys
+#  Two-Sample Test
+class TSToys(TSTest,Summary):
+    """ Two-sample test 
+    """
+    def __init__ ( self           ,
+                   data1          ,
+                   data2          ,
+                   nToys  = 100   , 
+                   silent = False ) :
+
+        ## initialize the base
+        TSTest.__init__ ( self , data1 , data2 )
+
+        self.__counters = defaultdict(SE) 
+        self.__ecdfs    = {} 
+        self.__silent   = True if silent else False
+        
+        self.__nToys  = 0 
+        if 0 < nToys : self.run ( nToys , silent = silent ) 
+    
+    # =========================================================================
+    ## number of toys/permutations 
+    @property
+    def nToys ( self ) :
+        """`nToys` : number of toys/permutations"""
+        return self.__nToys
+    
+    # =========================================================================
+    ## ECDFs
+    @property 
+    def ecdfs ( self ) :
+        """`ecdfs` : toys/permutations results as empirical cumulative distribution functions"""
+        return self.__ecdfs
+    
+    # =========================================================================
+    ## Counters  
+    @property 
+    def counters ( self ) :
+        """`counters` : toy results as counters"""
+        return self.__counters
+    
+    # ===============================================================================
+    ## run toys 
+    def run ( self , nToys = 1000 , silent = False ) :
+        """ Run toys 
+        """ 
+        assert isinstance ( nToys , int ) and 0 < nToys , "Invalid `nToys` argument!"
+
+        n1   = len ( self.ecdf1 )
+        n2   = len ( self.ecdf1 )
+        
+        data = np.asarray ( self.ecdf.data () , dtype = float )
+        
+        results  = defaultdict(list)
+        counters = self.counters
+        
+        from ostap.utils.progress_bar import progress_bar 
+        for i in progress_bar ( nToys , silent = silent , description = 'Permutations:') :
+
+            np.random.shuffle ( data )
+            data1 = data [    : n1 ]
+            data2 = data [ n1 :    ] 
+            
+            ## transform input data 
+            ecdf1, ecdf2, ecdf, ranks1, ranks2 = prepare_data2 ( data1 , data2 ) 
+            
+            ks = kolmogorov_smirnov ( ecdf1 , ecdf2 , ecdf )
+            k  = kuiper             ( ecdf1 , ecdf2 , ecdf )
+            zk = ZK                 ( ecdf1 , ecdf2 , ecdf , ranks1 , ranks2 )
+            za = ZA                 ( ecdf1 , ecdf2 , ecdf , ranks1 , ranks2 )
+            zc = ZC                 ( ecdf1 , ecdf2 , ecdf , ranks1 , ranks2 )
+            
+            counters [ 'KS' ] += ks 
+            counters [ 'K'  ] += k 
+            counters [ 'ZK' ] += zk
+            counters [ 'ZA' ] += za
+            counters [ 'ZC' ] += zc
+            
+            results [ 'KS'  ].append ( ks )    
+            results [ 'K'   ].append ( k  ) 
+            results [ 'ZK'  ].append ( zk ) 
+            results [ 'ZA'  ].append ( za ) 
+            results [ 'ZC'  ].append ( zc ) 
+            
+        ## accumulate number of toys 
+        self.__nToys += nToys 
+
+        ECDF = Ostap.Math.ECDF 
+        for key in results :
+            data = results [ key ]
+            if not data : continue
+            if not key in self.__ecdfs : self.__ecdfs [ key ] = ECDF ( data , True ) ## complementary ECDF!
+            else                       : self.__ecdfs [ key ]  .add  ( data2vct ( data ) ) 
+            
+        del results
+
+    # =========================================================================
+    ## Get Kolmogorov-Smirnov statistiscs 
+    @property 
+    def kolmogorov_smirnov ( self ) :
+        """ Get Kolmogorov-Smirnov statistiscs KS
+        """
+        return self.result ( 'KS' ) 
+    
+    # =========================================================================
+    ## Get Kuiper statististics 
+    @property 
+    def kuiper ( self ) :
+        """ Get Kuiper statistics 
+        """        
+        return self.result ( 'K' ) 
+           
+    # =========================================================================
+    ## Get ZK statististics 
+    @property 
+    def ZK  ( self ) :
+        """ Get ZK statistics 
+        """
+        return self.result ( 'ZK' ) 
+        
+    # =========================================================================
+    ## Get ZA statististics 
+    @property 
+    def ZA  ( self ) :
+        """ Get ZA statistics
+        """        
+        return self.result ( 'ZA' ) 
+    
+    # =========================================================================
+    ## Get ZC statististics 
+    @property 
+    def ZC  ( self ) :
+        """ Get ZC statistics 
+        """        
+        return self.result ( 'ZC' ) 
+
+    table    = Summary.table 
+    __repr__ = Summary.table
+    __str__  = Summary.table
+    
+# =============================================================================
+if '__main__' == __name__ :
+    
+    from ostap.utils.docme import docme
+    docme ( __name__ , logger = logger )
+
+# =============================================================================
+##                                                                      The END 
+# =============================================================================
+
+
+
+
diff --git a/source/include/Ostap/ECDF.h b/source/include/Ostap/ECDF.h
index 85abf306..6f12398d 100644
--- a/source/include/Ostap/ECDF.h
+++ b/source/include/Ostap/ECDF.h
@@ -29,8 +29,10 @@ namespace Ostap
     {
     public: 
       // ======================================================================
-      /// the actual type pof data 
-      typedef std::vector<double>  Data ;
+      /// the actual type of data 
+      typedef std::vector<double>           Data    ;
+      /// the actual type of indices 
+      typedef std::vector<Data::size_type>  Indices ;
       // ======================================================================
     public: 
       // ======================================================================
@@ -47,13 +49,14 @@ namespace Ostap
       template <class ITERATOR,
                 typename value_type = typename std::iterator_traits<ITERATOR>::value_type,
                 typename            = std::enable_if<std::is_convertible<value_type,double>::value> >
-      ECDF  ( ITERATOR   begin                 ,
-              ITERATOR   end                   ,
-              const bool complementary = false ) 
+      ECDF
+      ( ITERATOR   begin                 ,
+        ITERATOR   end                   ,
+        const bool complementary = false ) 
         : m_data          ( begin , end   )
         , m_complementary ( complementary ) 
-      { this->sort_me() ; }
-      /// constructoer to create complementary/oridnary ECDF
+      { if ( !std::is_sorted ( m_data.begin() , m_data.end() ) ) { this->sort_me() ; } }
+      /// constructoer to create complementary/ordinary ECDF
       ECDF
       ( const ECDF&  right         ,
         const bool   complementary ) ;
@@ -85,34 +88,34 @@ namespace Ostap
       unsigned long add ( const ECDF&  values ) ;
       /// add a bunch oif values 
       template <class ITERATOR,
-        typename value_type = typename std::iterator_traits<ITERATOR>::value_type,
-        typename            = std::enable_if<std::is_convertible<value_type,double>::value> >
-        unsigned long add
-        ( ITERATOR begin ,
-          ITERATOR end   )
-        {
-          /// copy input data 
-          Data tmp1 ( begin , end ) ;
-          /// sort input data 
-          std::sort ( tmp1.begin  () , tmp1.end  ()  ) ;
-          /// prepare the output 
-          Data tmp2 ( m_data.size () + tmp1.size () )  ;
-          /// merge two sorted continers 
-          std::merge ( m_data.begin() ,
-                       m_data.end  () ,
-                       tmp1.begin  () ,
-                       tmp1.end    () ,
-                       tmp2.begin  () ) ;
-          /// swap the output with own data 
-          std::swap ( m_data , tmp2 ) ;
-          return m_data.size () ;
-        }
+                typename value_type = typename std::iterator_traits<ITERATOR>::value_type,
+                typename            = std::enable_if<std::is_convertible<value_type,double>::value> >
+      unsigned long add
+      ( ITERATOR begin ,
+        ITERATOR end   )
+      {
+        /// copy input data 
+        Data tmp1 ( begin , end ) ;
+        /// sort input data 
+        if ( !std::is_sorted ( tmp1.begin() , tmp1.end() ) ) { std::sort ( tmp1.begin  () , tmp1.end  ()  ) ; }
+        /// prepare the output 
+        Data tmp2 ( m_data.size () + tmp1.size () )  ;
+        /// merge two sorted containers 
+        std::merge ( m_data.begin() ,
+                     m_data.end  () ,
+                     tmp1.begin  () ,
+                     tmp1.end    () ,
+                     tmp2.begin  () ) ;
+        /// swap the result  with own data 
+        std::swap ( m_data , tmp2 ) ;
+        return m_data.size () ;
+      }
       // ======================================================================
     public :
       // ======================================================================
       ECDF& __iadd__ ( const double x ) { add ( x ) ; return *this ; }
       ECDF& __iadd__ ( const Data&  x ) { add ( x ) ; return *this ; }
-      ECDF  __add__  ( const ECDF&  x ) ;      
+      ECDF  __add__  ( const ECDF&  x ) const ;      
       // ======================================================================
       inline ECDF& operator+= ( const double x ) { add ( x ) ; return *this ; }
       inline ECDF& operator+= ( const Data&  x ) { add ( x ) ; return *this ; }
@@ -121,18 +124,33 @@ namespace Ostap
     public:
       // ======================================================================
       /// data size 
-      inline unsigned long N             () const { return m_data.size () ; } 
+      inline Data::size_type N             () const { return m_data.size () ; } 
       /// data size 
-      inline unsigned long size          () const { return m_data.size () ; }
+      inline Data::size_type size          () const { return m_data.size () ; }
       /// access to data
-      inline const Data&   data          () const { return m_data         ; }
+      inline const Data&     data          () const { return m_data         ; }
       // ======================================================================
       /// complementary?
-      inline bool          complementary () const { return m_complementary ; }
+      inline bool            complementary () const { return m_complementary ; }
       /// minimal x-value
-      inline double        xmin          () const { return m_data.front () ; } 
+      inline double          xmin          () const { return m_data.front () ; } 
       /// maximal x-value
-      inline double        xmax          () const { return m_data.back  () ; }
+      inline double          xmax          () const { return m_data.back  () ; }
+      // ======================================================================
+      /// get the abscissa value with the given index 
+      inline double operator[] ( const unsigned int index ) const
+      { return index < m_data.size() ? m_data[index] : m_data.back() ; }
+      // ======================================================================
+    public:
+      // ======================================================================
+      /** number of elements that are less or equal to x
+       *  "rank of x" in the ordered sample
+       */
+      inline Data::size_type rank ( const double x ) const
+      { return std::upper_bound ( m_data.begin () , m_data.end () , x ) - m_data.begin() ; }
+      // ======================================================================
+      /// get ranks for all elements from another sample 
+      Indices ranks ( const ECDF& sample ) const ;
       // ======================================================================
     public:
       // ======================================================================
@@ -146,11 +164,11 @@ namespace Ostap
     private:
       // ======================================================================
       /// container of sorted data 
-      Data m_data           {}       ; // container of sorted data
+      Data m_data          {       } ; // container of sorted data
       /// complementary CDF?
       bool m_complementary { false } ; // complementary CDF ? 
       // ======================================================================
-    };
+    }; //                                The end of the class Ostap::Math::ECDF
     // ========================================================================
     /// swap two objects 
     inline void swap ( ECDF& left , ECDF& right ) { left.swap ( right ) ; }
@@ -158,10 +176,10 @@ namespace Ostap
     inline ECDF operator+( const ECDF& a ,
                            const ECDF& b )
     { ECDF c { a } ; c += b ; return c ;  }
-    // =======================================================================
-  } //                                            end of namespace Ostap::Math
+    // ========================================================================
+  } //                                         The end of namespace Ostap::Math
   // ==========================================================================
-} //                                                     end of namespace Ostap 
+} //                                                 The end of namespace Ostap 
 // ============================================================================
 //                                                                      The END
 // ============================================================================
diff --git a/source/src/ECDF.cpp b/source/src/ECDF.cpp
index 874cb933..1aa079be 100644
--- a/source/src/ECDF.cpp
+++ b/source/src/ECDF.cpp
@@ -26,11 +26,8 @@
 Ostap::Math::ECDF::ECDF
 ( const Ostap::Math::ECDF::Data&  data          ,
   const bool                      complementary )
-  : m_data          ( data          )
-  , m_complementary ( complementary ) 
-{
-  this->sort_me() ;
-}
+  : Ostap::Math::ECDF::ECDF ( data.begin() , data.end() , complementary )
+{}
 // ============================================================================
 // constructoer to create complementary/oridnary ECDF
 // ============================================================================
@@ -68,8 +65,9 @@ double Ostap::Math::ECDF::evaluate   ( const double x ) const
   if      ( x < m_data.front () ) { return m_complementary ? 1.0 : 0.0 ; } 
   else if ( x > m_data.back  () ) { return m_complementary ? 0.0 : 1.0 ; } 
   //
-  const double result = double ( std::upper_bound ( m_data.begin () ,
-                                                    m_data.end   () , x ) - m_data.begin() ) / m_data.size () ;
+  // const double result = double ( std::upper_bound ( m_data.begin () ,
+  //                                                   m_data.end   () , x ) - m_data.begin() ) / m_data.size () ;
+  const double result = double ( rank ( x ) ) / m_data.size () ;
   return m_complementary ? ( 1 - result ) : result ; 
 }
 // ============================================================================
@@ -107,13 +105,15 @@ Ostap::Math::ECDF::add
 unsigned long
 Ostap::Math::ECDF::add
 ( const Ostap::Math::ECDF::Data& values )
-{
-  return  add ( values.begin() , values.end() ) ;
-}
+{ return  add ( values.begin() , values.end() ) ; }
 // ============================================================================
 Ostap::Math::ECDF
-Ostap::Math::ECDF::__add__  ( const Ostap::Math::ECDF&  x )
-{ return (*this) + x ; } 
+Ostap::Math::ECDF::__add__  ( const Ostap::Math::ECDF&  x ) const 
+{
+  ECDF c { *this } ;
+  c.add ( x ) ;
+  return c ;
+} 
 // ============================================================================
 // add a value to data container  
 // ============================================================================
@@ -122,7 +122,7 @@ Ostap::Math::ECDF::add
 ( const Ostap::Math::ECDF& values )
 {
   Data tmp2  ( m_data.size() + values.size() ) ;
-  /// merge two sorted continers 
+  /// merge two sorted containers 
   std::merge ( m_data.begin        () ,
                m_data.end          () ,
                values.m_data.begin () ,
@@ -131,6 +131,28 @@ Ostap::Math::ECDF::add
   std::swap ( m_data , tmp2 ) ;
   return m_data.size () ;
 }
+// ============================================================================
+// get ranks of the elements in the (pooled) sample
+// ============================================================================
+Ostap::Math::ECDF::Indices
+Ostap::Math::ECDF::ranks
+( const Ostap::Math::ECDF& sample ) const
+{
+  const Data::size_type N  = size() ;
+  // fill outptu array with N
+  Indices result ( sample.size () ,  N ) ;
+  Data::size_type NS= sample.size() ;
+  for ( Data::size_type i = 0 ; i < NS ; ++i )
+    {
+      Data::size_type r = rank ( sample.m_data [ i ] ) ;
+      result [ i ] = r ;
+      // try to be a bit more efficient, the rest of array is filled with N 
+      if ( N <= r ) { break ; }
+    }
+  return result ;
+}
+// ============================================================================
+
 // ============================================================================
 //                                                                      The END 
 // ============================================================================