From 5ec8b6cc1b6210b27b8bf0056275427835101a07 Mon Sep 17 00:00:00 2001
From: "Josh L. Espinoza" <jol.espinoz@gmail.com>
Date: Thu, 20 Jul 2023 15:10:21 -0700
Subject: [PATCH] v2023.7.20

---
 CHANGELOG.md                             |  16 ++
 LICENSE.txt => LICENSE                   |  14 -
 README.md                                |  22 +-
 build/lib/compositional/__init__.py      |  45 +---
 build/lib/compositional/compositional.py | 329 ++++++++++++++++++++++-
 compositional.egg-info/PKG-INFO          |   9 +-
 compositional.egg-info/SOURCES.txt       |   3 +-
 compositional/__init__.py                |  45 +---
 devel.txt                                |   5 -
 9 files changed, 375 insertions(+), 113 deletions(-)
 create mode 100644 CHANGELOG.md
 rename LICENSE.txt => LICENSE (77%)
 delete mode 100644 devel.txt

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..f375645
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,16 @@
+#### Changes:
+* [2023.7.20] - Added the following functions:
+	* `assert_acceptable_arguments`
+	* `check_compositional`
+	* `sparsity`
+	* `number_of_components`
+	* `prevalence_of_components`
+	* `transform_closure`
+	* `filter_data_highpass`
+
+* [2022.8.31] - Added support for Python v3.10
+
+#### Future: 
+* Reimplement `plot_compositional` and `plot_prevalence` from [Soothsayer](https://github.com/jolespin/soothsayer)
+* Simplex plots (Optional: matplotlib)
+* Weight components (e.g. gene size)
diff --git a/LICENSE.txt b/LICENSE
similarity index 77%
rename from LICENSE.txt
rename to LICENSE
index 1597392..4a9f45d 100644
--- a/LICENSE.txt
+++ b/LICENSE
@@ -1,17 +1,3 @@
-# ==============
-# compositional
-# ==============
-# Compositional data analysis in Python
-# ------------------------------------
-# GitHub: https://github.com/jolespin/compositional
-# PyPI: https://pypi.org/project/compositional
-# ------------------------------------
-# =======
-# Contact
-# =======
-# Producer: Josh L. Espinoza
-# Contact: jespinoz@jcvi.org, jol.espinoz@gmail.com
-# Google Scholar: https://scholar.google.com/citations?user=r9y1tTQAAAAJ&hl
 # =======
 # License BSD-3
 # =======
diff --git a/README.md b/README.md
index e182d28..36f1846 100644
--- a/README.md
+++ b/README.md
@@ -22,9 +22,10 @@ ete[2/3]
 
 #### Install:
 ```
-# "Stable" release (still developmental)
+# Stable release (Preferred)
 pip install compositional
-# Current release
+
+# Developmental release
 pip install git+https://github.com/jolespin/compositional
 ```
 
@@ -89,7 +90,7 @@ import compositional as coda
 import pandas as pd
 
 # Load abundances (Gomez and Espinoza et al. 2017)
-X = pd.read_csv("https://github.com/jolespin/supragingival_plaque_microbiome/blob/master/16S_amplicons/Data/X.tsv.gz?raw=true", 
+X = pd.read_csv("https://github.com/jolespin/projects/raw/main/supragingival_plaque_microbiome/16S_amplicons/Data/X.tsv.gz", 
                 sep="\t",
                 index_col=0,
                 compression="gzip",
@@ -101,6 +102,21 @@ X = X + delta
 # X.shape: (n=473 samples, m=481 OTUs) | delta=4.322249644494967e-06
 ```
 
+#### (Highpass) Filtering of compositional data
+Here we first remove all samples with fewer than 10,000 total counts, then all features that aren't detected in at least 50% of the samples, and finally all samples that don't have at least 50 detected components.
+
+```
+X_filtered = coda.filter_data_highpass(
+    X=X, 
+    minimum_total_counts=10000,
+    minimum_prevalence=0.5,
+    minimum_components=50,
+)
+
+X.shape, X_filtered.shape
+# ((473, 481), (401, 93))
+```
+
 #### Pairwise operations
 ```
 # Pairwise variance log-ratio
diff --git a/build/lib/compositional/__init__.py b/build/lib/compositional/__init__.py
index e52fbed..3107ca8 100644
--- a/build/lib/compositional/__init__.py
+++ b/build/lib/compositional/__init__.py
@@ -1,39 +1,6 @@
-# ==============
-# Compositional
-# ==============
-# Compositional data analysis in Python
-# ------------------------------------
-# GitHub: https://github.com/jolespin/compositional
-# PyPI: https://pypi.org/project/compositional/
-# ------------------------------------
-# =======
-# Contact
-# =======
-# Producer: Josh L. Espinoza
-# Contact: jespinoz@jcvi.org, jol.espinoz@gmail.com
-# Google Scholar: https://scholar.google.com/citations?user=r9y1tTQAAAAJ&hl
-# =======
-# License BSD-3
-# =======
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Copyright 2020 Josh L. Espinoza
-#
-# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# -*- coding: utf-8 -*-
 
-#
-# =======
-# Version
-# =======
-__version__= "2020.05.19"
+__version__= "2023.7.20"
 __author__ = "Josh L. Espinoza"
 __email__ = "jespinoz@jcvi.org, jol.espinoz@gmail.com"
 __url__ = "https://github.com/jolespin/compositional"
@@ -45,11 +12,15 @@
 # =======
 __functions__ = [
     # Transforms
-    "transform_xlr", "transform_clr", "transform_iqlr", "transform_ilr",
+    "transform_xlr", "transform_clr", "transform_iqlr", "transform_ilr","transform_closure",
     # Pairwise
     "pairwise_vlr", "pairwise_rho","pairwise_phi",
     # Utilities
-    "check_packages",
+    "check_packages","assert_acceptable_arguments","check_compositional",
+    # Filtering
+    "filter_data_highpass",
+    # Metrics
+    "sparsity","number_of_components","prevalence_of_components",
 ]
 __classes__ = []
 
diff --git a/build/lib/compositional/compositional.py b/build/lib/compositional/compositional.py
index 1add0cb..e31aa4d 100644
--- a/build/lib/compositional/compositional.py
+++ b/build/lib/compositional/compositional.py
@@ -2,8 +2,8 @@
 from __future__ import print_function, division
 
 # Built-ins
-import sys,warnings,functools
-from collections import Mapping
+import sys,warnings,functools, operator
+from collections.abc import Mapping 
 from importlib import import_module
 
 # Version specific
@@ -20,6 +20,28 @@
 # =========
 # Utilities
 # =========
+def assert_acceptable_arguments(query, target, operation="le", message="Invalid option provided.  Please refer to the following for acceptable arguments:"):
+    """
+    le: operator.le(a, b) : <=
+    eq: operator.eq(a, b) : ==
+    ge: operator.ge(a, b) : >=
+    """
+    def is_nonstring_iterable(obj):
+        condition_1 = hasattr(obj, "__iter__")
+        condition_2 =  not type(obj) == str
+        return all([condition_1,condition_2])
+    
+    # If query is not a nonstring iterable or a tuple
+    if any([
+            not is_nonstring_iterable(query),
+            isinstance(query,tuple),
+            ]):
+        query = [query]
+    query = set(query)
+    target = set(target)
+    func_operation = getattr(operator, operation)
+    assert func_operation(query,target), "{}\n{}".format(message, target)
+
 # Check packages
 def check_packages(packages, namespace=None, import_into_backend=True, verbose=False):
     """
@@ -73,9 +95,158 @@ def wrapper(*args, **kwargs):
         return wrapper
     return decorator
 
+def check_compositional(X, n_dimensions:int=None, acceptable_dimensions:set={1,2}):
+    """
+    # Description
+    Check that 1D and 2D NumPy/Pandas objects are the correct shape and >= 0
+
+    # Parameters
+        * X:
+            - Compositional data
+            (1D): pd.Series or 1D np.array
+            (2D): pd.DataFrame or 2D np.array
+        * n_dimensions: int   
+    """
+    if n_dimensions is None:
+        n_dimensions = len(X.shape)
+    if not hasattr(acceptable_dimensions, "__iter__"):
+        acceptable_dimensions = {acceptable_dimensions}
+    assert n_dimensions in acceptable_dimensions, "`X` must be {}".format(" or ".join(map(lambda d: f"{d}D", acceptable_dimensions)))
+    assert np.all(X >= 0), "`X` cannot contain negative values."
+
+# ===========================
+# Summary metrics
+# ===========================
+def sparsity(X, checks=True):
+    """
+    # Description
+    Calculates the sparsity (i.e., ratio of zeros) in a NumPy or Pandas object
+
+    # Parameters
+        * X:
+            - Compositional data
+            (1D): pd.Series or 1D np.array
+            (2D): pd.DataFrame or 2D np.array
+        * checks:
+            Check whether or not dimensions are correct and data is >= 0
+    * Output
+        Ratio of zeros
+    """
+    n_dimensions = len(X.shape)
+    if checks:
+        check_compositional(X, n_dimensions)
+
+    if n_dimensions == 2:
+        if isinstance(X, pd.DataFrame):
+            X = X.values
+        X = X.ravel()
+    number_of_zeros = np.sum(X == 0)
+    number_of_values = X.size
+    return number_of_zeros/number_of_values
+
+def number_of_components(X, checks=True):
+    """
+    # Description
+    Calculates the number of detected components (i.e., richness) in a NumPy or Pandas object
+
+    # Parameters
+        * X:
+            - Compositional data
+            (1D): pd.Series or 1D np.array of a composition (i.e., sample)
+            (2D): pd.DataFrame or 2D np.array (rows=samples/compositions, columns=features/components)
+        * checks:
+            Check whether or not dimensions are correct and data is >= 0
+    * Output
+        Number of components per composition (i.e., sample)
+    """
+    n_dimensions = len(X.shape)
+    
+    if checks:
+        check_compositional(X, n_dimensions)
+    
+    if n_dimensions == 2:
+        return (X > 0).sum(axis=1)
+        
+    else:
+        return (X > 0).sum()
+
+def prevalence_of_components(X, checks=True):
+    """
+    # Description
+    Calculates the prevalence of detected components in a NumPy or Pandas object
+
+    # Parameters
+        * X:
+            - Compositional data
+            (1D): pd.Series or 1D np.array of a component vector
+            (2D): pd.DataFrame or 2D np.array (rows=samples/compositions, columns=features/components)
+        * checks:
+            Check whether or not dimensions are correct and data is >= 0
+    * Output
+        Number of compositions where a component was detected
+    """
+    
+    n_dimensions = len(X.shape)
+    
+    if checks:
+        check_compositional(X, n_dimensions)
+    
+    if n_dimensions == 2:
+        return (X > 0).sum(axis=0)
+        
+    else:
+        return (X > 0).sum()
 # ===========================
 # Compositional data analysis
 # ===========================
+def transform_closure(X, checks=True):
+    """
+    # Description
+    Closure (e.g., total sum scaling, relative abundance) that can handle 1D and 2D NumPy and Pandas objects
+
+    # Parameters
+        * X:
+            - Compositional data
+            (1D): pd.Series or 1D np.array
+            (2D): pd.DataFrame or 2D np.array
+        * checks:
+            Check whether or not dimensions are correct and data is >= 0
+    * Output
+        Closure transformed matching input object class
+    """
+    
+    n_dimensions = len(X.shape)
+
+    if checks:
+        check_compositional(X, n_dimensions)
+    
+    if n_dimensions == 2:
+        
+        index = None
+        components = None
+        
+        if isinstance(X, pd.DataFrame):
+            index = X.index
+            components = X.columns
+            X = X.values
+            
+        X_closure = X/X.sum(axis=1).reshape(-1,1)
+        
+        if index is not None:
+            X_closure = pd.DataFrame(X_closure, index=index, columns=components)
+        
+    else:
+        components = None
+        if isinstance(X, pd.Series):
+            components = X.index
+            X = X.values
+            
+        X_closure = X/X.sum()
+        if components is not None:
+            X_closure = pd.Series(X_closure, index=components)
+
+    return X_closure
+
 # Extension of CLR to use custom centroids, references, and zeros without pseudocounts
 def transform_xlr(X, reference_components=None, centroid="mean", return_zeros_as_neginfinity=False, zeros_ok=True):
     """
@@ -106,8 +277,8 @@ def transform_xlr(X, reference_components=None, centroid="mean", return_zeros_as
             False: Error
     """
     n_dimensions = len(X.shape)
-    assert n_dimensions in {1,2}, "`X` must be 1D or 2D"
-    assert np.all(X >= 0), "`X` cannot contain negative values because of log-transformation step."
+    check_compositional(X, n_dimensions)
+
     assert not isinstance(reference_components, tuple), "`reference_components` cannot be type tuple"
     # 1-Dimensional
     if n_dimensions == 1:
@@ -277,7 +448,7 @@ def transform_clr(X, return_zeros_as_neginfinity=False, zeros_ok=True):
     return transform_xlr(X, reference_components=None, centroid="mean", return_zeros_as_neginfinity=return_zeros_as_neginfinity, zeros_ok=zeros_ok)
 
 # Interquartile range log-ratio transform
-def transform_iqlr(X, percentile_range=(25,75), centroid="mean", interval_type="open", return_zeros_as_neginfinity=False, zeros_ok=True, ddof=0):
+def transform_iqlr(X, percentile_range=(25,75), centroid="mean", interval_type="open", return_zeros_as_neginfinity=False, zeros_ok=True, ddof=1):
     """
     Wrapper around `transform_xlr`
 
@@ -295,8 +466,7 @@ def transform_iqlr(X, percentile_range=(25,75), centroid="mean", interval_type="
     """
     # Checks
     n_dimensions = len(X.shape)
-    assert n_dimensions in {2}, "`X` must be 2D"
-    assert np.all(X >= 0), "`X` cannot contain negative values because of log-transformation step."
+    check_compositional(X, n_dimensions)
     assert interval_type in {"closed", "open"}, "`interval_type` must be in the following: {closed, open}"
     percentile_range = tuple(sorted(percentile_range))
     assert len(percentile_range) == 2, "percentile_range must have 2 elements"
@@ -342,8 +512,7 @@ def pairwise_vlr(X):
     """
     # Checks
     n_dimensions = len(X.shape)
-    assert n_dimensions in {2}, "`X` must be 2D"
-    assert np.all(X >= 0), "`X` cannot contain negative values because of log-transformation step."
+    check_compositional(X, n_dimensions, acceptable_dimensions={2})
 
     components = None
     if isinstance(X, pd.DataFrame):
@@ -358,7 +527,7 @@ def pairwise_vlr(X):
         raise Exception("N={} zeros detected in `X`.  Either preprocess or add pseudocounts.".format(n_zeros))    
 
     X_log = np.log(X)
-    covariance = nancorr(X_log, cov=True) # covariance = np.cov(X_log.T, ddof=ddof)
+    covariance = nancorr(X_log, cov=True) # covariance = np.cov(X_log.T, ddof=1)
     diagonal = np.diagonal(covariance)
     vlr = -2*covariance + diagonal[:,np.newaxis] + diagonal
     if components is not None:
@@ -417,7 +586,7 @@ def pairwise_rho(X=None, reference_components=None, centroid="mean", interval_ty
             
     # rho (Erb et al. 2016)
     n, m = xlr.shape
-    variances = np.var(xlr, axis=0) # variances = np.var(X_xlr, axis=0, ddof=ddof)
+    variances = np.var(xlr, axis=0, ddof=1) # variances = np.var(X_xlr, axis=0, ddof=ddof)
     rhos = 1 - (vlr/np.add.outer(variances,variances))    
     if components is not None:
         rhos = pd.DataFrame(rhos, index=components, columns=components)
@@ -475,7 +644,7 @@ def pairwise_phi(X=None, symmetrize=True, triangle="lower", reference_components
             
     # phi (Lovell et al. 2015)
     n, m = xlr.shape
-    variances = np.var(xlr, axis=0)#[:,np.newaxis]
+    variances = np.var(xlr, axis=0, ddof=1)#[:,np.newaxis]
     phis = vlr/variances   
     if symmetrize:
         assert triangle in {"lower","upper"}, "`triangle` must be one of the following: {'lower','upper'}"
@@ -598,4 +767,140 @@ def _ilr_without_tree(X):
     # With tree
     else:
         return _ilr_with_tree(X=X, tree=tree)
+    
+# =========    
+# Filtering
+# =========
+def _filter_data(
+    X:pd.DataFrame,
+    total_counts,
+    prevalence,
+    components,
+    mode,
+    order_of_operations:list=["total_counts", "prevalence", "components"],
+    interval_type="closed",
+    ):
+
+    check_compositional(X, acceptable_dimensions=2)
+    assert_acceptable_arguments(query=order_of_operations,target=["total_counts", "prevalence", "components"], operation="le")
+    assert_acceptable_arguments(query=[mode],target=["highpass", "lowpass"], operation="le")
+    assert_acceptable_arguments(query=[interval_type],target=["closed", "open"], operation="le")
+
+
+    def _get_elements(data,tol,operation):
+        return data[lambda x: operation(x,tol)].index
+
+    def _filter_total_counts(X, tol, operation):
+        data = X.sum(axis=1)
+        return X.loc[_get_elements(data, tol, operation),:]
+
+    def _filter_prevalence(X, tol, operation):
+        conditions = [
+            isinstance(tol, float),
+            0.0 < tol <= 1.0,
+        ]
+
+        if all(conditions):
+            tol = round(X.shape[0]*tol)
+        data = (X > 0).sum(axis=0)
+        assert tol <= X.shape[0], "If prevalence is an integer ({}), it cannot be larger than the number of samples ({}) in the index".format(tol, X.shape[0])
+        return X.loc[:,_get_elements(data, tol, operation)]
+
+    def _filter_components(X, tol, operation):
+        data = (X > 0).sum(axis=1)
+        return X.loc[_get_elements(data, tol, operation),:]
+
+    if interval_type == "closed":
+        operations = {"highpass":operator.ge, "lowpass":operator.le}
+    if interval_type == "open":
+        operations = {"highpass":operator.gt, "lowpass":operator.lt}
+
+    # Defaults
+    if mode == "highpass":
+        if components is None:
+            components = 0
+        if total_counts is None:
+            total_counts = 0
+        if prevalence is None:
+            prevalence = 0
+    if mode == "lowpass":
+        if components in {None, np.inf}:
+            components = X.shape[1]
+        if total_counts in {None, np.inf}:
+            total_counts = np.inf
+        if prevalence in {None, np.inf}:
+            prevalence = X.shape[0]
+            
+    functions = dict(zip(["total_counts", "prevalence", "components"], [_filter_total_counts, _filter_prevalence, _filter_components]))
+    thresholds = dict(zip(["total_counts", "prevalence", "components"], [total_counts, prevalence, components]))
+
+    for strategy in order_of_operations:
+        tol = thresholds[strategy]
+        if tol is not None:
+            X = functions[strategy](X=X,tol=tol, operation=operations[mode])
+
+    return X
+        
+def filter_data_highpass(
+    X:pd.DataFrame,
+    minimum_total_counts=1,
+    minimum_prevalence=1,
+    minimum_components=1,
+    order_of_operations:list=["minimum_total_counts", "minimum_prevalence", "minimum_components"],
+    interval_type="closed",
+    ):
+
+    """
+    # Description
+    Highpass filter compositional table to include data higher than a minimum
+    
+    # Parameters
+        * X: pd.DataFrame or 2D np.array of compositional data (rows=compositions/samples, columns=components/features)
+        * minimum_total_counts:  The minimum total counts in a composition (sum per row) (axis=0)
+        * minimum_prevalence: The minimum number of compositions that must contain the components (axis=1). A float in (0, 1] is interpreted as a fraction of the compositions.
+        * minimum_components: The minimum number of detected components (axis=0)
+        * order_of_operations: Order of filtering scheme.  Choose between: ["minimum_total_counts", "minimum_prevalence", "minimum_components"]
+
+    Adapted from the following source:
+    * https://github.com/jolespin/soothsayer
+ 
+    """
+    assert_acceptable_arguments(query=order_of_operations,target=["minimum_total_counts", "minimum_prevalence", "minimum_components"], operation="le")
+        
+    order_of_operations = list(map(lambda x: "_".join(x.split("_")[1:]), order_of_operations))
+
+    return _filter_data(
+        X=X,
+        total_counts=minimum_total_counts,
+        prevalence=minimum_prevalence,
+        components=minimum_components,
+        mode="highpass",
+        order_of_operations=order_of_operations,
+        interval_type=interval_type,
+        )
+
+# def filter_data_lowpass(
+#     X:pd.DataFrame,
+#     maximum_total_counts=np.inf,
+#     maximum_prevalence=np.inf,
+#     maximum_components=np.inf,
+#     order_of_operations:list=["maximum_total_counts", "maximum_prevalence", "maximum_components"],
+#     interval_type="closed",
+#     ):
+
+#     """
+#     # Description
+#     Lowpass filter compositional table to include data lower than a maximum
+    
+#     # Parameters
+#         * X: pd.DataFrame or 2D np.array of compositional data (rows=compositions/samples, columns=components/features)
+#         * maximum_total_counts:  The maximum total counts in a composition (sum per row) (axis=0)
+#         * maximum_prevalence: The maximum number of compositions that must contain the components (axis=1)
+#         * maximum_components: The maximum number of detected components (axis=0)
+#         * order_of_operations: Order of filtering scheme.  Choose between: ["maximum_total_counts", "maximum_prevalence", "maximum_components"]
+
+#     Adapted from the following source:
+#     * https://github.com/jolespin/soothsayer
+ 
+#     """
 
diff --git a/compositional.egg-info/PKG-INFO b/compositional.egg-info/PKG-INFO
index f0e4592..d4c4fad 100644
--- a/compositional.egg-info/PKG-INFO
+++ b/compositional.egg-info/PKG-INFO
@@ -1,10 +1,13 @@
-Metadata-Version: 1.0
+Metadata-Version: 2.1
 Name: compositional
-Version: 2020.12.16
+Version: 2023.7.20
 Summary: Compositional data analysis in Python
 Home-page: https://github.com/jolespin/compositional
 Author: Josh L. Espinoza
 Author-email: jespinoz@jcvi.org
 License: BSD-3
-Description: UNKNOWN
 Platform: UNKNOWN
+License-File: LICENSE
+
+UNKNOWN
+
diff --git a/compositional.egg-info/SOURCES.txt b/compositional.egg-info/SOURCES.txt
index 3c9577a..9485a9d 100644
--- a/compositional.egg-info/SOURCES.txt
+++ b/compositional.egg-info/SOURCES.txt
@@ -1,9 +1,8 @@
-MANIFEST.in
+LICENSE
 README.md
 setup.py
 compositional/__init__.py
 compositional/compositional.py
-compositional.egg-info/Icon
 compositional.egg-info/PKG-INFO
 compositional.egg-info/SOURCES.txt
 compositional.egg-info/dependency_links.txt
diff --git a/compositional/__init__.py b/compositional/__init__.py
index fa59e9d..3107ca8 100644
--- a/compositional/__init__.py
+++ b/compositional/__init__.py
@@ -1,39 +1,6 @@
-# ==============
-# Compositional
-# ==============
-# Compositional data analysis in Python
-# ------------------------------------
-# GitHub: https://github.com/jolespin/compositional
-# PyPI: https://pypi.org/project/compositional/
-# ------------------------------------
-# =======
-# Contact
-# =======
-# Producer: Josh L. Espinoza
-# Contact: jespinoz@jcvi.org, jol.espinoz@gmail.com
-# Google Scholar: https://scholar.google.com/citations?user=r9y1tTQAAAAJ&hl
-# =======
-# License BSD-3
-# =======
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Copyright 2020 Josh L. Espinoza
-#
-# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# -*- coding: utf-8 -*-
 
-#
-# =======
-# Version
-# =======
-__version__= "2022.8.31"
+__version__= "2023.7.20"
 __author__ = "Josh L. Espinoza"
 __email__ = "jespinoz@jcvi.org, jol.espinoz@gmail.com"
 __url__ = "https://github.com/jolespin/compositional"
@@ -45,11 +12,15 @@
 # =======
 __functions__ = [
     # Transforms
-    "transform_xlr", "transform_clr", "transform_iqlr", "transform_ilr",
+    "transform_xlr", "transform_clr", "transform_iqlr", "transform_ilr","transform_closure",
     # Pairwise
     "pairwise_vlr", "pairwise_rho","pairwise_phi",
     # Utilities
-    "check_packages",
+    "check_packages","assert_acceptable_arguments","check_compositional",
+    # Filtering
+    "filter_data_highpass",
+    # Metrics
+    "sparsity","number_of_components","prevalence_of_components",
 ]
 __classes__ = []
 
diff --git a/devel.txt b/devel.txt
deleted file mode 100644
index a95b209..0000000
--- a/devel.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-[2022.08.31] - Added support for Python v3.10
-* Simplex plots (Optional: matplotlib)
-* Show influence of component length (e.g. gene size)?
-* Instead of xlr and vlr arguments.  Provide a X_log argument.  
-* Make functions robust to nans