Merge pull request #1 from YosefLab/tree-plot

tree annotation
YosefLab · May 15, 2024 · 138835a · 138835a
2 parents 574d096 + 68f3cee
commit 138835a
Show file tree

Hide file tree

Showing 19 changed files with 851 additions and 371 deletions.
diff --git a/docs/api.md b/docs/api.md
@@ -2,28 +2,8 @@
 
 ## Preprocessing
 
-```{eval-rst}
-.. module:: pycea.pp
-.. currentmodule:: pycea
-
-.. autosummary::
-    :toctree: generated
-
-    pp.basic_preproc
-```
-
 ## Tools
 
-```{eval-rst}
-.. module:: pycea.tl
-.. currentmodule:: pycea
-
-.. autosummary::
-    :toctree: generated
-
-    tl.basic_tool
-```
-
 ## Plotting
 
 ```{eval-rst}
@@ -33,5 +13,8 @@
 .. autosummary::
     :toctree: generated
 
+    pl.tree
     pl.branches
+    pl.nodes
+    pl.annotation
 ```
diff --git a/docs/conf.py b/docs/conf.py
@@ -91,10 +91,16 @@
 }
 
 intersphinx_mapping = {
-    "python": ("https://docs.python.org/3", None),
     "anndata": ("https://anndata.readthedocs.io/en/stable/", None),
+    "cycler": ("https://matplotlib.org/cycler/", None),
+    "matplotlib": ("https://matplotlib.org/stable/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
     "networkx": ("https://networkx.org/documentation/stable/", None),
+    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+    "python": ("https://docs.python.org/3", None),
+    "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
+    "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
+    "squidpy": ("https://squidpy.readthedocs.io/en/stable/", None),
     "treedata": ("https://treedata.readthedocs.io/en/stable/", None),
 }
 

diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
@@ -12,134 +12,7 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "from anndata import AnnData\n",
-    "import pandas as pd\n",
-    "import pycea"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "adata = AnnData(np.random.normal(size=(20, 10)))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "With myst it is possible to link in the text cell of a notebook such as this one the documentation of a function or a class.\n",
-    "\n",
-    "Let's take as an example the function {func}`pycea.pp.basic_preproc`. \n",
-    "You can see that by clicking on the text, the link redirects to the API documentation of the function. \n",
-    "Check the raw markdown of this cell to understand how this is specified.\n",
-    "\n",
-    "This works also for any package listed by `intersphinx`. Go to `docs/conf.py` and look for the `intersphinx_mapping` variable. \n",
-    "There, you will see a list of packages (that this package is dependent on) for which this functionality is supported. \n",
-    "\n",
-    "For instance, we can link to the class {class}`anndata.AnnData`, to the attribute {attr}`anndata.AnnData.obs` or the method {meth}`anndata.AnnData.write`.\n",
-    "\n",
-    "Again, check the raw markdown of this cell to see how each of these links are specified.\n",
-    "\n",
-    "You can read more about this in the [intersphinx page](https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html) and the [myst page](https://myst-parser.readthedocs.io/en/v0.15.1/syntax/syntax.html#roles-an-in-line-extension-point)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Implement a preprocessing function here."
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "pycea.pp.basic_preproc(adata)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>A</th>\n",
-       "      <th>B</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>a</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>b</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>c</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   A  B\n",
-       "0  a  1\n",
-       "1  b  2\n",
-       "2  c  3"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "pd.DataFrame().assign(A=[\"a\", \"b\", \"c\"], B=[1, 2, 3])"
-   ]
+   "source": []
   }
  ],
  "metadata": {

diff --git a/src/pycea/_utils.py b/src/pycea/_utils.py
diff --git a/src/pycea/pl/__init__.py b/src/pycea/pl/__init__.py
@@ -1 +1 @@
-from .tree import branches
+from .plot_tree import annotation, branches, nodes, tree
diff --git a/src/pycea/pl/_docs.py b/src/pycea/pl/_docs.py
@@ -2,13 +2,27 @@
 
 from __future__ import annotations
 
+from textwrap import dedent
+
+
+def _doc_params(**kwds):
+    r"""Return a decorator that fills ``{name}`` placeholders in a docstring.
+
+    The decorated object's docstring is dedented and then formatted with
+    ``kwds`` through :meth:`str.format_map`. The unformatted docstring is kept
+    on the object as ``__orig_doc__``.
+
+    Docstrings should start with ``\\`` in the first line for proper formatting.
+
+    Parameters
+    ----------
+    **kwds
+        Placeholder names mapped to the text that replaces them.
+
+    Returns
+    -------
+    A decorator that updates ``__doc__`` in place and returns the same object.
+
+    Raises
+    ------
+    KeyError
+        Raised by the decorator when the docstring uses a placeholder that is
+        not present in ``kwds``.
+    """
+
+    def dec(obj):
+        obj.__orig_doc__ = obj.__doc__
+        # ``__doc__`` is None for undocumented objects and when Python runs
+        # with ``-OO``; ``dedent(None)`` would raise, so leave it untouched.
+        if obj.__doc__ is not None:
+            obj.__doc__ = dedent(obj.__doc__).format_map(kwds)
+        return obj
+
+    return dec
+
+
 doc_common_plot_args = """\
-color_map
+cmap
     Color map to use for continous variables. Can be a name or a
     :class:`~matplotlib.colors.Colormap` instance (e.g. `"magma`", `"viridis"`
     or `mpl.cm.cividis`), see :func:`~matplotlib.cm.get_cmap`.
     If `None`, the value of `mpl.rcParams["image.cmap"]` is used.
-    The default `color_map` can be set using :func:`~scanpy.set_figure_params`.
+    The default `cmap` can be set using :func:`~scanpy.set_figure_params`.
 palette
     Colors to use for plotting categorical annotation groups.
     The palette can be a valid :class:`~matplotlib.colors.ListedColormap` name
@@ -18,6 +32,10 @@
     If `None`, `mpl.rcParams["axes.prop_cycle"]` is used unless the categorical
     variable already has colors stored in `tdata.uns["{var}_colors"]`.
     If provided, values of `tdata.uns["{var}_colors"]` will be set.
+vmax
+    The maximum value for the colormap.
+vmin
+    The minimum value for the colormap.
 ax
     A matplotlib axes object. If `None`, a new figure and axes will be created.
 """
diff --git a/src/pycea/pl/_utils.py b/src/pycea/pl/_utils.py
@@ -11,7 +11,7 @@
 import numpy as np
 from scanpy.plotting import palettes
 
-from pycea._utils import get_root
+from pycea.utils import get_root
 
 
 def layout_tree(
@@ -62,7 +62,7 @@ def layout_tree(
     node_coords = {}
     for node in nx.dfs_postorder_nodes(tree, root):
         if tree.out_degree(node) == 0:
-            lon = (i / n_leaves) * 2 * np.pi
+            lon = (i / (n_leaves)) * 2 * np.pi  # + 2 * np.pi / n_leaves
             if extend_branches:
                 node_coords[node] = (max_depth, lon)
             else:
@@ -175,3 +175,59 @@ def _get_categorical_colors(tdata, key, data, palette=None):
     # store colors in tdata
     tdata.uns[key + "_colors"] = colors_list
     return dict(zip(categories, colors_list))
+
+
+def _get_categorical_markers(tdata, key, data, markers=None):
+    """Get categorical markers for plotting.
+
+    Parameters
+    ----------
+    tdata
+        Object whose ``uns`` mapping is read for ``key + "_markers"`` and
+        updated with the markers that were chosen.
+    key
+        Name of the variable; ``key + "_markers"`` is the ``uns`` entry used.
+    data
+        Values to assign markers to. Converted to a categorical if it does not
+        already have the ``category`` dtype.
+    markers
+        Mapping from category to marker, or a sequence of markers assigned to
+        the categories in order. If `None`, markers stored in ``tdata.uns``
+        are reused when there is exactly one per category; otherwise the
+        default markers are used.
+
+    Returns
+    -------
+    Dictionary mapping each category to its marker.
+
+    Raises
+    ------
+    ValueError
+        If ``markers`` is neither a mapping nor a sequence, or if it is an
+        empty sequence while there are categories to plot.
+    KeyError
+        If ``markers`` is a mapping that lacks one of the categories.
+    """
+    default_markers = ["o", "s", "D", "^", "v", "<", ">", "p", "P", "*", "h", "H", "X"]
+    # Ensure data is a category
+    if data.dtype.name != "category":
+        data = data.astype("category")
+    categories = data.cat.categories
+    n_categories = len(categories)
+
+    def _cycle(pool):
+        # Repeat `pool` as often as needed and trim to one marker per category,
+        # so that zipping with the categories never drops a category.
+        pool = list(pool)
+        if not pool:
+            return pool
+        return (pool * (n_categories // len(pool) + 1))[:n_categories]
+
+    if markers is None:
+        # Reuse stored markers only when they match the categories one-to-one;
+        # a shorter stored list would silently leave categories without marker.
+        markers_list = tdata.uns.get(key + "_markers", None)
+        if markers_list is None or len(markers_list) != n_categories:
+            markers_list = _cycle(default_markers)
+    elif isinstance(markers, cabc.Mapping):
+        markers_list = [markers[k] for k in categories]
+    else:
+        if not isinstance(markers, cabc.Sequence):
+            raise ValueError("Please check that the value of 'markers' is a valid list of marker names.")
+        if n_categories > 0 and len(markers) == 0:
+            raise ValueError("'markers' must contain at least one marker.")
+        if len(markers) < n_categories:
+            warnings.warn(
+                "Length of markers is smaller than the number of "
+                f"categories (markers length: {len(markers)}, "
+                f"categories length: {n_categories}). "
+                "Some categories will have the same marker.",
+                stacklevel=2,
+            )
+        markers_list = _cycle(markers)
+    # store markers in tdata
+    tdata.uns[key + "_markers"] = markers_list
+    return dict(zip(categories, markers_list))
+
+
+def _series_to_rgb_array(series, colors, vmin=None, vmax=None, na_color="#808080"):
+    """Convert a pandas Series to an N x 3 RGB array using a color mapping.
+
+    Parameters
+    ----------
+    series
+        Values to convert to colors.
+    colors
+        Either a dictionary mapping values to colors, or a matplotlib colormap
+        applied to the values after normalizing them to ``[vmin, vmax]``.
+    vmin
+        Value mapped to the start of the colormap. Required with a colormap.
+    vmax
+        Value mapped to the end of the colormap. Required with a colormap.
+    na_color
+        Color used for missing values and, with a dictionary, for values that
+        have no entry in the dictionary.
+
+    Returns
+    -------
+    Array of shape ``(len(series), 3)`` holding one RGB row per value.
+
+    Raises
+    ------
+    ValueError
+        If ``colors`` is neither a dictionary nor a colormap, or if ``vmin`` or
+        ``vmax`` is missing when a colormap is used.
+    """
+    if isinstance(colors, dict):
+        # Cast to object first: mapping a categorical Series can return a
+        # categorical, which rejects `na_color` as a new value.
+        color_series = series.map(colors).astype(object)
+        # Fill every entry without a color (missing input or unmapped value)
+        # so that `to_rgb` never receives NaN.
+        color_series = color_series.where(color_series.notna(), na_color)
+        rgb_array = np.array([mcolors.to_rgb(color) for color in color_series], dtype=float).reshape(-1, 3)
+    elif isinstance(colors, mcolors.Colormap):
+        # Any Colormap (ListedColormap included) supports the call below; the
+        # error messages are kept unchanged for callers that match on them.
+        if vmin is None or vmax is None:
+            raise ValueError("vmin and vmax must be specified when using a ListedColormap.")
+        values = np.asarray(series, dtype=float)
+        norm = mcolors.Normalize(vmin, vmax)
+        rgb_array = np.array(colors(norm(values))[:, :3])
+        # Paint missing values here instead of calling `colors.set_bad`, which
+        # would permanently modify the caller's colormap.
+        rgb_array[np.isnan(values)] = mcolors.to_rgb(na_color)
+    else:
+        raise ValueError("cmap must be either a dictionary or a ListedColormap.")
+    return rgb_array