Skip to content

Commit

Permalink
Logoplots (#534)
Browse files Browse the repository at this point in the history
* added file with logoplot calculation function and import function name

* added palmotif and IPython as dependencies

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Restructured logoplot function to use logomaker instead of palmotif; Removed AnnData filter functionality

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update of logoplot_cdr3_motif docs

* Add tutorial section about logoplots

* Add ax/fig_kws default arguments

* changed font to logomaker default

* Update src/scirpy/pl/_logoplots.py

Co-authored-by: Gregor Sturm <[email protected]>

* Update src/scirpy/pl/_logoplots.py

Co-authored-by: Gregor Sturm <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update keyword arguments

* Add intersphinx

* nitpick ignore

* Update changelog

* Update TCR tutorial

* reorganize tutorial

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Gregor Sturm <[email protected]>
  • Loading branch information
3 people authored Nov 24, 2024
1 parent 088d8c7 commit 0d0201a
Show file tree
Hide file tree
Showing 10 changed files with 454 additions and 191 deletions.
1 change: 1 addition & 0 deletions .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ requirements:
- numba >=0.41.0
- pooch >=1.7.0
- joblib >=1.3.1
- logomaker

test:
source_files:
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ and this project adheres to [Semantic Versioning][].
created with Scirpy v0.20 or later. This change was necessary to speed up writing results to `h5ad` when working
with large datasets ([#556](https://github.com/scverse/scirpy/pull/556)).

### Additions

- Added the function `pl.logoplot_cdr3_motif`, which plots sequence logos of
CDR3 sequences using [logomaker](https://logomaker.readthedocs.io/en/latest/) ([#534](https://github.com/scverse/scirpy/pull/534))

### Documentation

- Add a tutorial for BCR analysis with Scirpy ([#542](https://github.com/scverse/scirpy/pull/542)).
Expand Down
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ when calling the plotting function or need to be precomputed and stored in
pl.clonotype_modularity
pl.clonotype_network
pl.clonotype_imbalance

pl.logoplot_cdr3_motif


Base plotting functions: `pl.base`
Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"awkward": ("https://awkward-array.org/doc/main", None),
"pooch": ("https://www.fatiando.org/pooch/latest", None),
"joblib": ("https://joblib.readthedocs.io/en/latest", None),
"logomaker": ("https://logomaker.readthedocs.io/en/latest/", None),
}

# List of patterns, relative to source directory, that match files and
Expand Down Expand Up @@ -186,4 +187,5 @@
("py:class", "seaborn.matrix.ClusterGrid"),
("py:meth", "mudata.MuData.update"),
("py:class", "awkward.highlevel.Array"),
("py:class", "logomaker.src.Logo.Logo"),
]
391 changes: 219 additions & 172 deletions docs/tutorials/tutorial_3k_tcr.ipynb

Large diffs are not rendered by default.

120 changes: 102 additions & 18 deletions docs/tutorials/tutorial_5k_bcr.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
'pooch>=1.7.0',
'pycairo>=1.20; sys_platform == "win32"',
'joblib>=1.3.1',
'logomaker'
]

[project.optional-dependencies]
Expand Down
1 change: 1 addition & 0 deletions src/scirpy/pl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from ._clonotypes import COLORMAP_EDGES, clonotype_network
from ._diversity import alpha_diversity
from ._group_abundance import group_abundance
from ._logoplots import logoplot_cdr3_motif
from ._repertoire_overlap import repertoire_overlap
from ._spectratype import spectratype
from ._vdj_usage import vdj_usage
Expand Down
115 changes: 115 additions & 0 deletions src/scirpy/pl/_logoplots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from collections.abc import Sequence
from typing import Literal

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from logomaker import Logo, alignment_to_matrix

from scirpy.get import airr as get_airr
from scirpy.util import DataHandler

from .styling import _init_ax


@DataHandler.inject_param_docs()
def logoplot_cdr3_motif(
    adata: DataHandler.TYPE,
    *,
    chains: Literal["VJ_1", "VDJ_1", "VJ_2", "VDJ_2"] | Sequence[Literal["VJ_1", "VDJ_1", "VJ_2", "VDJ_2"]] = "VDJ_1",
    airr_mod: str = "airr",
    airr_key: str = "airr",
    chain_idx_key: str = "chain_indices",
    cdr3_col: str = "junction_aa",
    to_type: Literal["information", "counts", "probability", "weight"] = "information",
    pseudocount: float = 0,
    background: np.ndarray | pd.DataFrame | None = None,
    center_weights: bool = False,
    font_name: str = "sans",
    color_scheme: str = "chemistry",
    vpad: float = 0.05,
    width: float = 0.9,
    ax: plt.Axes | None = None,
    fig_kws: dict | None = None,
    **kwargs,
) -> Logo:
    """
    Generates logoplots of CDR3 sequences

    This is a user friendly wrapper function around the logomaker python package.
    Enables the analysis of potential amino acid motifs by displaying logo plots.
    Subsetting of AnnData/MuData has to be performed manually beforehand (or while calling)
    and only cells with equal cdr3 sequence lengths are permitted.

    Parameters
    ----------
    {adata}
    chains
        One or up to two chains from which to use CDR3 sequences i.e. primary and/or secondary VJ/VDJ chains.
        Mixing VJ and VDJ chains will likely not lead to a meaningful result.
    {airr_mod}
    {airr_key}
    {chain_idx_key}
    cdr3_col
        Key inside awkward array to retrieve junction information (should be in aa)
    to_type
        Choose one of matrix types as defined by logomaker:

        * `"information"`
        * `"counts"`
        * `"probability"`
        * `"weight"`
    pseudocount
        Pseudocount to use when converting from counts to probabilities
    background
        Background probabilities. Both arrays with the same length as output or df with same shape
        as output are permitted.
    center_weights
        Whether to subtract the mean of each row, but only if to_type == `weight`
    font_name
        customize the font face. You can list all available fonts with `logomaker.list_font_names()`.
    color_scheme
        customize the color scheme. You can list all available color schemes with
        `logomaker.list_color_schemes()`.
    vpad
        The whitespace to leave above and below each character within that character's bounding box.
    width
        x coordinate span of each character
    ax
        Add the plot to a predefined Axes object.
    fig_kws
        Parameters passed to the :func:`matplotlib.pyplot.figure` call
        if no `ax` is specified. The dictionary passed in is not modified.
    **kwargs
        Additional arguments passed to `logomaker.Logo()` for comprehensive customization.
        For a full list of parameters please refer to `logomaker documentation <https://logomaker.readthedocs.io/en/latest/implementation.html#logo-class>`_

    Returns
    -------
    Returns an object of class logomaker.Logo (see here for more information https://logomaker.readthedocs.io/en/latest/implementation.html#matrix-functions)
    """
    params = DataHandler(adata, airr_mod, airr_key, chain_idx_key)

    # Accept a single chain name as well as a sequence of chain names.
    if isinstance(chains, str):
        chains = [chains]

    if ax is None:
        # Work on a copy so that the default figsize is never written back
        # into a dictionary owned by the caller.
        fig_kws = {} if fig_kws is None else dict(fig_kws)
        fig_kws.setdefault("figsize", (6, 2))
        ax = _init_ax(fig_kws)

    # make sure that sequences are prealigned i.e. they need to have the same length
    airr_df = get_airr(params, [cdr3_col], chains)
    sequence_list = []
    for chain in chains:
        column = airr_df[chain + "_" + cdr3_col]
        # Entries without a value for this chain are skipped.
        sequence_list.extend(sequence for sequence in column if sequence is not None)

    motif = alignment_to_matrix(
        sequence_list, to_type=to_type, pseudocount=pseudocount, background=background, center_weights=center_weights
    )
    cdr3_logo = Logo(motif, color_scheme=color_scheme, vpad=vpad, width=width, font_name=font_name, ax=ax, **kwargs)

    cdr3_logo.style_xticks(anchor=0, spacing=1, rotation=45)
    cdr3_logo.ax.set_ylabel(f"{to_type}")
    cdr3_logo.ax.grid(False)
    # Leave one position of padding on either side of the logo.
    cdr3_logo.ax.set_xlim([-1, len(motif)])
    cdr3_logo.ax.set_title("/".join(chains))
    return cdr3_logo
7 changes: 7 additions & 0 deletions src/scirpy/tests/test_plotting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# pylama:ignore=W0611,W0404
import logomaker
import matplotlib.pyplot as plt
import pytest
import seaborn as sns
Expand Down Expand Up @@ -159,3 +160,9 @@ def test_clonotype_network_pie(
show_legend=show_legend,
)
assert isinstance(p, plt.Axes)


@pytest.mark.extra
def test_logoplot(adata_cdr3):
    """Smoke test: plotting the primary VJ chain yields a logomaker Logo object."""
    logo = pl.logoplot_cdr3_motif(adata_cdr3, chains="VJ_1")
    assert isinstance(logo, logomaker.Logo)

0 comments on commit 0d0201a

Please sign in to comment.