Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mutational load function (SHM) #536

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9f8558f
Added mutational_load function to calculate differences between seque…
MKanetscheider Aug 9, 2024
4755736
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 9, 2024
4b020b6
Merge branch 'scverse:main' into mutational_load
MKanetscheider Aug 13, 2024
56a8594
Rewrote mutational_load function based on previous feedback and added…
MKanetscheider Aug 15, 2024
c599a39
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 15, 2024
5c7c92c
Fixed an issue with pre-commit
MKanetscheider Aug 15, 2024
c84708c
Merge branch 'mutational_load' of https://github.com/MKanetscheider/s…
MKanetscheider Aug 15, 2024
12ada2f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 15, 2024
a416e06
Further optimized mutational_load function and formatting of docstring…
MKanetscheider Aug 18, 2024
c0e795c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 18, 2024
9793062
Fixed small issues with the code layout as suggested by grst
MKanetscheider Aug 20, 2024
d12f7b5
Merge branch 'scverse:main' into mutational_load
MKanetscheider Aug 26, 2024
906df48
Merge branch 'mutational_load' of https://github.com/MKanetscheider/s…
MKanetscheider Aug 26, 2024
196177e
Added a first beta-test case, which revealed some bugs that were also…
MKanetscheider Aug 29, 2024
11101a5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 29, 2024
cf53b72
Specified 'except' condition
MKanetscheider Aug 29, 2024
9c6a56c
Merge branch 'mutational_load' of https://github.com/MKanetscheider/s…
MKanetscheider Aug 29, 2024
e5c4d76
Merge branch 'main' into mutational_load
MKanetscheider Aug 30, 2024
ea80d69
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 30, 2024
e662e36
Merge branch 'main' into mutational_load
MKanetscheider Oct 15, 2024
ae9563b
Add notebook section about somatic hypermutation
grst Oct 17, 2024
4ad7d59
Merge branch 'main' into mutational_load
grst Nov 20, 2024
8abcd01
Update SHM description text in tutorial
grst Nov 20, 2024
42f79da
Merge branch 'main' into mutational_load
grst Nov 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,12 @@ V(D)J gene usage

tl.spectratype

Calculating mutations
^^^^^^^^^^^^^^^^^^^^^
.. autosummary::
   :toctree: ./generated

   tl.mutational_load

Plotting: `pl`
--------------
Expand Down
115 changes: 113 additions & 2 deletions docs/tutorials/tutorial_5k_bcr.ipynb

Large diffs are not rendered by default.

147 changes: 147 additions & 0 deletions src/scirpy/tests/conftest.py

Large diffs are not rendered by default.

326 changes: 326 additions & 0 deletions src/scirpy/tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,3 +755,329 @@ def test_clonotype_imbalance(adata_tra):
stat = stat.sort_values(by="clone_id")
stat = stat.reset_index().iloc[:, 1:5]
pdt.assert_frame_equal(stat, expected_stat, check_names=False, check_dtype=False)


def test_mutational_load(adata_mutation, adata_not_aligned):
    """Check the mutation counts reported by ``ir.tl.mutational_load``.

    The function is run on ``adata_mutation`` once per ``region`` setting
    ("IMGT_V(D)J", "IMGT_V_segment" and "subregion") and each result is
    compared with a hand-written expectation. Finally, calling it on
    ``adata_not_aligned`` must raise ``ValueError``.
    """

    def count_mutations(adata, region):
        # All calls in this test share these settings; only the data and the
        # region differ. A fresh ``chains`` list is created for every call.
        return ir.tl.mutational_load(
            adata,
            germline_alignment="germline_alignment",
            chains=["VDJ_1", "VJ_1"],
            frequency=False,
            inplace=False,
            region=region,
        )

    def expected_frame(labels, counts_per_cell):
        # One row per cell. Columns are every VDJ_1 label followed by every
        # VJ_1 label; both chains carry the same expected counts.
        rows = {}
        for cell_id, counts in counts_per_cell.items():
            rows[cell_id] = {
                f"{chain}_{label}_mu_count": count
                for chain in ("VDJ_1", "VJ_1")
                for label, count in zip(labels, counts)
            }
        return pd.DataFrame.from_dict(rows, orient="index")

    observed_vdj = count_mutations(adata_mutation, "IMGT_V(D)J")
    observed_v_segment = count_mutations(adata_mutation, "IMGT_V_segment")
    observed_subregion = count_mutations(adata_mutation, "subregion")

    nan = np.nan
    subregion_labels = ("fwr1", "cdr1", "fwr2", "cdr2", "fwr3", "cdr3", "fwr4")

    # Each entry: (cell barcode, V(D)J count, V segment count, counts per subregion).
    # The leading numbers in the comments are the scenario ids used for the fixture cells.
    scenarios = [
        # 1: no mutation
        ("AAACGGGCACGACTCG-MH9179822", 0, 0, (0, 0, 0, 0, 0, 0, 0)),
        # 2: no mutation, but germline cdr3 masked with 35 "N" in VDJ and 5 "N" in VJ
        ("AACCATGAGAGCAATT-MH9179822", 0, 0, (0, 0, 0, 0, 0, 0, 0)),
        # 3: no mutation, but poor sequence quality at the beginning of the
        #    sequence alignment: 15 '.'
        ("AACCATGCAGTCACTA-MH9179822", 0, 0, (0, 0, 0, 0, 0, 0, 0)),
        # 4: no substitutions, but gaps ('-') in the sequence alignment: 3 in FWR1,
        #    3 in FWR2 and 5 in FWR4 -- the expected counts include these gaps
        ("AACGTTGGTATAAACG-MH9179822", 11, 6, (3, 0, 3, 0, 0, 0, 5)),
        # 6: few mutations: 1 in each subregion of sequence_alignment (= 7 in total)
        ("AACTCTTGTTTGGCGC-MH9179822", 7, 5, (1, 1, 1, 1, 1, 1, 1)),
        # 7: some mutations: 3 in each subregion of germline alignment (= 21 in total)
        ("AACTGGTCAATTGCTG-MH9179822", 21, 15, (3, 3, 3, 3, 3, 3, 3)),
        # 8: many mutations: 5 in each subregion of germline alignment (= 35 in total)
        ("AAGCCGCAGATATACG-MH9179822", 35, 25, (5, 5, 5, 5, 5, 5, 5)),
        # 9: no germline alignment
        ("AAGCCGCAGCGATGAC-MH9179822", nan, nan, (nan, nan, nan, nan, nan, nan, nan)),
        # 10: no sequence_alignment
        ("AAGCCGCGTCAGATAA-MH9179822", nan, nan, (nan, nan, nan, nan, nan, nan, nan)),
    ]

    expected_vdj = expected_frame(
        ("IMGT_V(D)J",),
        {cell_id: (vdj_count,) for cell_id, vdj_count, _, _ in scenarios},
    )
    expected_v_segment = expected_frame(
        ("v_segment",),
        {cell_id: (v_count,) for cell_id, _, v_count, _ in scenarios},
    )
    expected_subregion = expected_frame(
        subregion_labels,
        {cell_id: per_subregion for cell_id, _, _, per_subregion in scenarios},
    )

    # Index/column names and dtypes are not compared; values and label order are.
    comparisons = (
        (observed_subregion, expected_subregion),
        (observed_v_segment, expected_v_segment),
        (observed_vdj, expected_vdj),
    )
    for observed, expected in comparisons:
        pdt.assert_frame_equal(observed, expected, check_names=False, check_dtype=False)

    # Data from the ``adata_not_aligned`` fixture must be rejected.
    with npt.assert_raises(ValueError):
        count_mutations(adata_not_aligned, "IMGT_V(D)J")
1 change: 1 addition & 0 deletions src/scirpy/tl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
from ._diversity import alpha_diversity
from ._group_abundance import group_abundance
from ._ir_query import ir_query, ir_query_annotate, ir_query_annotate_df
from ._mutational_load import mutational_load
grst marked this conversation as resolved.
Show resolved Hide resolved
from ._repertoire_overlap import repertoire_overlap
from ._spectratype import spectratype
Loading
Loading