Merge pull request #238 from YosefLab/lca-reconstruct
Lca reconstruct
colganwi authored Mar 8, 2024
2 parents bb2fd4f + dced953 commit 41dbff8
Showing 2 changed files with 54 additions and 13 deletions.
14 changes: 10 additions & 4 deletions cassiopeia/data/utilities.py
@@ -1,6 +1,7 @@
"""
General utilities for the datasets encountered in Cassiopeia.
"""

import collections
from joblib import delayed
import multiprocessing
@@ -61,7 +62,7 @@ def get_lca_characters(
all_states = [
vec[i] for vec in vecs if vec[i] != missing_state_indicator
]

# this check is specifically if all_states consists of a single
# ambiguous state.
if len(list(set(all_states))) == 1:
@@ -72,6 +73,9 @@
else:
lca_vec[i] = all_states[0]
else:
+ all_ambiguous = np.all(
+ [is_ambiguous_state(s) for s in all_states]
+ )
chars = set.intersection(
*map(
set,
@@ -83,6 +87,10 @@
)
if len(chars) == 1:
lca_vec[i] = list(chars)[0]
+ if all_ambiguous:
+ # if we only have ambiguous states, we set the LCA state
+ # to be the intersection.
+ lca_vec[i] = tuple(chars)
return lca_vec


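Note: the change above extends get_lca_characters so that, when every observed state at a character is itself ambiguous, the LCA keeps the whole intersection as an ambiguous (tuple) state; previously the intersection was only used when it reduced to exactly one state. A small usage sketch follows, not part of this commit, with inputs and the expected output modeled on the new test_lca_characters_ambiguous2 added below (assumes cassiopeia.data.utilities imports as shown):

# Usage sketch (illustrative only, not committed code).
from cassiopeia.data import utilities as data_utilities

lca = data_utilities.get_lca_characters(
    [
        [(0, 2, 3), (4, 62)],  # sample 1: both characters ambiguous
        [(0, 2), 4],           # sample 2: ambiguous, then unambiguous
    ],
    missing_state_indicator=-1,
)
# character 0: every observed state is ambiguous, so the intersection
#   is kept as an ambiguous state -> (0, 2)
# character 1: only one state is ambiguous and the intersection is a
#   single state, so it collapses -> 4
print(lca)  # [(0, 2), 4]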
@@ -228,9 +236,7 @@ def compute_dissimilarity_map(
]

# load character matrix into shared memory
- shm = shared_memory.SharedMemory(
- create=True, size=cm.nbytes
- )
+ shm = shared_memory.SharedMemory(create=True, size=cm.nbytes)
shared_cm = np.ndarray(cm.shape, dtype=cm.dtype, buffer=shm.buf)
shared_cm[:] = cm[:]

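Note: the compute_dissimilarity_map hunk is a line-wrapping change only; the call it touches is the step that copies the character matrix into a shared-memory block so worker processes can read it without each receiving its own copy (the parallel code itself is outside the hunk shown). A minimal standalone sketch of that pattern, using only the standard library and NumPy; the example array and names other than SharedMemory/np.ndarray are illustrative:

import numpy as np
from multiprocessing import shared_memory

cm = np.zeros((10, 8), dtype=np.int64)  # stand-in for a character matrix

# allocate a shared block and copy the matrix into it
shm = shared_memory.SharedMemory(create=True, size=cm.nbytes)
shared_cm = np.ndarray(cm.shape, dtype=cm.dtype, buffer=shm.buf)
shared_cm[:] = cm[:]

# a worker process would reattach by name rather than copy the data:
# existing = shared_memory.SharedMemory(name=shm.name)
# view = np.ndarray(cm.shape, dtype=cm.dtype, buffer=existing.buf)

# release the block once all workers are finished
shm.close()
shm.unlink()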
53 changes: 44 additions & 9 deletions test/data_tests/data_utilities_test.py
@@ -85,7 +85,7 @@ def test_bootstrap_character_matrices_no_priors(self):

self.assertEqual(len(bootstrap_samples), 10)

- for (bootstrap_matrix, bootstrap_priors) in bootstrap_samples:
+ for bootstrap_matrix, bootstrap_priors in bootstrap_samples:
self.assertCountEqual(
self.character_matrix.index, bootstrap_matrix.index
)
@@ -113,7 +113,7 @@ def test_bootstrap_character_matrices_with_priors(self):

self.assertEqual(len(bootstrap_samples), 10)

- for (bootstrap_matrix, bootstrap_priors) in bootstrap_samples:
+ for bootstrap_matrix, bootstrap_priors in bootstrap_samples:
self.assertCountEqual(
self.character_matrix.index, bootstrap_matrix.index
)
@@ -316,6 +316,37 @@ def test_lca_characters_ambiguous(self):
)
self.assertEqual(ret_vec, [1, 2, 3, 0, 5])

+ def test_lca_characters_ambiguous2(self):
+
+ s1 = [
+ (4, 62),
+ (3, 10),
+ (3, 10, 16),
+ (0, 3),
+ (0, 2, 3),
+ (0, 2, 3),
+ (0, 4, 7),
+ (0, 2, 23),
+ (0, 1, 4, 44),
+ ]
+ s2 = [4, 3, -1, 0, 0, 0, (0, 7), (0, 2), (0, 4)]
+
+ expected_reconstruction = [
+ 4,
+ 3,
+ (3, 10, 16),
+ 0,
+ 0,
+ 0,
+ (0, 7),
+ (0, 2),
+ (0, 4),
+ ]
+ ret_vec = data_utilities.get_lca_characters(
+ [s1, s2], missing_state_indicator=-1
+ )
+ self.assertEqual(ret_vec, expected_reconstruction)
+
def test_lca_characters_ambiguous_and_missing(self):
vecs = [
[(1, 1), (0, 2), (3, 0), (4,), (5,)],
@@ -325,7 +356,7 @@ def test_lca_characters_ambiguous_and_missing(self):
ret_vec = data_utilities.get_lca_characters(
vecs, missing_state_indicator=-1
)
- self.assertEqual(ret_vec, [1, (0,2), (3,0), 0, 5])
+ self.assertEqual(ret_vec, [1, (0, 2), (3, 0), 0, 5])

def test_resolve_most_abundant(self):
state = (1, 2, 3, 3)
@@ -452,8 +483,10 @@ def test_inter_cluster_distance_basic(self):

tree = CassiopeiaTree(tree=tree, cell_meta=meta_data)

- inter_cluster_distances = data_utilities.compute_inter_cluster_distances(
- tree, meta_item="CellType"
+ inter_cluster_distances = (
+ data_utilities.compute_inter_cluster_distances(
+ tree, meta_item="CellType"
+ )
)

expected_distances = pd.DataFrame.from_dict(
@@ -507,10 +540,12 @@ def test_inter_cluster_distance_custom_input(self):

tree = CassiopeiaTree(tree=tree)

- inter_cluster_distances = data_utilities.compute_inter_cluster_distances(
- tree,
- meta_data=meta_data["CellType"],
- dissimilarity_map=weight_matrix,
+ inter_cluster_distances = (
+ data_utilities.compute_inter_cluster_distances(
+ tree,
+ meta_data=meta_data["CellType"],
+ dissimilarity_map=weight_matrix,
+ )
)

expected_distances = pd.DataFrame.from_dict(
