Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat): support ellipsis indexing #1729

Merged
merged 25 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d2b859d
(feat): support ellipsis indexing
ilan-gold Oct 22, 2024
df109f6
(chore): release note
ilan-gold Oct 22, 2024
db86a20
(chore): add tests for all axis combinations
ilan-gold Oct 22, 2024
5822ed5
(fix): index typing
ilan-gold Oct 22, 2024
2a6c837
(feat): add support for 3d ellipsis/error on >1 ellipsis
ilan-gold Oct 24, 2024
87a2dbf
Merge branch 'main' into ig/ellipsis_indexing
ilan-gold Oct 24, 2024
c69aa7e
(fix): `unpack_index` type
ilan-gold Oct 24, 2024
eb36543
Merge branch 'ig/ellipsis_indexing' of github.com:scverse/anndata int…
ilan-gold Oct 24, 2024
174596a
(fix): ignore ellipsis in docs
ilan-gold Oct 24, 2024
f018328
(fix): import
ilan-gold Oct 24, 2024
f580cad
Update docs/conf.py
ilan-gold Oct 25, 2024
e994076
(chore): `is` instead of `isinstance` checks
ilan-gold Oct 25, 2024
7cfbb57
Merge branch 'ig/ellipsis_indexing' of github.com:scverse/anndata int…
ilan-gold Oct 25, 2024
f0b7e7e
(fix): try making `unpack_index` more general-purpose
ilan-gold Oct 25, 2024
a06675a
Apply suggestions from code review
ilan-gold Oct 25, 2024
ff5eb93
fix test_backed_ellipsis_indexing
flying-sheep Oct 25, 2024
52ea6d3
(refactor): simplify 3-elem index handling
ilan-gold Oct 25, 2024
95d9d37
(refactor): simplify tests
ilan-gold Oct 25, 2024
57c479c
merge
ilan-gold Oct 25, 2024
defbf37
(fix): use correct base-comparison
ilan-gold Oct 25, 2024
8a08e2b
Update src/anndata/_core/index.py
ilan-gold Oct 25, 2024
916cd3d
(fix): use `id` per param
ilan-gold Oct 25, 2024
8edffaf
Merge branch 'ig/ellipsis_indexing' of github.com:scverse/anndata int…
ilan-gold Oct 25, 2024
6aff0f2
(chore): split ellipsis index
ilan-gold Oct 25, 2024
37fd1e0
fix types
flying-sheep Oct 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@
("py:obj", "numpy._typing._array_like._ScalarType_co"),
# https://github.com/sphinx-doc/sphinx/issues/10974
("py:class", "numpy.int64"),
# https://github.com/tox-dev/sphinx-autodoc-typehints/issues/498
("py:class", "types.EllipsisType"),
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
]


Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/1729.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for ellipsis indexing of the {class}`~anndata.AnnData` object {user}`ilan-gold`
3 changes: 2 additions & 1 deletion src/anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@
from os import PathLike
from typing import Any, Literal

from ..compat import Index1D
from ..typing import ArrayDataStructureType
from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
from .index import Index, Index1D
from .index import Index


# for backwards compat
Expand Down
36 changes: 22 additions & 14 deletions src/anndata/_core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,8 @@ def _normalize_indices(
if isinstance(index, pd.Series):
index: Index = index.values
if isinstance(index, tuple):
if len(index) > 2:
raise ValueError("AnnData can only be sliced in rows and columns.")
# deal with pd.Series
# TODO: The series should probably be aligned first
if isinstance(index[1], pd.Series):
index = index[0], index[1].values
if isinstance(index[0], pd.Series):
index = index[0].values, index[1]
index = tuple(i.values if isinstance(i, pd.Series) else i for i in index)
ax0, ax1 = unpack_index(index)
ax0 = _normalize_index(ax0, names0)
ax1 = _normalize_index(ax1, names1)
Expand Down Expand Up @@ -107,8 +101,7 @@ def name_idx(i):
"are not valid obs/ var names or indices."
)
return positions # np.ndarray[int]
else:
raise IndexError(f"Unknown indexer {indexer!r} of type {type(indexer)}")
raise IndexError(f"Unknown indexer {indexer!r} of type {type(indexer)}")


def _fix_slice_bounds(s: slice, length: int) -> slice:
Expand All @@ -132,13 +125,28 @@ def _fix_slice_bounds(s: slice, length: int) -> slice:

def unpack_index(index: Index) -> tuple[Index1D, Index1D]:
if not isinstance(index, tuple):
if index is Ellipsis:
index = slice(None)
return index, slice(None)
elif len(index) == 2:
num_ellipsis = sum(i is Ellipsis for i in index)
if num_ellipsis > 1:
raise IndexError("an index can only have a single ellipsis ('...')")
# If index has Ellipsis, filter it out (and if not, error)
if len(index) > 2:
if not num_ellipsis:
raise IndexError("Received a length 3 index without an ellipsis")
index = tuple(i for i in index if i is not Ellipsis)
return index
elif len(index) == 1:
return index[0], slice(None)
else:
raise IndexError("invalid number of indices")
# If index has Ellipsis, replace it with slice
if len(index) == 2:
index = tuple(slice(None) if i is Ellipsis else i for i in index)
return index
if len(index) == 1:
index = index[0]
if index is Ellipsis:
index = slice(None)
return index, slice(None)
raise IndexError("invalid number of indices")


@singledispatch
Expand Down
13 changes: 12 additions & 1 deletion src/anndata/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from importlib.util import find_spec
from inspect import Parameter, signature
from pathlib import Path
from types import EllipsisType
from typing import TYPE_CHECKING, TypeVar
from warnings import warn

Expand Down Expand Up @@ -47,7 +48,17 @@ class Empty:


Index1D = slice | int | str | np.int64 | np.ndarray
Index = Index1D | tuple[Index1D, Index1D] | scipy.sparse.spmatrix | SpArray
IndexRest = Index1D | EllipsisType
Index = (
IndexRest
| tuple[Index1D, IndexRest]
| tuple[IndexRest, Index1D]
| tuple[Index1D, Index1D, EllipsisType]
| tuple[EllipsisType, Index1D, Index1D]
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
| tuple[Index1D, EllipsisType, Index1D]
| scipy.sparse.spmatrix
| SpArray
)
H5Group = h5py.Group
H5Array = h5py.Dataset
H5File = h5py.File
Expand Down
51 changes: 51 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from functools import partial
from typing import TYPE_CHECKING

import joblib
import pytest
Expand All @@ -10,12 +11,62 @@
import anndata as ad
from anndata.tests.helpers import subset_func # noqa: F401

if TYPE_CHECKING:
from types import EllipsisType


@pytest.fixture
def backing_h5ad(tmp_path):
return tmp_path / "test.h5ad"


@pytest.fixture(
params=[
pytest.param(((...), (slice(None), slice(None))), id="ellipsis"),
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved
pytest.param(
((..., slice(0, 10)), (slice(None), slice(0, 10))), id="obs-ellipsis"
),
pytest.param(
((slice(0, 10), ...), (slice(0, 10), slice(None))), id="var-ellipsis"
),
pytest.param(
((slice(0, 10), slice(0, 10), ...), (slice(0, 10), slice(0, 10))),
id="obs-var-ellipsis",
),
pytest.param(
((..., slice(0, 10), slice(0, 10)), (slice(0, 10), slice(0, 10))),
id="ellipsis-obs-var",
),
pytest.param(
((slice(0, 10), ..., slice(0, 10)), (slice(0, 10), slice(0, 10))),
id="obs-ellipsis-var",
),
]
)
def ellipsis_index_with_equivalent(
request,
) -> tuple[tuple[EllipsisType | slice], tuple[slice, slice]]:
return request.param


@pytest.fixture
def ellipsis_index(
ellipsis_index_with_equivalent: tuple[
tuple[EllipsisType | slice], tuple[slice, slice]
],
) -> tuple[EllipsisType | slice]:
return ellipsis_index_with_equivalent[0]


@pytest.fixture
def equivalent_ellipsis_index(
ellipsis_index_with_equivalent: tuple[
tuple[EllipsisType | slice], tuple[slice, slice]
],
) -> tuple[slice, slice]:
return ellipsis_index_with_equivalent[1]


#####################
# Dask tokenization #
#####################
Expand Down
12 changes: 12 additions & 0 deletions tests/test_backed_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
if TYPE_CHECKING:
from collections.abc import Callable, Generator, Sequence
from pathlib import Path
from types import EllipsisType

from _pytest.mark import ParameterSet
from numpy.typing import ArrayLike, NDArray
Expand Down Expand Up @@ -127,6 +128,17 @@ def test_backed_indexing(
assert_equal(csr_mem[:, var_idx].X, dense_disk[:, var_idx].X)


def test_backed_ellipsis_indexing(
ondisk_equivalent_adata: tuple[AnnData, AnnData, AnnData, AnnData],
ellipsis_index: tuple[EllipsisType | slice],
equivalent_ellipsis_index: tuple[slice],
):
csr_mem, csr_disk, csc_disk, _ = ondisk_equivalent_adata

assert_equal(csr_mem.X[equivalent_ellipsis_index], csr_disk.X[ellipsis_index])
assert_equal(csr_mem.X[equivalent_ellipsis_index], csc_disk.X[ellipsis_index])


def make_randomized_mask(size: int) -> np.ndarray:
randomized_mask = np.zeros(size, dtype=bool)
inds = np.random.choice(size, 20, replace=False)
Expand Down
28 changes: 28 additions & 0 deletions tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from contextlib import ExitStack
from copy import deepcopy
from operator import mul
from typing import TYPE_CHECKING

import joblib
import numpy as np
Expand Down Expand Up @@ -35,6 +36,9 @@
)
from anndata.utils import asarray

if TYPE_CHECKING:
from types import EllipsisType

IGNORE_SPARSE_EFFICIENCY_WARNING = pytest.mark.filterwarnings(
"ignore:Changing the sparsity structure:scipy.sparse.SparseEfficiencyWarning"
)
Expand Down Expand Up @@ -786,6 +790,30 @@ def test_dataframe_view_index_setting():
assert a2.obs.index.values.tolist() == ["a", "b"]


def test_ellipsis_index(
ellipsis_index: tuple[EllipsisType | slice],
equivalent_ellipsis_index: tuple[slice],
matrix_type,
):
adata = gen_adata((10, 10), X_type=matrix_type, **GEN_ADATA_DASK_ARGS)
subset_ellipsis = adata[ellipsis_index]
subset = adata[equivalent_ellipsis_index]
assert_equal(subset_ellipsis, subset)


@pytest.mark.parametrize(
("index", "expected_error"),
[
((..., 0, ...), r"only have a single ellipsis"),
((0, 0, 0), r"Received a length 3 index"),
],
ids=["ellipsis-int-ellipsis", "int-int-int"],
)
def test_index_3d_errors(index: tuple[int | EllipsisType, ...], expected_error: str):
with pytest.raises(IndexError, match=expected_error):
gen_adata((10, 10))[index]


# @pytest.mark.parametrize("dim", ["obs", "var"])
# @pytest.mark.parametrize(
# ("idx", "pat"),
Expand Down