Merge pull request #50 from CSOgroup/pre-commit-ci-update-config

[pre-commit.ci] pre-commit autoupdate
CSOgroup · Aug 19, 2024 · 58a36e5 · 58a36e5
2 parents d2e76bd + ce55415
commit 58a36e5
Show file tree

Hide file tree

Showing 7 changed files with 106 additions and 108 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,80 +1,80 @@
 fail_fast: false
 default_language_version:
-    python: python3
-    node: 16.14.2
+  python: python3
+  node: 16.14.2
 default_stages:
-    - commit
-    - push
+  - commit
+  - push
 minimum_pre_commit_version: 2.16.0
 repos:
-    - repo: https://github.com/psf/black
-      rev: 23.3.0
-      hooks:
-          - id: black
-    - repo: https://github.com/pre-commit/mirrors-prettier
-      rev: v3.0.0-alpha.9-for-vscode
-      hooks:
-          - id: prettier
-    - repo: https://github.com/asottile/blacken-docs
-      rev: 1.14.0
-      hooks:
-          - id: blacken-docs
-    - repo: https://github.com/PyCQA/isort
-      rev: 5.12.0
-      hooks:
-          - id: isort
-    - repo: https://github.com/asottile/yesqa
-      rev: v1.5.0
-      hooks:
-          - id: yesqa
-            additional_dependencies:
-                - flake8-tidy-imports
-                - flake8-docstrings
-                - flake8-rst-docstrings
-                - flake8-comprehensions
-                - flake8-bugbear
-                - flake8-blind-except
-    - repo: https://github.com/pre-commit/pre-commit-hooks
-      rev: v4.4.0
-      hooks:
-          - id: detect-private-key
-          - id: check-ast
-          - id: end-of-file-fixer
-          - id: mixed-line-ending
-            args: [--fix=lf]
-          - id: trailing-whitespace
-          - id: check-case-conflict
-    - repo: https://github.com/PyCQA/autoflake
-      rev: v2.2.0
-      hooks:
-          - id: autoflake
-            args:
-                - --in-place
-                - --remove-all-unused-imports
-                - --remove-unused-variable
-                - --ignore-init-module-imports
-    - repo: https://github.com/PyCQA/flake8
-      rev: 6.0.0
-      hooks:
-          - id: flake8
-            additional_dependencies:
-                - flake8-tidy-imports
-                - flake8-docstrings
-                - flake8-rst-docstrings
-                - flake8-comprehensions
-                - flake8-bugbear
-                - flake8-blind-except
-    - repo: https://github.com/asottile/pyupgrade
-      rev: v3.7.0
-      hooks:
-          - id: pyupgrade
-            args: [--py3-plus, --py38-plus, --keep-runtime-typing]
-    - repo: local
-      hooks:
-          - id: forbid-to-commit
-            name: Don't commit rej files
-            entry: |
-                Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates.
-                Fix the merge conflicts manually and remove the .rej files.
-            language: fail
-            files: '.*\.rej$'
+  - repo: https://github.com/psf/black
+    rev: 24.8.0
+    hooks:
+      - id: black
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v4.0.0-alpha.8
+    hooks:
+      - id: prettier
+  - repo: https://github.com/asottile/blacken-docs
+    rev: 1.18.0
+    hooks:
+      - id: blacken-docs
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+  - repo: https://github.com/asottile/yesqa
+    rev: v1.5.0
+    hooks:
+      - id: yesqa
+        additional_dependencies:
+          - flake8-tidy-imports
+          - flake8-docstrings
+          - flake8-rst-docstrings
+          - flake8-comprehensions
+          - flake8-bugbear
+          - flake8-blind-except
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: detect-private-key
+      - id: check-ast
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: [--fix=lf]
+      - id: trailing-whitespace
+      - id: check-case-conflict
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.3.1
+    hooks:
+      - id: autoflake
+        args:
+          - --in-place
+          - --remove-all-unused-imports
+          - --remove-unused-variable
+          - --ignore-init-module-imports
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.1.1
+    hooks:
+      - id: flake8
+        additional_dependencies:
+          - flake8-tidy-imports
+          - flake8-docstrings
+          - flake8-rst-docstrings
+          - flake8-comprehensions
+          - flake8-bugbear
+          - flake8-blind-except
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.17.0
+    hooks:
+      - id: pyupgrade
+        args: [--py3-plus, --py38-plus, --keep-runtime-typing]
+  - repo: local
+    hooks:
+      - id: forbid-to-commit
+        name: Don't commit rej files
+        entry: |
+          Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates.
+          Fix the merge conflicts manually and remove the .rej files.
+        language: fail
+        files: '.*\.rej$'
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -1,16 +1,16 @@
 # https://docs.readthedocs.io/en/stable/config-file/v2.html
 version: 2
 build:
-    os: ubuntu-20.04
-    tools:
-        python: "3.10"
+  os: ubuntu-20.04
+  tools:
+    python: "3.10"
 sphinx:
-    configuration: docs/conf.py
-    # disable this for more lenient docs builds
-    fail_on_warning: false
+  configuration: docs/conf.py
+  # disable this for more lenient docs builds
+  fail_on_warning: false
 python:
-    install:
-        - method: pip
-          path: .
-          extra_requirements:
-              - doc
+  install:
+    - method: pip
+      path: .
+      extra_requirements:
+        - doc
diff --git a/README.md b/README.md
@@ -39,15 +39,15 @@ CellCharter is able to automatically identify spatial domains, and offers a suit
 
 ## Features
 
--  **Identify niches for multiple samples**: By combining the power of scVI and scArches, CellCharter can identify domains for multiple samples simultaneously, even with in presence of batch effects.
--  **Scalability**: CellCharter can handle large datasets with millions of cells and thousands of features. The possibility to run it on GPUs makes it even faster
--  **Flexibility**: CellCharter can be used with different types of spatial -omics data, such as spatial transcriptomics, proteomics, epigenomics and multiomics data. The only difference is the method used for dimensionality reduction and batch effect removal.
-    - Spatial transcriptomics: CellCharter has been tested on [scVI](https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.SCVI.html#scvi.model.SCVI) with Zero-inflated negative binomial distribution.
-    - Spatial proteomics: CellCharter has been tested on a version of [scArches](https://docs.scarches.org/en/latest/api/models.html#scarches.models.TRVAE), modified to be use Mean Squared Error loss instead of the default Negative Binomial loss.
-    - Spatial epigenomics: CellCharter has been tested on [scVI](https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.SCVI.html#scvi.model.SCVI) with Poisson distribution.
-    - Spatial multiomics: it's possible to use multi-omics models such as [MultiVI](https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.MULTIVI.html#scvi.model.MULTIVI), or use the concatenativo of the results from the different models.
--  **Best candidates for number of domains**: CellCharter offers a [method to find multiple best candidates](https://cellcharter.readthedocs.io/en/latest/generated/cellcharter.tl.ClusterAutoK.html) for the number of domains, based on the stability of a certain number of domains across multiple runs.
-- **Domain characterization**: CellCharter provides a set of tools to characterize and compare the spatial domains, such as domain proportion, cell type enrichment, (differential) neighborhood enrichment, domain shape characterization.
+-   **Identify niches for multiple samples**: By combining the power of scVI and scArches, CellCharter can identify domains for multiple samples simultaneously, even in the presence of batch effects.
+-   **Scalability**: CellCharter can handle large datasets with millions of cells and thousands of features. The possibility to run it on GPUs makes it even faster.
+-   **Flexibility**: CellCharter can be used with different types of spatial -omics data, such as spatial transcriptomics, proteomics, epigenomics and multiomics data. The only difference is the method used for dimensionality reduction and batch effect removal.
+    -   Spatial transcriptomics: CellCharter has been tested on [scVI](https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.SCVI.html#scvi.model.SCVI) with Zero-inflated negative binomial distribution.
+    -   Spatial proteomics: CellCharter has been tested on a version of [scArches](https://docs.scarches.org/en/latest/api/models.html#scarches.models.TRVAE), modified to use Mean Squared Error loss instead of the default Negative Binomial loss.
+    -   Spatial epigenomics: CellCharter has been tested on [scVI](https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.SCVI.html#scvi.model.SCVI) with Poisson distribution.
+    -   Spatial multiomics: it's possible to use multi-omics models such as [MultiVI](https://docs.scvi-tools.org/en/stable/api/reference/scvi.model.MULTIVI.html#scvi.model.MULTIVI), or use the concatenation of the results from the different models.
+-   **Best candidates for number of domains**: CellCharter offers a [method to find multiple best candidates](https://cellcharter.readthedocs.io/en/latest/generated/cellcharter.tl.ClusterAutoK.html) for the number of domains, based on the stability of a certain number of domains across multiple runs.
+-   **Domain characterization**: CellCharter provides a set of tools to characterize and compare the spatial domains, such as domain proportion, cell type enrichment, (differential) neighborhood enrichment, domain shape characterization.
 
 Since CellCharter 0.3.0, we moved the implementation of the Gaussian Mixture Model (GMM) from [PyCave](https://github.com/borchero/pycave), which is no longer maintained, to [TorchGMM](https://github.com/CSOgroup/torchgmm), a fork of PyCave maintained by the CSOgroup. This change allows us to have a more stable and maintained implementation of GMM that is compatible with the most recent versions of PyTorch.
 

diff --git a/src/cellcharter/gr/_utils.py b/src/cellcharter/gr/_utils.py
@@ -1,4 +1,5 @@
 """Graph utilities."""
+
 from __future__ import annotations
 
 from anndata import AnnData

diff --git a/src/cellcharter/tl/_autok.py b/src/cellcharter/tl/_autok.py
@@ -86,7 +86,7 @@ def __init__(
         self.similarity_function = similarity_function if similarity_function else fowlkes_mallows_score
         self.stability = []
 
-    def fit(self, adata: ad.AnnData, use_rep: str = 'X_cellcharter'):
+    def fit(self, adata: ad.AnnData, use_rep: str = "X_cellcharter"):
         """
         Cluster data multiple times for each number of clusters (K) in the selected range and compute the average stability for each them.
 
@@ -97,24 +97,24 @@ def fit(self, adata: ad.AnnData, use_rep: str = 'X_cellcharter'):
         use_rep
             Key in :attr:`anndata.AnnData.obsm` to use as data to fit the clustering model. If ``None``, uses :attr:`anndata.AnnData.X`.
         """
-
         if use_rep not in adata.obsm:
             raise ValueError(f"{use_rep} not found in adata.obsm. If you want to use adata.X, set use_rep=None")
 
         X = adata.obsm[use_rep] if use_rep is not None else adata.X
 
-
         self.labels = defaultdict(list)
         self.best_models = {}
 
         random_state = self.model_params.pop("random_state", 0)
 
-        if ("trainer_params" not in self.model_params) or ("enable_progress_bar" not in self.model_params["trainer_params"]):
+        if ("trainer_params" not in self.model_params) or (
+            "enable_progress_bar" not in self.model_params["trainer_params"]
+        ):
             self.model_params["trainer_params"] = {"enable_progress_bar": False}
 
         previous_stability = None
         for i in range(self.max_runs):
-            print(f"Iteration {i+1}/{self.max_runs}")
+            print(f"Iteration {i + 1}/{self.max_runs}")
             new_labels = {}
 
             for k in tqdm(self.n_clusters, disable=(len(self.n_clusters) == 1)):
@@ -147,7 +147,7 @@ def fit(self, adata: ad.AnnData, use_rep: str = 'X_cellcharter'):
                         for k, new_l in new_labels.items():
                             self.labels[k].append(new_l)
                         print(
-                            f"Convergence with a change in stability of {stability_change} reached after {i+1} iterations"
+                            f"Convergence with a change in stability of {stability_change} reached after {i + 1} iterations"
                         )
                         break
 
@@ -176,10 +176,10 @@ def best_k(self) -> int:
         stability_mean = np.array([np.mean(self.stability[k]) for k in range(len(self.n_clusters[1:-1]))])
         best_idx = np.argmax(stability_mean)
         return self.n_clusters[best_idx + 1]
-    
+
     @property
     def peaks(self) -> List[int]:
-        """ Find the peaks in the stability curve. """
+        """Find the peaks in the stability curve."""
         if self.max_runs <= 1:
             raise ValueError("Cannot compute stability with max_runs <= 1")
         stability_mean = np.array([np.mean(self.stability[k]) for k in range(len(self.n_clusters[1:-1]))])
@@ -205,9 +205,7 @@ def predict(self, adata: ad.AnnData, use_rep: str = None, k: int = None) -> pd.C
         X = (
             adata.obsm[use_rep]
             if use_rep is not None
-            else adata.obsm["X_cellcharter"]
-            if "X_cellcharter" in adata.obsm
-            else adata.X
+            else adata.obsm["X_cellcharter"] if "X_cellcharter" in adata.obsm else adata.X
         )
         return pd.Categorical(self.best_models[k].predict(X), categories=np.arange(k))
 

diff --git a/src/cellcharter/tl/_gmm.py b/src/cellcharter/tl/_gmm.py
@@ -9,7 +9,6 @@
 import scipy.sparse as sps
 import torch
 from pytorch_lightning import Trainer
-from torchgmm import set_logging_level
 from torchgmm.base.data import (
     DataLoader,
     TensorLike,

diff --git a/src/cellcharter/tl/_shape.py b/src/cellcharter/tl/_shape.py
@@ -167,7 +167,7 @@ def _remove_dangling_branches(graph, min_ratio=0.05):
 
 
 def _longest_path_from_node(graph, u):
-    visited = {i: False for i in list(graph.nodes)}
+    visited = dict.fromkeys(graph.nodes)
     distance = {i: -1 for i in list(graph.nodes)}
     idx2node = dict(enumerate(graph.nodes))