Skip to content

Commit

Permalink
Updating the code base for NAR publication
Browse files Browse the repository at this point in the history
  • Loading branch information
uddamvathanak committed Oct 24, 2024
1 parent 2d7805d commit 75c113d
Show file tree
Hide file tree
Showing 34 changed files with 1,405 additions and 581 deletions.
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified .readthedocs.yaml
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
-->
[![Documentation Status](https://readthedocs.org/projects/discotoolkit-py/badge/?version=latest)](https://discotoolkit-py.readthedocs.io/en/latest/?badge=latest) [![Downloads](https://static.pepy.tech/personalized-badge/discotoolkit?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads)](https://pepy.tech/project/discotoolkit) [![PyPI version](https://img.shields.io/pypi/v/discotoolkit)](https://pypi.org/project/discotoolkit)

# DISCOtoolkit 1.1.3
# DISCOtoolkit 1.1.4

DISCOtoolkit is a Python package that allows users to access data and use the tools provided by the [DISCO database](https://www.immunesinglecell.org/). Read the documentation [DISCOtoolkit](https://discotoolkit-py.readthedocs.io/en/latest/). It provides the following functions:

Expand Down
Empty file modified __pycache__/main.cpython-38.pyc
100644 → 100755
Empty file.
Empty file modified __pycache__/setup.cpython-38.pyc
100644 → 100755
Empty file.
Empty file modified __pycache__/test_file.cpython-38.pyc
100644 → 100755
Empty file.
335 changes: 244 additions & 91 deletions build/lib/discotoolkit/CELLiD.py

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions build/lib/discotoolkit/DiscoClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ class Filter:
"""
Filter class object to save the attributes for filtering the dataset from DISCO
sample String e.g. GSM3891625_3;
project String;
tissue String e.g. Lung, Bladder;
disease String e.g. PDAC;
sample_id String e.g. ERX2757110;
project_id String;
tissue String e.g. lung, bladder;
disease String e.g. COVID-19;
platform String e.g. 10x3';
sample_type String;
cell_type String;
Expand All @@ -20,12 +20,12 @@ class Filter:
return Class object
"""

def __init__(self, sample = None, project = None, tissue = None, disease = None, platform = None, sample_type = None,
def __init__(self, sample_id = None, project_id = None, tissue = None, disease = None, platform = None, sample_type = None,
cell_type = None, cell_type_confidence : str = "medium", include_cell_type_children : bool = True, min_cell_per_sample : int = 100):

# handling for string and list input
self.sample = self.convert_to_list(sample) # sample id
self.project = self.convert_to_list(project) # project, lab, or dataset from different author
self.sample_id = self.convert_to_list(sample_id) # sample id
self.project_id = self.convert_to_list(project_id) # project, lab, or dataset from different author
self.tissue = self.convert_to_list(tissue) # organ tissue
self.disease = self.convert_to_list(disease) # cancer or non cancer, or COVID-1e9 disease
self.platform = self.convert_to_list(platform) # sequencing platform
Expand Down
314 changes: 204 additions & 110 deletions build/lib/discotoolkit/DownloadDiscoData.py

Large diffs are not rendered by default.

233 changes: 158 additions & 75 deletions build/lib/discotoolkit/GetMetadata.py

Large diffs are not rendered by default.

18 changes: 13 additions & 5 deletions build/lib/discotoolkit/GlobalVariable.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
'''
Description:
version:
Author: Mengwei Li
Date: 2023-07-06 16:59:59
LastEditors: Mengwei Li
LastEditTime: 2024-10-23 15:52:29
'''
"""
Global variable file to import for the subsequent script.
Expand All @@ -17,9 +25,9 @@
timeout = 600

# Define package-level variable
response = requests.get("http://www.immunesinglecell.org/api/vishuo/getToolkitUrl")
# response = requests.get("http://www.immunesinglecell.org/api/vishuo/getToolkitUrl")

if response.status_code == 200:
prefix_disco_url = json.loads(response.text)["url"]
else:
prefix_disco_url = "http://www.immunesinglecell.org/toolkitapi"
# if response.status_code == 200:
# prefix_disco_url = json.loads(response.text)["url"]
# else:
prefix_disco_url = "https://immunesinglecell.org/disco_v3_api/"
74 changes: 74 additions & 0 deletions build/lib/discotoolkit/Utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import h5py
import numpy as np
from scipy.sparse import csr_matrix
from pathlib import Path


def write_10X_h5(adata, file):
    """Write ``adata`` to a 10X-formatted HDF5 file.

    The matrix is stored under the ``matrix`` group as ``barcodes``, ``data``,
    ``indices``, ``indptr`` and ``shape`` (the shape is written reversed, i.e.
    ``(n_vars, n_obs)``), plus a ``features`` sub-group holding
    ``feature_type``, ``genome``, ``id`` and ``name``.

    Note that this function is not fully tested and may not work for all cases.
    Compared to a file produced by 10X, the ``features`` group will not contain
    the keys '_all_tag_keys', 'pattern', 'read' and 'sequence'.

    Args:
        adata (AnnData object): Object to be written. ``adata.X`` is converted
            to CSR before writing and its values are cast to integers.
            ``adata.var`` must provide the columns ``feature_types``,
            ``genome`` and ``gene_ids``.
        file (str or path-like): File name to be written to. If the file name
            (ignoring its parent directories) does not contain '.h5', the
            suffix '.h5' is appended.

    Raises:
        FileExistsError: If the target file already exists.

    Returns:
        None
    """
    file = str(file)
    # Inspect only the final path component so that a parent directory whose
    # name happens to contain ".h5" does not suppress the extension.
    if ".h5" not in Path(file).name:
        file = f"{file}.h5"
    if Path(file).exists():
        raise FileExistsError(f"There already is a file `{file}`.")

    def int_max(x):
        """Return the integer item size in bytes (4 or 8) used to store ``x``."""
        if len(x) == 0:
            # e.g. the data/indices arrays of an all-zero matrix
            return 4
        digits = len(str(int(max(x))))
        # Heuristic kept from the original: one 4-byte step per four decimal
        # digits; capped at 8 because "<i12", "<i16", ... are not valid dtypes.
        return min(max(digits // 4, 1) * 4, 8)

    def str_max(x):
        """Return the length of the longest string in ``x`` (at least 1)."""
        return max((len(i) for i in x), default=1)

    # The datasets below describe a CSR layout of (n_obs, n_vars); normalise
    # the input so CSC or dense matrices are not written with a wrong layout.
    matrix = csr_matrix(adata.X)
    var = adata.var

    # The context manager guarantees the file is flushed and closed, even if
    # one of the dataset writes raises.
    with h5py.File(file, "w") as w:
        grp = w.create_group("matrix")
        grp.create_dataset(
            "barcodes",
            data=np.array(adata.obs_names, dtype=f"|S{str_max(adata.obs_names)}"),
        )
        grp.create_dataset(
            "data", data=np.array(matrix.data, dtype=f"<i{int_max(matrix.data)}")
        )

        ftrs = grp.create_group("features")
        # this group will lack the following keys:
        # '_all_tag_keys', 'pattern', 'read', 'sequence'
        ftrs.create_dataset(
            "feature_type",
            data=np.array(
                var.feature_types, dtype=f"|S{str_max(var.feature_types)}"
            ),
        )
        ftrs.create_dataset(
            "genome",
            data=np.array(var.genome, dtype=f"|S{str_max(var.genome)}"),
        )
        ftrs.create_dataset(
            "id",
            data=np.array(var.gene_ids, dtype=f"|S{str_max(var.gene_ids)}"),
        )
        ftrs.create_dataset(
            "name", data=np.array(var.index, dtype=f"|S{str_max(var.index)}")
        )

        grp.create_dataset(
            "indices",
            data=np.array(matrix.indices, dtype=f"<i{int_max(matrix.indices)}"),
        )
        grp.create_dataset(
            "indptr",
            data=np.array(matrix.indptr, dtype=f"<i{int_max(matrix.indptr)}"),
        )
        grp.create_dataset(
            "shape",
            data=np.array(
                list(matrix.shape)[::-1], dtype=f"<i{int_max(matrix.shape)}"
            ),
        )
2 changes: 1 addition & 1 deletion build/lib/discotoolkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
from .CELLiD import *
from .GeneSearch import *

__version__ = "1.1.3"
__version__ = "1.1.4"
Loading

0 comments on commit 75c113d

Please sign in to comment.