From 75c113db57a1c09443d186610e2da14f15b4d8c2 Mon Sep 17 00:00:00 2001 From: uddamvathanak Date: Thu, 24 Oct 2024 17:24:23 +0800 Subject: [PATCH] Updating the code base for NAR publication --- .gitignore | 0 .readthedocs.yaml | 0 README.md | 2 +- __pycache__/main.cpython-38.pyc | Bin __pycache__/setup.cpython-38.pyc | Bin __pycache__/test_file.cpython-38.pyc | Bin build/lib/discotoolkit/CELLiD.py | 335 +++++++++++++----- build/lib/discotoolkit/DiscoClass.py | 14 +- build/lib/discotoolkit/DownloadDiscoData.py | 314 ++++++++++------ build/lib/discotoolkit/GetMetadata.py | 233 ++++++++---- build/lib/discotoolkit/GlobalVariable.py | 18 +- build/lib/discotoolkit/Utilities.py | 74 ++++ build/lib/discotoolkit/__init__.py | 2 +- discotoolkit/CELLiD.py | 335 +++++++++++++----- discotoolkit/DiscoClass.py | 14 +- discotoolkit/DownloadDiscoData.py | 314 ++++++++++------ discotoolkit/GetMetadata.py | 233 ++++++++---- discotoolkit/GlobalVariable.py | 18 +- discotoolkit/Utilities.py | 74 ++++ discotoolkit/__init__.py | 2 +- .../__pycache__/CELLiD.cpython-311.pyc | Bin 0 -> 24971 bytes .../__pycache__/DiscoClass.cpython-311.pyc | Bin 0 -> 3209 bytes .../DownloadDiscoData.cpython-311.pyc | Bin 0 -> 3492 bytes .../__pycache__/GeneSearch.cpython-311.pyc | Bin 0 -> 6477 bytes .../__pycache__/GetMetadata.cpython-311.pyc | Bin 0 -> 13674 bytes .../GlobalVariable.cpython-311.pyc | Bin 0 -> 620 bytes .../__pycache__/__init__.cpython-311.pyc | Bin 0 -> 375 bytes docs/.DS_Store | Bin 0 -> 6148 bytes docs/CELLiD_celltype_annotation.ipynb | 0 docs/Gene_search.ipynb | 0 docs/download_data.ipynb | 0 docs/index.md | 2 +- docs/scEnrichment.ipynb | 0 setup.py | 2 +- 34 files changed, 1405 insertions(+), 581 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 .readthedocs.yaml mode change 100644 => 100755 __pycache__/main.cpython-38.pyc mode change 100644 => 100755 __pycache__/setup.cpython-38.pyc mode change 100644 => 100755 __pycache__/test_file.cpython-38.pyc create mode 100644 build/lib/discotoolkit/Utilities.py create mode 100644 discotoolkit/Utilities.py create mode 100644 discotoolkit/__pycache__/CELLiD.cpython-311.pyc create mode 100644 discotoolkit/__pycache__/DiscoClass.cpython-311.pyc create mode 100644 discotoolkit/__pycache__/DownloadDiscoData.cpython-311.pyc create mode 100644 discotoolkit/__pycache__/GeneSearch.cpython-311.pyc create mode 100644 discotoolkit/__pycache__/GetMetadata.cpython-311.pyc create mode 100644 discotoolkit/__pycache__/GlobalVariable.cpython-311.pyc create mode 100644 discotoolkit/__pycache__/__init__.cpython-311.pyc create mode 100644 docs/.DS_Store mode change 100644 => 100755 docs/CELLiD_celltype_annotation.ipynb mode change 100644 => 100755 docs/Gene_search.ipynb mode change 100644 => 100755 docs/download_data.ipynb mode change 100644 => 100755 docs/scEnrichment.ipynb diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.readthedocs.yaml b/.readthedocs.yaml old mode 100644 new mode 100755 diff --git a/README.md b/README.md index 7d5fbf2..94adc90 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ --> [![Documentation Status](https://readthedocs.org/projects/discotoolkit-py/badge/?version=latest)](https://discotoolkit-py.readthedocs.io/en/latest/?badge=latest) [![Downloads](https://static.pepy.tech/personalized-badge/discotoolkit?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads)](https://pepy.tech/project/discotoolkit) [![PyPI 
version](https://img.shields.io/pypi/v/discotoolkit)](https://pypi.org/project/discotoolkit) -# DISCOtoolkit 1.1.3 +# DISCOtoolkit 1.1.4 DISCOtoolkit is an python package that allows users to access data and use the tools provided by the [DISCO database](https://www.immunesinglecell.org/). Read the documentation [DISCOtoolkit](https://discotoolkit-py.readthedocs.io/en/latest/). It provides the following functions: diff --git a/__pycache__/main.cpython-38.pyc b/__pycache__/main.cpython-38.pyc old mode 100644 new mode 100755 diff --git a/__pycache__/setup.cpython-38.pyc b/__pycache__/setup.cpython-38.pyc old mode 100644 new mode 100755 diff --git a/__pycache__/test_file.cpython-38.pyc b/__pycache__/test_file.cpython-38.pyc old mode 100644 new mode 100755 diff --git a/build/lib/discotoolkit/CELLiD.py b/build/lib/discotoolkit/CELLiD.py index 922c9de..d8b10b4 100644 --- a/build/lib/discotoolkit/CELLiD.py +++ b/build/lib/discotoolkit/CELLiD.py @@ -9,10 +9,11 @@ import hashlib import requests from scipy import stats + # from fast_fisher import fast_fisher_exact, odds_ratio from scipy.stats.contingency import odds_ratio -from joblib import Parallel, delayed # multiprocessing library -from pandarallel import pandarallel # multiprocessing library +from joblib import Parallel, delayed # multiprocessing library +from pandarallel import pandarallel # multiprocessing library pd.options.mode.chained_assignment = None @@ -24,17 +25,45 @@ """ CELLiD cell type annotation function block. """ -# get the total atlas from DISCO website for the user to select which atlas to use -def get_atlas(ref_data = None, ref_path = None): + +def generate_correlation_map(x, y): + """Correlate each n with each m. + + Parameters + ---------- + x : np.array + Shape N X T. + + y : np.array + Shape M X T. + + Returns + ------- + np.array + N X M array in which each element is a correlation coefficient. 
+ + """ + mu_x = x.mean(1) + mu_y = y.mean(1) + n = x.shape[1] + if n != y.shape[1]: + raise ValueError("x and y must " + "have the same number of timepoints.") + s_x = x.std(1, ddof=n - 1) + s_y = y.std(1, ddof=n - 1) + cov = np.dot(x, y.T) - n * np.dot(mu_x[:, np.newaxis], mu_y[np.newaxis, :]) + return cov / np.dot(s_x[:, np.newaxis], s_y[np.newaxis, :]) + + +# get the total atlas from DISCO website for the user to select which atlas to use +def get_atlas(ref_data=None, ref_path=None): """get the all the atlas string from the DISCO website and return to the user Returns: List: return list of string - """ + """ # Download reference data and ref_deg if missing if ref_data is None: - # check for the data path if ref_path is None: ref_path = "DISCOtmp" @@ -42,19 +71,33 @@ def get_atlas(ref_data = None, ref_path = None): # if the path does not exist, we create a default directory if not os.path.exists(ref_path): os.mkdir(ref_path) - + # download reference data from the server in pickle extension and read as pandas dataframe if ref_data is None: - if not(os.path.exists(ref_path + "/ref_data.pkl")): + if not (os.path.exists(ref_path + "/ref_data.pkl")): logging.info("Downloading reference dataset...") - response = requests.get(url=prefix_disco_url +"/getRefPkl", timeout=timeout) + response = requests.get( + url=prefix_disco_url + "toolkit/getRef?type=pkl", timeout=timeout + ) open(ref_path + "/ref_data.pkl", "wb").write(response.content) - ref_data = pd.read_pickle(ref_path + "/ref_data.pkl", compression = {'method':'gzip','compresslevel':6}) + ref_data = pd.read_pickle( + ref_path + "/ref_data.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) all_atlas = [each.split("--")[1] for each in ref_data.columns] return list(set(all_atlas)) -def CELLiD_cluster(rna, ref_data : pd.DataFrame = None, ref_deg : pd.DataFrame = None, atlas : str = None, n_predict : int = 1, ref_path : str = None, ncores : int = 10): + +def CELLiD_cluster( + rna, + ref_data: pd.DataFrame = None, + ref_deg: pd.DataFrame = None, + atlas: str = None, + n_predict: int = 1, + ref_path: str = None, + ncores: int = 10, +): """Cell type annotation using reference data and compute the correlation between the user cell gene expression as compare to the reference data. The celltype with highest correlation will be concluded as the celltype @@ -73,20 +116,21 @@ def CELLiD_cluster(rna, ref_data : pd.DataFrame = None, ref_deg : pd.DataFrame = # initialize the pandarallel for parallel processing pandarallel.initialize(nb_workers=ncores) - + # check for the input data - if not(isinstance(rna, pd.DataFrame) or isinstance(rna, np.ndarray)): + if not (isinstance(rna, pd.DataFrame) or isinstance(rna, np.ndarray)): logging.error("the rna must be a pandas DataFrame or Numpy Array") # check for number of prediction of celltypes and set the largest number to 3 only if n_predict is not None: if n_predict > 3: - logging.info("Any value of n_predict that exceeds 3 will be automatically adjusted to 3.") + logging.info( + "Any value of n_predict that exceeds 3 will be automatically adjusted to 3." 
+ ) n_predict = 3 - + # Download reference data and ref_deg if missing if (ref_data is None) or (ref_deg is None): - # check for the data path if ref_path is None: ref_path = "DISCOtmp" @@ -94,42 +138,60 @@ def CELLiD_cluster(rna, ref_data : pd.DataFrame = None, ref_deg : pd.DataFrame = # if the path does not exist, we create a default directory if not os.path.exists(ref_path): os.mkdir(ref_path) - + # download reference data from the server in pickle extension and read as pandas dataframe if ref_data is None: - if not(os.path.exists(ref_path + "/ref_data.pkl")): + if not (os.path.exists(ref_path + "/ref_data.pkl")): logging.info("Downloading reference dataset...") - response = requests.get(url=prefix_disco_url +"/getRefPkl", timeout=timeout) + response = requests.get( + url=prefix_disco_url + "toolkit/getRef?type=pkl", timeout=timeout + ) open(ref_path + "/ref_data.pkl", "wb").write(response.content) - ref_data = pd.read_pickle(ref_path + "/ref_data.pkl", compression = {'method':'gzip','compresslevel':6}) + ref_data = pd.read_pickle( + ref_path + "/ref_data.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) # similarly do the same for the DEG reference if ref_deg is None: - if not(os.path.exists(ref_path + "/ref_deg.pkl")): + if not (os.path.exists(ref_path + "/ref_deg.pkl")): logging.info("Downloading deg dataset...") - response = requests.get(url=prefix_disco_url +"/getRefDegPkl", timeout=timeout) + response = requests.get( + url=prefix_disco_url + "toolkit/getRefDeg?type=pkl", timeout=timeout + ) open(ref_path + "/ref_deg.pkl", "wb").write(response.content) - ref_deg = pd.read_pickle(ref_path + "/ref_deg.pkl", compression = {'method':'gzip','compresslevel':6}) - + ref_deg = pd.read_pickle( + ref_path + "/ref_deg.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) + #### # write list of atlas function # we can write a function to get all the atlas from disco to the users # subsetting to the selected atlas by the user if atlas is not None: - select_ref = [index for index, each in enumerate(ref_data.columns) if each.split("--")[1] in list(atlas)] + select_ref = [ + index + for index, each in enumerate(ref_data.columns) + if each.split("--")[1] in list(atlas) + ] ref_data = ref_data.iloc[:, select_ref] - + # get the intersection of the gene for the user data and the reference data genes = set.intersection(set(rna.index), set(ref_data.index)) # give an error as 2000 genes are very low if len(genes) <= 2000: - logging.error("Less than 2000 genes are shared between the input data and the reference dataset!") + logging.error( + "Less than 2000 genes are shared between the input data and the reference dataset!" + ) # give a warning to the user if len(genes) <= 5000: - logging.warning("The input data and reference dataset have a limited number of overlapping genes, which may potentially impact the accuracy of the CELLiD.") - + logging.warning( + "The input data and reference dataset have a limited number of overlapping genes, which may potentially impact the accuracy of the CELLiD." 
+ ) + # subsetting the dataframe to the common genes across data and the reference rna = rna.loc[list(genes)] ref_data = ref_data.loc[list(genes)] @@ -137,7 +199,7 @@ def CELLiD_cluster(rna, ref_data : pd.DataFrame = None, ref_deg : pd.DataFrame = # nested apply function to compute correlation for each cell type between the user and reference database def each_ref_correlation(input, ref_data): input = list(input) - res = ref_data.parallel_apply(basic_correlation, input = input, axis = 0) + res = ref_data.parallel_apply(basic_correlation, input=input, axis=0) return res # apply the correlation to all the columns in user data @@ -148,9 +210,14 @@ def basic_correlation(ref, input): predicted = pd.Series(list(ref)).corr(pd.Series(list(input)), method="spearman") return predicted - + # get the predicted correlation as in pandas DataFrame - predicted_cell = rna.apply(each_ref_correlation, result_type = "expand", axis = 0, ref_data = ref_data) + # predicted_cell = pd.DataFrame( + # generate_correlation_map(ref_data.to_numpy().T, rna.to_numpy().T) + # ) + predicted_cell = rna.apply( + each_ref_correlation, result_type="expand", axis=0, ref_data=ref_data + ) predicted_cell = pd.DataFrame(predicted_cell) # set the index to be the celltype based on the reference_data @@ -161,60 +228,84 @@ def basic_correlation(ref, input): # define a function to apply to each column of the dataframe def get_top_5_indices(col): # get the indices of the top 5 values in the column - indices = np.where(col.rank(method='min', ascending=False) <= 5)[0] + indices = np.where(col.rank(method="min", ascending=False) <= 5)[0] # convert to numeric and return - return pd.Series(indices).astype('int') - - ct = predicted_cell.apply(get_top_5_indices) - - # ct = pd.DataFrame(predicted_cell.parallel_apply(lambda x: [index for index, each in enumerate(x.rank()) if each <= 5], axis = 0, result_type = "expand")) + return pd.Series(indices).astype("int") + ct = predicted_cell.apply(get_top_5_indices) # compute the correleration based on the genes set that is intersection between the user data and reference dataset + def second_correlation(i, ref_data, rna, ct): - ref = ref_data.iloc[:,ct[i]].copy() - g = set(ref_deg.iloc[check_in_list(ref_deg["group"], ref.columns)].copy()["gene"]) + ref = ref_data.iloc[:, list(ct[str(i)])].copy() + g = set( + ref_deg.iloc[check_in_list(ref_deg["group"], ref.columns)].copy()["gene"] + ) g = set.intersection(set(ref.index), g) ref = ref.loc[list(g)].copy() - input = rna.loc[list(g), i].copy() + # print(rna.loc[list(g), str(i)]) + input = rna.loc[list(g), str(i)].copy() # predict = ref.apply(lambda i: np.round(stats.spearmanr(pd.to_numeric(i), pd.to_numeric(input), nan_policy='omit')[0], 3)) - predict = ref.apply(lambda i: np.round(pd.Series(i).corr(pd.Series(input), method='spearman'), 3)) # trying with pandas correlation + predict = ref.apply( + lambda i: np.round( + pd.Series(i).corr(pd.Series(input), method="spearman"), 3 + ) + ) # trying with pandas correlation predict = pd.DataFrame(predict) predict.columns = ["cor"] - predict = predict.sort_values(["cor"], ascending = False) - res = [[each.split("--")[0], each.split("--")[1], predict["cor"][index], i] for index, each in enumerate(predict.index) if index < n_predict] - res = pd.DataFrame(res, columns =['cell_type', 'atlas', "score", "input_index"]) + predict = predict.sort_values(["cor"], ascending=False) + res = [ + [each.split("--")[0], each.split("--")[1], predict["cor"][index], i] + for index, each in enumerate(predict.index) + if index < 
n_predict + ] + res = pd.DataFrame(res, columns=["cell_type", "atlas", "score", "input_index"]) return res # return list in the format cell_type, atlas, score, input_index # get a data in the format of cell_type, atlas, score, input_index - predicted_cell = Parallel(n_jobs=ncores, batch_size=32, verbose = 2)(delayed(second_correlation)(int(y), ref_data, rna, ct) for y in range(rna.shape[1])) + predicted_cell = Parallel(n_jobs=ncores, batch_size=32, verbose=2)( + delayed(second_correlation)(int(y), ref_data, rna, ct) + for y in range(rna.shape[1]) + ) # first concatenate in row wise manner predicted_cell = pd.concat(predicted_cell) - pivot = pd.pivot_table(predicted_cell, index='input_index', columns=predicted_cell.groupby('input_index').cumcount()+1, - values=['cell_type', 'atlas', 'score'], aggfunc='first') + pivot = pd.pivot_table( + predicted_cell, + index="input_index", + columns=predicted_cell.groupby("input_index").cumcount() + 1, + values=["cell_type", "atlas", "score"], + aggfunc="first", + ) - pivot = pivot[["cell_type", "atlas", "score"]] + pivot = pivot[["cell_type", "atlas", "score"]] # flatten multi-level column index - pivot.columns = ['_'.join(map(str, col)).strip() for col in pivot.columns.values] + pivot.columns = ["_".join(map(str, col)).strip() for col in pivot.columns.values] # rename columns column_names = {} for i in range(n_predict): - column_names[f'cell_type_{i+1}'] = f'predicted_cell_type_{i+1}' - column_names[f'atlas_{i+1}'] = f'source_atlas_{i+1}' - column_names[f'score_{i+1}'] = f'score_{i+1}' - + column_names[f"cell_type_{i+1}"] = f"predicted_cell_type_{i+1}" + column_names[f"atlas_{i+1}"] = f"source_atlas_{i+1}" + column_names[f"score_{i+1}"] = f"score_{i+1}" + pivot = pivot.rename(columns=column_names) - return pivot# return pandas dataframe in the format the same original R package + return pivot # return pandas dataframe in the format the same original R package + """ Geneset enrichment analysis using DISCO data """ -def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref_path : str = None, ncores : int = 1): + +def CELLiD_enrichment( + input: pd.DataFrame, + reference: pd.DataFrame = None, + ref_path: str = None, + ncores: int = 1, +): """Function to generate enrichment analysis based on the reference gene sets and following the DISCO pipeline. 
Args: @@ -229,7 +320,7 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # check input data # check if the input type is a DataFrame - if not(isinstance(input, pd.DataFrame)): + if not (isinstance(input, pd.DataFrame)): logging.error("The input must be a dataframe") # check if the user provide more than the needed column which are gene and fc @@ -240,7 +331,7 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref if input.shape[1] == 2: input_shape = 2 input.columns = ["gene", "fc"] - + # else we can just take a gene list for the enrichment else: input_shape = 1 @@ -248,7 +339,6 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # if the user does not provide reference geneset list if reference is None: - # check for the data path if it is provided by the user if ref_path is None: ref_path = "DISCOtmp" @@ -256,31 +346,45 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # if the path does not exist, we create a default directory if not os.path.exists(ref_path): os.mkdir(ref_path) - + # check if the file does not exist if not (os.path.exists(ref_path + "/ref_geneset.pkl")): # downloading the geneset data from disco database - response = requests.get(url=prefix_disco_url +"/getGeneSetPkl", timeout=timeout) + response = requests.get( + url=prefix_disco_url + "/getGeneSetPkl", timeout=timeout + ) open(ref_path + "/ref_geneset.pkl", "wb").write(response.content) - reference = pd.read_pickle(ref_path + "/ref_geneset.pkl", compression = {'method':'gzip','compresslevel':6}) + reference = pd.read_pickle( + ref_path + "/ref_geneset.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) else: # read the data into pandas dataframe for subsequent analysis - reference = pd.read_pickle(ref_path + "/ref_geneset.pkl", compression = {'method':'gzip','compresslevel':6}) + reference = pd.read_pickle( + ref_path + "/ref_geneset.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) # rename the name data to include the reference atlas reference["name"] = reference["name"] + " in " + reference["atlas"] - input["gene"] = input["gene"].str.upper() # convert all the gene to upper string as the reference gene name is in uppercase + input["gene"] = input[ + "gene" + ].str.upper() # convert all the gene to upper string as the reference gene name is in uppercase # condition when the user provide the fold change for the enrichment analysis if input_shape == 2: - input["fc"] = 2 ** input["fc"] # 2 to the power of fc - input = input.set_index("gene", drop = False) # set index to the gene - input = input.loc[list(set(reference["gene"]) & set(input['gene']))] # only get the common genes for comparison + input["fc"] = 2 ** input["fc"] # 2 to the power of fc + input = input.set_index("gene", drop=False) # set index to the gene + input = input.loc[ + list(set(reference["gene"]) & set(input["gene"])) + ] # only get the common genes for comparison # else we only look at the ranked gene list else: - input = input.set_index("gene", drop = False) - input = input.loc[list(set(reference["gene"]) & set(input['gene']))] # only get the common genes for comparison + input = input.set_index("gene", drop=False) + input = input.loc[ + list(set(reference["gene"]) & set(input["gene"])) + ] # only get the common genes for comparison # filter to get only the geneset that contain the input genes from the user # this might good different result from the website so lets remove it for # unique_names = 
reference[reference["gene"].isin(input["gene"])]["name"].unique() @@ -291,8 +395,8 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # optimise the CELLiD_enrichment function atlas_dict = {} name_dict = {} - reference_atlas = reference['atlas'] - reference_name = reference['name'] + reference_atlas = reference["atlas"] + reference_name = reference["name"] # pre compute the parameters of the fisher exact test for the reference data for rownum in range(len(reference)): @@ -305,57 +409,105 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref name_dict[reference_name[rownum]] = [rownum] else: name_dict[reference_name[rownum]].append(rownum) - + # apply function to the dataframe - def process_unique_name(reference_filter, input, atlas_dfs, input_shape, atlas_dict): + def process_unique_name( + reference_filter, input, atlas_dfs, input_shape, atlas_dict + ): # use the compile`d pattern to search for matches - unique_name = reference_filter['name'].head(1).values[0] - atlas = reference_filter['atlas'].head(1).values + unique_name = reference_filter["name"].head(1).values[0] + atlas = reference_filter["atlas"].head(1).values # print(unique_name) reference_full = atlas_dfs[list(atlas_dict.keys()).index(atlas)] # reference_filter = reference.iloc[name_dict[unique_name]].copy() # subset the reference data # reference_full = reference.iloc[atlas_dict[atlas]].copy() # full reference to the atlas - reference_filter = reference_filter.set_index(["gene"], drop = False) # set gene as the index + reference_filter = reference_filter.set_index( + ["gene"], drop=False + ) # set gene as the index # reference_filter["gene"] = reference_filter.index - input_filter = input.loc[list(set(reference_full["gene"]) & set(input["gene"]))] # subset the input genes + input_filter = input.loc[ + list(set(reference_full["gene"]) & set(input["gene"])) + ] # subset the input genes # condition for no passed gene set if input_filter.empty: return None - + # different computation to either include the fold change in the fisher exact test if input_shape == 2: - a = (reference_filter.loc[np.intersect1d(reference_filter["gene"], (input_filter["gene"]))].iloc[:, 0] * input_filter.loc[np.intersect1d(input_filter["gene"], reference_filter["gene"]), "fc"]).sum() + 1 - b = input_filter.loc[np.setdiff1d(input_filter["gene"], reference_filter["gene"]), "fc"].sum() + 1 - c = reference_filter.loc[np.setdiff1d(reference_filter["gene"], input_filter["gene"])].iloc[:, 0].sum() + 1 - d = len(set(reference_full["gene"])) - len(set(reference_filter["gene"]).union(set(input_filter["gene"]))) + a = ( + reference_filter.loc[ + np.intersect1d(reference_filter["gene"], (input_filter["gene"])) + ].iloc[:, 0] + * input_filter.loc[ + np.intersect1d(input_filter["gene"], reference_filter["gene"]), "fc" + ] + ).sum() + 1 + b = ( + input_filter.loc[ + np.setdiff1d(input_filter["gene"], reference_filter["gene"]), "fc" + ].sum() + + 1 + ) + c = ( + reference_filter.loc[ + np.setdiff1d(reference_filter["gene"], input_filter["gene"]) + ] + .iloc[:, 0] + .sum() + + 1 + ) + d = len(set(reference_full["gene"])) - len( + set(reference_filter["gene"]).union(set(input_filter["gene"])) + ) else: a = len(np.intersect1d(reference_filter["gene"], input_filter["gene"])) + 1 b = len(np.setdiff1d(input_filter["gene"], reference_filter["gene"])) + 1 c = len(np.setdiff1d(reference_filter["gene"], input_filter["gene"])) + 1 d = len(set(reference_full["gene"])) - a - b - c - + # get both value from the fisher exact 
test # pval = pvalue(a, b, c, d).two_tail _, pval = stats.fisher_exact(np.array([[a, b], [c, d]])) - odds = (a * d)/(b * c) + odds = (a * d) / (b * c) # only get the significant p_value # we can make changes to the p_value as in like we allow the user to specify it so that they have more control on the result they can obtain if pval < 0.01: - return pd.Series({"pval": pval, "or": odds, "name": unique_name, - "gene": ",".join(reference_filter.loc[reference_filter["gene"].isin(input_filter["gene"]), "gene"]), - "background": len(set(reference_full["gene"])), "overlap": len(reference_filter.loc[reference_filter["gene"].isin(input_filter["gene"])]), "geneset": len(reference_filter)}) + return pd.Series( + { + "pval": pval, + "or": odds, + "name": unique_name, + "gene": ",".join( + reference_filter.loc[ + reference_filter["gene"].isin(input_filter["gene"]), "gene" + ] + ), + "background": len(set(reference_full["gene"])), + "overlap": len( + reference_filter.loc[ + reference_filter["gene"].isin(input_filter["gene"]) + ] + ), + "geneset": len(reference_filter), + } + ) else: return None # Split them into cores unique_name_dfs = [reference.iloc[name_dict[key], :] for key in name_dict.keys()] atlas_dfs = [reference.iloc[atlas_dict[key], :] for key in atlas_dict.keys()] - + # return unique_name_dfs, atlas_dfs, input, input_shape # return unique_name_dfs, atlas_dfs # using joblib to apply multiprocessing - results = Parallel(n_jobs=ncores, verbose = 2, batch_size = 64)(delayed(process_unique_name)(unique_name_df, input, atlas_dfs, input_shape, atlas_dict) for unique_name_df in unique_name_dfs) + results = Parallel(n_jobs=ncores, verbose=2, batch_size=64)( + delayed(process_unique_name)( + unique_name_df, input, atlas_dfs, input_shape, atlas_dict + ) + for unique_name_df in unique_name_dfs + ) results = [res for res in results if res is not None] # concatenate the result into the dataframe to the user @@ -363,8 +515,9 @@ def process_unique_name(reference_filter, input, atlas_dfs, input_shape, atlas_d res_df = pd.concat(results, axis=1).transpose() res_df["pval"] = res_df["pval"].astype(float).round(3) res_df["or"] = res_df["or"].astype(float).round(3) - res_df = res_df.sort_values(by=["pval", "or"], ascending=[True, False]).head(min(50, len(results))) + res_df = res_df.sort_values(by=["pval", "or"], ascending=[True, False]).head( + min(50, len(results)) + ) return res_df else: return None - \ No newline at end of file diff --git a/build/lib/discotoolkit/DiscoClass.py b/build/lib/discotoolkit/DiscoClass.py index a055cb2..ab3d4fe 100644 --- a/build/lib/discotoolkit/DiscoClass.py +++ b/build/lib/discotoolkit/DiscoClass.py @@ -6,10 +6,10 @@ class Filter: """ Filter class object to save the attributes for filtering the dataset from DISCO - sample String e.g. GSM3891625_3; - project String; - tissue String e.g. Lung, Bladder; - disease String e.g. PDAC; + sample_id String e.g. ERX2757110; + project_id String; + tissue String e.g. lung, bladder; + disease String e.g. COVID-19; platform String e.g. 
10x3'; sample_type String; cell_type String; @@ -20,12 +20,12 @@ class Filter: return Class object """ - def __init__(self, sample = None, project = None, tissue = None, disease = None, platform = None, sample_type = None, + def __init__(self, sample_id = None, project_id = None, tissue = None, disease = None, platform = None, sample_type = None, cell_type = None, cell_type_confidence : str = "medium", include_cell_type_children : bool = True, min_cell_per_sample : int = 100): # handling for string and list input - self.sample = self.convert_to_list(sample) # sample id - self.project = self.convert_to_list(project) # project, lab, or dataset from different author + self.sample_id = self.convert_to_list(sample_id) # sample id + self.project_id = self.convert_to_list(project_id) # project, lab, or dataset from different author self.tissue = self.convert_to_list(tissue) # organ tissue self.disease = self.convert_to_list(disease) # cancer or non cancer, or COVID-1e9 disease self.platform = self.convert_to_list(platform) # sequencing platform diff --git a/build/lib/discotoolkit/DownloadDiscoData.py b/build/lib/discotoolkit/DownloadDiscoData.py index 273986a..484c7d4 100644 --- a/build/lib/discotoolkit/DownloadDiscoData.py +++ b/build/lib/discotoolkit/DownloadDiscoData.py @@ -1,11 +1,11 @@ -''' -Descripttion: -version: +""" +Descripttion: +version: Author: Mengwei Li Date: 2023-04-14 16:33:44 LastEditors: Mengwei Li -LastEditTime: 2023-04-16 12:14:48 -''' +LastEditTime: 2024-10-23 21:30:03 +""" # import libraries import requests @@ -14,14 +14,15 @@ import re import os import hashlib +import scanpy as sc # import variable and class from other script from .GlobalVariable import logging, prefix_disco_url from .DiscoClass import FilterData, Filter from .GetMetadata import check_in_list -def download_disco_data(metadata, output_dir : str = "DISCOtmp"): +def download_disco_data(metadata, output_dir: str = "DISCOtmp"): """function to download the data based on the given filter Args: metadata (FilterData) : FilterData class to filters data from DISCO database @@ -29,116 +30,209 @@ def download_disco_data(metadata, output_dir : str = "DISCOtmp"): Returns: None: This function does not return any object and instead download the data for the user - """ + """ # define a list to store the error sample that can not be download error_sample = [] # create directory known as DISCOtmp or based on user defined directory if it does not exist - if not(os.path.exists(output_dir)): + if not (os.path.exists(output_dir)): os.mkdir(output_dir) + samples = list(metadata.sample_metadata["sample_id"]) # download data when the user does not want to subset to specific cell type - if metadata.filter.cell_type is None: - - # getting the dataframe of the samples - samples = metadata.sample_metadata - - # iterate through all the samples - for i in range(len(samples)): - s = list(samples["sampleId"])[i] - p = list(samples["projectId"])[i] - output_file = "%s/%s.h5ad" % (output_dir, s) # getting the name of the file - - # condition if the file has been downloaded before - if (os.path.exists(output_file)) and \ - hashlib.md5(open(output_file, "rb").read()).hexdigest() == list(samples["md5h5ad"])[i]: - logging.info(" %s has been downloaded before. Ignore ..." 
% (s)) # giving message to the user - else: - logging.info("Downloading data of %s" % (s)) # giving message to the user - - try: - # try to download the data as requested - response = requests.get(url=prefix_disco_url + "getH5adBySample?project="+ p +"&sample=" + s) - - # condition for error in downloading the samples - if response.status_code == 404: - error_sample.append(s) # append the error samples into a list for debugging later on - logging.warning("sample %s download fail" % (s)) # giving message to the user - else: - # write the file to directory - open(output_file, "wb").write(response.content) - - # condition for another error - if hashlib.md5(open(output_file, "rb").read()).hexdigest() != list(samples["md5h5ad"])[i]: - error_sample.append(s) - os.remove(output_file) - logging.warning("sample %s download fail" % (s)) # give message to the user - except: - # except error when the download fail - error_sample.append(s) - logging.warning("sample %s download fail" % (s)) - - # if there are error samples, we set all the attribute for FilterData object to the error samples and return the object in FilterData object - if len(error_sample) > 0: - metadata.sample_metadata = metadata.sample_metadata.set_index("sampleId") # change from inplace to this to increase efficiency - metadata.sample_metadata = metadata.sample_metadata.loc[error_sample] - metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[check_in_list(metadata.cell_type_metadata["sampleId"], error_sample)] - metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() - metadata.sample_count = len(error_sample) - return metadata - - else: - # get the dataframe for the metadata - metadata.sample_metadata = metadata.sample_metadata.set_index("sampleId") - samples = metadata.cell_type_metadata - concat_func = lambda x,y: x + "_" + str(y) - - # create new element by concatenating the sampleId and cluster - samples["sampleId"] = list(map(concat_func, list(samples["sampleId"]), - list(samples["cluster"]))) - - # similarly, iterative through all the sample for downloading - for i in range(len(samples)): - s = list(samples["sampleId"])[i] - sub_s = s.replace("_" + str(list(samples["cluster"])[i]) , "") # adding handler for celltype specific to get the data - p = metadata.sample_metadata.loc[sub_s]["projectId"] - output_file = "%s/%s.h5ad" % (output_dir, s) + + samples = metadata.sample_metadata + + for i in range(len(samples)): + s = list(samples["sample_id"])[i] + p = list(samples["project_id"])[i] + + output_file = "%s/%s.h5ad" % (output_dir, s) + if os.path.exists(output_file): + os.remove(output_file) + + url = prefix_disco_url + "download/getRawH5/" + p + "/" + s + rna = sc.read_10x_h5(filename=output_file, backup_url=url) + + cell = pd.read_csv( + prefix_disco_url + "toolkit/" + "getCellTypeSample?sampleId=" + s, + sep="\t", + header=0, + ) + # cell.set_index("cell_id") + cell = cell.iloc[:, [0, 2, 5]] + if metadata.filter.cell_type is not None: + if metadata.filter.cell_type_confidence == "high": + cell = cell.loc[cell["cell_type_score"] >= 0.8] + + elif metadata.filter.cell_type_confidence == "medium": + cell = cell.loc[cell["cell_type_score"] >= 0.6] + cell = cell.loc[cell.cell_type.isin(metadata.filter.cell_type)] - # checking for file and ignore if it is already exist - if (os.path.exists(output_file)) and \ - hashlib.md5(open(output_file, "rb").read()).hexdigest() == list(samples["h5adMd5"]): - logging.info(" %s has been downloaded before. Ignore ..." 
% (s)) # give message to the user - else: - logging.info("Downloading data of %s" % (s)) # give message to the user - try: - # get the data from the server - response = requests.get(url=prefix_disco_url + "getH5adBySampleCt?project="+ p +"&sample=" + s) - if response.status_code == 404: - error_sample.append(s) # append error samples - logging.warning("sample %s download fail 1" % (s)) # give message to the user - else: - open(output_file, "wb").write(response.content) - - # condition for another error - if hashlib.md5(open(output_file, "rb").read()).hexdigest() != list(samples["h5adMd5"])[i]: - error_sample.append(s) - os.remove(output_file) - logging.warning("sample %s download fail" % (s)) # give message to the user - except: - error_sample.append(s) - logging.warning("sample %s download fail" % (s)) # give message to the user - - # similarly, record the error samples and return to the variable - if len(error_sample) > 0: - metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[check_in_list(metadata.cell_type_metadata["sampleId"], error_sample)] - metadata.sample_metadata = metadata.sample_metadata.iloc[check_in_list(list(metadata.sample_metadata["sampleId"]), metadata.cell_type_metadata["sampleId"])] - metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() - metadata.sample_count = len(error_sample) - sample_cell_count = pd.DataFrame(metadata.cell_type_metadata.groupby(["sampleId"])["cellNumber"].agg("sum")) - sample_cell_count.columns = ["x"] - metadata.sample_metadata["cell_number"] = list(sample_cell_count.loc[list(metadata.sample_metadata["sampleId"])]["x"]) - return metadata - - # else return None as the data has been downloaded - return None \ No newline at end of file + rna = rna[cell.cell_id].copy() + rna.obs["cell_type"] = list(cell.cell_type) + rna.obs["sample_id"] = s + rna.obs["project_id"] = p + rna.write(output_file) + + # if metadata.filter.cell_type is None: + # # getting the dataframe of the samples + # samples = metadata.sample_metadata + + # # iterate through all the samples + # for i in range(len(samples)): + # s = list(samples["sampleId"])[i] + # p = list(samples["projectId"])[i] + # output_file = "%s/%s.h5ad" % (output_dir, s) # getting the name of the file + + # # condition if the file has been downloaded before + # if (os.path.exists(output_file)) and hashlib.md5( + # open(output_file, "rb").read() + # ).hexdigest() == list(samples["md5h5ad"])[i]: + # logging.info( + # " %s has been downloaded before. Ignore ..." 
% (s) + # ) # giving message to the user + # else: + # logging.info( + # "Downloading data of %s" % (s) + # ) # giving message to the user + + # try: + # # try to download the data as requested + # response = requests.get( + # url=prefix_disco_url + # + "getH5adBySample?project=" + # + p + # + "&sample=" + # + s + # ) + + # # condition for error in downloading the samples + # if response.status_code == 404: + # error_sample.append( + # s + # ) # append the error samples into a list for debugging later on + # logging.warning( + # "sample %s download fail" % (s) + # ) # giving message to the user + # else: + # # write the file to directory + # open(output_file, "wb").write(response.content) + + # # condition for another error + # if ( + # hashlib.md5(open(output_file, "rb").read()).hexdigest() + # != list(samples["md5h5ad"])[i] + # ): + # error_sample.append(s) + # os.remove(output_file) + # logging.warning( + # "sample %s download fail" % (s) + # ) # give message to the user + # except: + # # except error when the download fail + # error_sample.append(s) + # logging.warning("sample %s download fail" % (s)) + + # # if there are error samples, we set all the attribute for FilterData object to the error samples and return the object in FilterData object + # if len(error_sample) > 0: + # metadata.sample_metadata = metadata.sample_metadata.set_index( + # "sampleId" + # ) # change from inplace to this to increase efficiency + # metadata.sample_metadata = metadata.sample_metadata.loc[error_sample] + # metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[ + # check_in_list(metadata.cell_type_metadata["sampleId"], error_sample) + # ] + # metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() + # metadata.sample_count = len(error_sample) + # return metadata + + # else: + # # get the dataframe for the metadata + # metadata.sample_metadata = metadata.sample_metadata.set_index("sampleId") + # samples = metadata.cell_type_metadata + # concat_func = lambda x, y: x + "_" + str(y) + + # # create new element by concatenating the sampleId and cluster + # samples["sampleId"] = list( + # map(concat_func, list(samples["sampleId"]), list(samples["cluster"])) + # ) + + # # similarly, iterative through all the sample for downloading + # for i in range(len(samples)): + # s = list(samples["sampleId"])[i] + # sub_s = s.replace( + # "_" + str(list(samples["cluster"])[i]), "" + # ) # adding handler for celltype specific to get the data + # p = metadata.sample_metadata.loc[sub_s]["projectId"] + # output_file = "%s/%s.h5ad" % (output_dir, s) + + # # checking for file and ignore if it is already exist + # if (os.path.exists(output_file)) and hashlib.md5( + # open(output_file, "rb").read() + # ).hexdigest() == list(samples["h5adMd5"]): + # logging.info( + # " %s has been downloaded before. Ignore ..." 
% (s) + # ) # give message to the user + # else: + # logging.info("Downloading data of %s" % (s)) # give message to the user + # try: + # # get the data from the server + # response = requests.get( + # url=prefix_disco_url + # + "getH5adBySampleCt?project=" + # + p + # + "&sample=" + # + s + # ) + # if response.status_code == 404: + # error_sample.append(s) # append error samples + # logging.warning( + # "sample %s download fail 1" % (s) + # ) # give message to the user + # else: + # open(output_file, "wb").write(response.content) + + # # condition for another error + # if ( + # hashlib.md5(open(output_file, "rb").read()).hexdigest() + # != list(samples["h5adMd5"])[i] + # ): + # error_sample.append(s) + # os.remove(output_file) + # logging.warning( + # "sample %s download fail" % (s) + # ) # give message to the user + # except: + # error_sample.append(s) + # logging.warning( + # "sample %s download fail" % (s) + # ) # give message to the user + + # # similarly, record the error samples and return to the variable + # if len(error_sample) > 0: + # metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[ + # check_in_list(metadata.cell_type_metadata["sampleId"], error_sample) + # ] + # metadata.sample_metadata = metadata.sample_metadata.iloc[ + # check_in_list( + # list(metadata.sample_metadata["sampleId"]), + # metadata.cell_type_metadata["sampleId"], + # ) + # ] + # metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() + # metadata.sample_count = len(error_sample) + # sample_cell_count = pd.DataFrame( + # metadata.cell_type_metadata.groupby(["sampleId"])["cellNumber"].agg( + # "sum" + # ) + # ) + # sample_cell_count.columns = ["x"] + # metadata.sample_metadata["cell_number"] = list( + # sample_cell_count.loc[list(metadata.sample_metadata["sampleId"])]["x"] + # ) + # return metadata + + # # else return None as the data has been downloaded + # return None diff --git a/build/lib/discotoolkit/GetMetadata.py b/build/lib/discotoolkit/GetMetadata.py index d641320..82f681a 100644 --- a/build/lib/discotoolkit/GetMetadata.py +++ b/build/lib/discotoolkit/GetMetadata.py @@ -14,7 +14,8 @@ from .GlobalVariable import logging, prefix_disco_url from .DiscoClass import FilterData, Filter -def get_json(url : str, info_msg: str, error_msg: str, prefix : bool = True): + +def get_json(url: str, info_msg: str, error_msg: str, prefix: bool = True): """Funtion to get json data from the server Args: @@ -26,36 +27,37 @@ def get_json(url : str, info_msg: str, error_msg: str, prefix : bool = True): Returns: JSON: return JSON data which will be converted into pandas dataframe later on """ - + # giving log message to the user logging.info(info_msg) # condition for adding prefix - if prefix == False: + if not prefix: response = requests.get(url) else: - response = requests.get(prefix_disco_url + "/" + url) - + response = requests.get(prefix_disco_url + url) + # if the loading of JSON is successful, we get the text if response.status_code == 200: data = json.loads(response.text) - return data # return json data Dictionary datatype in python + return data # return json data Dictionary datatype in python else: - # print informative message to the user logging.error(error_msg) + def get_sample_ct_info(): - """ Get cell type information of sample + """Get cell type information of sample Returns: Pandas DataFrame: return pandas dataframe to the user - """ - temp = get_json(url = "/getSampleCtInfo", info_msg = "Retrieving cell type information of each sample from DISCO database", - error_msg = "Failed to 
retrieve cell type information. Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.") - return pd.DataFrame(temp) # return Dataframe of the JSON data + """ + temp = pd.read_csv( + prefix_disco_url + "toolkit/" + "getCellTypeSummary", sep="\t", header=0 + ) + return temp # return Dataframe of the JSON data -def find_celltype(term : str = "", cell_ontology : dict = None): +def find_celltype(term: str = "", cell_ontology: dict = None): """find the celltype within the disco dataset Args: @@ -68,9 +70,12 @@ def find_celltype(term : str = "", cell_ontology : dict = None): # when the user do not have cell ontology, we get the default one from disco database if cell_ontology is None: - cell_ontology = get_json(url = "/getCellOntology", info_msg = "Retrieving ontology from DISCO database", - error_msg = "Failed to retrieve ontology Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.") - cell_type = pd.DataFrame(cell_ontology)["cell_name"] # convert to dataframe + cell_ontology = get_json( + url="toolkit/getCellOntology", + info_msg="Retrieving ontology from DISCO database", + error_msg="Failed to retrieve ontology Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.", + ) + cell_type = pd.DataFrame(cell_ontology)["cell_name"] # convert to dataframe # convert term to lower term for matching. the same apply for the cell_type term = term.lower() @@ -87,22 +92,30 @@ def find_celltype(term : str = "", cell_ontology : dict = None): # get the index and return the cell type in the correct case in the form of Python List return list(cell_type[idx_list]) -def get_disco_metadata(): +def get_disco_metadata(): """get metadata of the disco into the dataframe Returns: Pandas Dataframe: Disco metadata in the format of dataframe which will be used for the filter class - """ + """ - metadata = get_json(url = "http://www.immunesinglecell.org/api/vishuo/sample/all", info_msg = "Retrieving metadata from DISCO database", - error_msg = "Failed to retrieve metadata. Please try again. 
If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.", prefix=False) - metadata = pd.DataFrame(metadata) - metadata = metadata[metadata["processStatus"] == "QC pass"] # filtering to only get the sample that pass quality control - metadata.set_index("sampleId") # setting the index of the metadata to sample_id for consistency + metadata = pd.read_csv( + "https://immunesinglecell.org/disco_v3_api/toolkit/getSampleMetadata", + sep="\t", + header=0, + ) + # metadata = pd.DataFrame(metadata) + # metadata = metadata[ + # metadata["processStatus"] == "QC pass" + # ] # filtering to only get the sample that pass quality control + metadata.set_index( + "sample_id" + ) # setting the index of the metadata to sample_id for consistency return metadata -def get_celltype_children(cell_type: Union[str , list], cell_ontology: dict = None): + +def get_celltype_children(cell_type: Union[str, list], cell_ontology: dict = None): """get the children of the input celltype from the user Args: @@ -113,8 +126,13 @@ def get_celltype_children(cell_type: Union[str , list], cell_ontology: dict = No List of String: return the children of the defined celltype in list of String """ if cell_ontology is None: - cell_ontology = pd.DataFrame(get_json(url="/getCellOntology", info_msg = "Retrieving ontology from DISCO database", - error_msg = "Failed to retrieve ontology Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.")) + cell_ontology = pd.DataFrame( + get_json( + url="toolkit/getCellOntology", + info_msg="Retrieving ontology from DISCO database", + error_msg="Failed to retrieve ontology Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.", + ) + ) # define an empty list children = [] @@ -129,13 +147,20 @@ def get_celltype_children(cell_type: Union[str , list], cell_ontology: dict = No # while loop to get all the children celltypes iteratively while len(cell_type) > 0: - idx_list = [index for index, each in enumerate(list(cell_ontology["parent"])) if each in cell_type] + idx_list = [ + index + for index, each in enumerate(list(cell_ontology["parent"])) + if each in cell_type + ] children.extend(list(cell_ontology["cell_name"][idx_list])) cell_type = list(cell_ontology["cell_name"][idx_list]) - children = list(set(children)) # ensure that the obtained children of celltype are unique + children = list( + set(children) + ) # ensure that the obtained children of celltype are unique return children + def list_metadata_item(field: str): """List element inside the metadata columns @@ -153,24 +178,25 @@ def list_metadata_item(field: str): if field in metadata.columns: return list(set(metadata[field])) else: - logging.warning("DISCO data don't contain '%s' field. Please check the field name" % (field)) + logging.warning( + "DISCO data don't contain '%s' field. 
Please check the field name" % (field) + ) return None - -def list_all_columns(): + +def list_all_columns(): """list all the columns found in the metadata of the disco database Returns: List: return the name of the metadata in the form of list of string - """ - + """ + # get the disco metadata metadata = get_disco_metadata() return list(metadata.columns) -def filter_disco_metadata(filter : Filter = Filter()): - +def filter_disco_metadata(filter: Filter = Filter()): """filter function option for the disco data Args: Filter (Class): predefined Filter class with default attribute to filter data for the user @@ -182,15 +208,17 @@ def filter_disco_metadata(filter : Filter = Filter()): # starting with defining variable filter_data = FilterData() - metadata = get_disco_metadata() # get metadata - logging.info("Filtering sample") # producing log message to the user + metadata = get_disco_metadata() # get metadata + logging.info("Filtering sample") # producing log message to the user # condition for each provided attribute by the user or default - if filter.sample is not None: - metadata = metadata.iloc[check_in_list(metadata["sampleId"], filter.sample)] + if filter.sample_id is not None: + metadata = metadata.iloc[check_in_list(metadata["sample_id"], filter.sample_id)] - if filter.project is not None: - metadata = metadata.iloc[check_in_list(metadata["projectId"], filter.project)] + if filter.project_id is not None: + metadata = metadata.iloc[ + check_in_list(metadata["project_id"], filter.project_id) + ] if filter.tissue is not None: metadata = metadata.iloc[check_in_list(metadata["tissue"], filter.tissue)] @@ -202,81 +230,130 @@ def filter_disco_metadata(filter : Filter = Filter()): metadata = metadata.iloc[check_in_list(metadata["disease"], filter.disease)] if filter.sample_type is not None: - metadata = metadata.iloc[check_in_list(metadata["sampleType"], filter.sample_type)] + metadata = metadata.iloc[ + check_in_list(metadata["sample_type"], filter.sample_type) + ] # condition when there is no samples with the provided filters if len(metadata) == 0: logging.warn("Sorry, no samples passed the applied filters.") return None - + # get the celltype information sample_ct_info = get_sample_ct_info() # remove the unnecessary fields from the metadata and only keep the below defined columns - retain_field = ["sampleId", "projectId", "sampleType", "anatomicalSite", "disease", - "tissue", "platform", "ageGroup", "age", "gender", "cellSorting", - "diseaseSubtype", "diseaseStage", "treatment", "md5h5ad"] - + # retain_field = [ + # "sample_id", + # "projectId", + # "sample_type", + # "anatomicalSite", + # "disease", + # "tissue", + # "platform", + # "ageGroup", + # "age", + # "gender", + # "cellSorting", + # "diseaseSubtype", + # "diseaseStage", + # "treatment", + # "md5h5ad", + # ] + # subset to the retained field - metadata = metadata[retain_field] + # metadata = metadata[retain_field] # checking for the specific cell type if filter.cell_type is None: - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], metadata["sampleId"])] - sample_cell_count = pd.DataFrame(sample_ct_info.groupby(["sampleId"])["cellNumber"].agg("sum")) # get the total number of cells - sample_cell_count.columns = ["x"] # assigning different column name - filtered_index = np.intersect1d(sample_cell_count.index, list(metadata["sampleId"])) # getting the common sampleId from the ct info and metadata info - metadata = metadata.loc[metadata['sampleId'].isin(filtered_index)] # filtering to only the common 
sampleId - metadata["cell_number"] = list(sample_cell_count.loc[filtered_index]["x"]) # assigning to the metadata - metadata = metadata[metadata["cell_number"] > filter.min_cell_per_sample] # filter by the minimum cell per sample + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], metadata["sample_id"]) + ] + sample_cell_count = pd.DataFrame( + sample_ct_info.groupby(["sample_id"])["cell_number"].agg("sum") + ) # get the total number of cells + sample_cell_count.columns = ["x"] # assigning different column name + filtered_index = np.intersect1d( + sample_cell_count.index, list(metadata["sample_id"]) + ) # getting the common sample_id from the ct info and metadata info + metadata = metadata.loc[ + metadata["sample_id"].isin(filtered_index) + ] # filtering to only the common sample_id + metadata["cell_number"] = list( + sample_cell_count.loc[filtered_index]["x"] + ) # assigning to the metadata + metadata = metadata[ + metadata["cell_number"] > filter.min_cell_per_sample + ] # filter by the minimum cell per sample # condiition for none if len(metadata) == 0: logging.warn("Sorry, no samples passed the applied filters.") return None - # subsetting based on the sampleId - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], metadata["sampleId"])] - + # subsetting based on the sample_id + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], metadata["sample_id"]) + ] + # now setting the attribute of the FilterData filter_data.sample_metadata = metadata filter_data.cell_type_metadata = sample_ct_info filter_data.sample_count = len(metadata) filter_data.cell_count = sum(metadata["cell_number"]) - filter_data.filter = filter # now set all the changes we made to the Filter object + filter_data.filter = ( + filter # now set all the changes we made to the Filter object + ) # giving the message to the user - logging.info("%s samples and %s cells were found" % (filter_data.sample_count, filter_data.cell_count)) + logging.info( + "%s samples and %s cells were found" + % (filter_data.sample_count, filter_data.cell_count) + ) return filter_data - + # condition to get all the data for the sub celltype if filter.include_cell_type_childen: filter.cell_type = get_celltype_children(filter.cell_type) # subset to the given cell type and sample - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["cellType"], filter.cell_type)] - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], list(metadata["sampleId"]))] + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["cell_type"], filter.cell_type) + ] + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], list(metadata["sample_id"])) + ] # condition for different values of the cell_type_confidence if filter.cell_type_confidence not in ["high", "medium", "all"]: - logging.warning("cell_type_confidence can only be high, medium, or all") # give the user an informative message + logging.warning( + "cell_type_confidence can only be high, medium, or all" + ) # give the user an informative message if filter.cell_type_confidence == "high": - sample_ct_info = sample_ct_info[sample_ct_info["cellTypeScore"] >= 0.8] + sample_ct_info = sample_ct_info[sample_ct_info["cell_type_score"] >= 0.8] elif filter.cell_type_confidence == "medium": - sample_ct_info = sample_ct_info[sample_ct_info["cellTypeScore"] >= 0.6] + sample_ct_info = sample_ct_info[sample_ct_info["cell_type_score"] >= 0.6] # again condition for 
no sample found and return None if len(sample_ct_info) == 0: logging.warn("Sorry, no samples passed the applied filters.") return None - + # follow from previous block of code - metadata = metadata.iloc[check_in_list(list(metadata["sampleId"]), sample_ct_info["sampleId"])] - sample_cell_count = pd.DataFrame(sample_ct_info.groupby(["sampleId"])["cellNumber"].agg("sum")) # total cell numbers + metadata = metadata.iloc[ + check_in_list(list(metadata["sample_id"]), sample_ct_info["sample_id"]) + ] + sample_cell_count = pd.DataFrame( + sample_ct_info.groupby(["sample_id"])["cell_number"].agg("sum") + ) # total cell numbers sample_cell_count.columns = ["x"] - - metadata["cell_number"] = list(sample_cell_count.loc[list(metadata["sampleId"])]["x"]) # count number of cell - metadata = metadata[metadata["cell_number"] > filter.min_cell_per_sample] # filter based on min cell per sample + + metadata["cell_number"] = list( + sample_cell_count.loc[list(metadata["sample_id"])]["x"] + ) # count number of cell + metadata = metadata[ + metadata["cell_number"] > filter.min_cell_per_sample + ] # filter based on min cell per sample # condition for None if len(metadata) == 0: @@ -284,7 +361,9 @@ def filter_disco_metadata(filter : Filter = Filter()): return None # subsetting before assign attribute for the FilterData object - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], metadata["sampleId"])] + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], metadata["sample_id"]) + ] # now assign all the values to the attribute of the object filter_data.sample_metadata = metadata @@ -292,9 +371,13 @@ def filter_disco_metadata(filter : Filter = Filter()): filter_data.sample_count = len(metadata) filter_data.cell_count = sum(metadata["cell_number"]) filter_data.filter = filter - logging.info("%s samples and %s cells were found" % (filter_data.sample_count, filter_data.cell_count)) + logging.info( + "%s samples and %s cells were found" + % (filter_data.sample_count, filter_data.cell_count) + ) return filter_data + # utils function for getting the index of the element on the left list for subsetting if the element is exist in the right list. def check_in_list(var, whitelist): - return [index for index, each in enumerate(list(var)) if each in list(whitelist)] \ No newline at end of file + return [index for index, each in enumerate(list(var)) if each in list(whitelist)] diff --git a/build/lib/discotoolkit/GlobalVariable.py b/build/lib/discotoolkit/GlobalVariable.py index b737a21..ec626ab 100644 --- a/build/lib/discotoolkit/GlobalVariable.py +++ b/build/lib/discotoolkit/GlobalVariable.py @@ -1,3 +1,11 @@ +''' +Descripttion: +version: +Author: Mengwei Li +Date: 2023-07-06 16:59:59 +LastEditors: Mengwei Li +LastEditTime: 2024-10-23 15:52:29 +''' """ Global variable file to import for the subsequent script. 
@@ -17,9 +25,9 @@ timeout = 600 # Define package-level variable -response = requests.get("http://www.immunesinglecell.org/api/vishuo/getToolkitUrl") +# response = requests.get("http://www.immunesinglecell.org/api/vishuo/getToolkitUrl") -if response.status_code == 200: - prefix_disco_url = json.loads(response.text)["url"] -else: - prefix_disco_url = "http://www.immunesinglecell.org/toolkitapi" \ No newline at end of file +# if response.status_code == 200: +# prefix_disco_url = json.loads(response.text)["url"] +# else: +prefix_disco_url = "https://immunesinglecell.org/disco_v3_api/" \ No newline at end of file diff --git a/build/lib/discotoolkit/Utilities.py b/build/lib/discotoolkit/Utilities.py new file mode 100644 index 0000000..1fd4b9c --- /dev/null +++ b/build/lib/discotoolkit/Utilities.py @@ -0,0 +1,74 @@ +import h5py +import numpy as np +from scipy.sparse import csr_matrix +from pathlib import Path + + +def write_10X_h5(adata, file): + """Writes adata to a 10X-formatted h5 file. + + Note that this function is not fully tested and may not work for all cases. + It will not write the following keys to the h5 file compared to 10X: + '_all_tag_keys', 'pattern', 'read', 'sequence' + + Args: + adata (AnnData object): AnnData object to be written. + file (str): File name to be written to. If no extension is given, '.h5' is appended. + + Raises: + FileExistsError: If file already exists. + + Returns: + None + """ + + if ".h5" not in file: + file = f"{file}.h5" + if Path(file).exists(): + raise FileExistsError(f"There already is a file `{file}`.") + + def int_max(x): + return int(max(np.floor(len(str(int(max(x)))) / 4), 1) * 4) + + def str_max(x): + return max([len(i) for i in x]) + + w = h5py.File(file, "w") + grp = w.create_group("matrix") + grp.create_dataset( + "barcodes", + data=np.array(adata.obs_names, dtype=f"|S{str_max(adata.obs_names)}"), + ) + grp.create_dataset( + "data", data=np.array(adata.X.data, dtype=f" 3: - logging.info("Any value of n_predict that exceeds 3 will be automatically adjusted to 3.") + logging.info( + "Any value of n_predict that exceeds 3 will be automatically adjusted to 3." 
+ ) n_predict = 3 - + # Download reference data and ref_deg if missing if (ref_data is None) or (ref_deg is None): - # check for the data path if ref_path is None: ref_path = "DISCOtmp" @@ -94,42 +138,60 @@ def CELLiD_cluster(rna, ref_data : pd.DataFrame = None, ref_deg : pd.DataFrame = # if the path does not exist, we create a default directory if not os.path.exists(ref_path): os.mkdir(ref_path) - + # download reference data from the server in pickle extension and read as pandas dataframe if ref_data is None: - if not(os.path.exists(ref_path + "/ref_data.pkl")): + if not (os.path.exists(ref_path + "/ref_data.pkl")): logging.info("Downloading reference dataset...") - response = requests.get(url=prefix_disco_url +"/getRefPkl", timeout=timeout) + response = requests.get( + url=prefix_disco_url + "toolkit/getRef?type=pkl", timeout=timeout + ) open(ref_path + "/ref_data.pkl", "wb").write(response.content) - ref_data = pd.read_pickle(ref_path + "/ref_data.pkl", compression = {'method':'gzip','compresslevel':6}) + ref_data = pd.read_pickle( + ref_path + "/ref_data.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) # similarly do the same for the DEG reference if ref_deg is None: - if not(os.path.exists(ref_path + "/ref_deg.pkl")): + if not (os.path.exists(ref_path + "/ref_deg.pkl")): logging.info("Downloading deg dataset...") - response = requests.get(url=prefix_disco_url +"/getRefDegPkl", timeout=timeout) + response = requests.get( + url=prefix_disco_url + "toolkit/getRefDeg?type=pkl", timeout=timeout + ) open(ref_path + "/ref_deg.pkl", "wb").write(response.content) - ref_deg = pd.read_pickle(ref_path + "/ref_deg.pkl", compression = {'method':'gzip','compresslevel':6}) - + ref_deg = pd.read_pickle( + ref_path + "/ref_deg.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) + #### # write list of atlas function # we can write a function to get all the atlas from disco to the users # subsetting to the selected atlas by the user if atlas is not None: - select_ref = [index for index, each in enumerate(ref_data.columns) if each.split("--")[1] in list(atlas)] + select_ref = [ + index + for index, each in enumerate(ref_data.columns) + if each.split("--")[1] in list(atlas) + ] ref_data = ref_data.iloc[:, select_ref] - + # get the intersection of the gene for the user data and the reference data genes = set.intersection(set(rna.index), set(ref_data.index)) # give an error as 2000 genes are very low if len(genes) <= 2000: - logging.error("Less than 2000 genes are shared between the input data and the reference dataset!") + logging.error( + "Less than 2000 genes are shared between the input data and the reference dataset!" + ) # give a warning to the user if len(genes) <= 5000: - logging.warning("The input data and reference dataset have a limited number of overlapping genes, which may potentially impact the accuracy of the CELLiD.") - + logging.warning( + "The input data and reference dataset have a limited number of overlapping genes, which may potentially impact the accuracy of the CELLiD." 
+ ) + # subsetting the dataframe to the common genes across data and the reference rna = rna.loc[list(genes)] ref_data = ref_data.loc[list(genes)] @@ -137,7 +199,7 @@ def CELLiD_cluster(rna, ref_data : pd.DataFrame = None, ref_deg : pd.DataFrame = # nested apply function to compute correlation for each cell type between the user and reference database def each_ref_correlation(input, ref_data): input = list(input) - res = ref_data.parallel_apply(basic_correlation, input = input, axis = 0) + res = ref_data.parallel_apply(basic_correlation, input=input, axis=0) return res # apply the correlation to all the columns in user data @@ -148,9 +210,14 @@ def basic_correlation(ref, input): predicted = pd.Series(list(ref)).corr(pd.Series(list(input)), method="spearman") return predicted - + # get the predicted correlation as in pandas DataFrame - predicted_cell = rna.apply(each_ref_correlation, result_type = "expand", axis = 0, ref_data = ref_data) + # predicted_cell = pd.DataFrame( + # generate_correlation_map(ref_data.to_numpy().T, rna.to_numpy().T) + # ) + predicted_cell = rna.apply( + each_ref_correlation, result_type="expand", axis=0, ref_data=ref_data + ) predicted_cell = pd.DataFrame(predicted_cell) # set the index to be the celltype based on the reference_data @@ -161,60 +228,84 @@ def basic_correlation(ref, input): # define a function to apply to each column of the dataframe def get_top_5_indices(col): # get the indices of the top 5 values in the column - indices = np.where(col.rank(method='min', ascending=False) <= 5)[0] + indices = np.where(col.rank(method="min", ascending=False) <= 5)[0] # convert to numeric and return - return pd.Series(indices).astype('int') - - ct = predicted_cell.apply(get_top_5_indices) - - # ct = pd.DataFrame(predicted_cell.parallel_apply(lambda x: [index for index, each in enumerate(x.rank()) if each <= 5], axis = 0, result_type = "expand")) + return pd.Series(indices).astype("int") + ct = predicted_cell.apply(get_top_5_indices) # compute the correleration based on the genes set that is intersection between the user data and reference dataset + def second_correlation(i, ref_data, rna, ct): - ref = ref_data.iloc[:,ct[i]].copy() - g = set(ref_deg.iloc[check_in_list(ref_deg["group"], ref.columns)].copy()["gene"]) + ref = ref_data.iloc[:, list(ct[str(i)])].copy() + g = set( + ref_deg.iloc[check_in_list(ref_deg["group"], ref.columns)].copy()["gene"] + ) g = set.intersection(set(ref.index), g) ref = ref.loc[list(g)].copy() - input = rna.loc[list(g), i].copy() + # print(rna.loc[list(g), str(i)]) + input = rna.loc[list(g), str(i)].copy() # predict = ref.apply(lambda i: np.round(stats.spearmanr(pd.to_numeric(i), pd.to_numeric(input), nan_policy='omit')[0], 3)) - predict = ref.apply(lambda i: np.round(pd.Series(i).corr(pd.Series(input), method='spearman'), 3)) # trying with pandas correlation + predict = ref.apply( + lambda i: np.round( + pd.Series(i).corr(pd.Series(input), method="spearman"), 3 + ) + ) # trying with pandas correlation predict = pd.DataFrame(predict) predict.columns = ["cor"] - predict = predict.sort_values(["cor"], ascending = False) - res = [[each.split("--")[0], each.split("--")[1], predict["cor"][index], i] for index, each in enumerate(predict.index) if index < n_predict] - res = pd.DataFrame(res, columns =['cell_type', 'atlas', "score", "input_index"]) + predict = predict.sort_values(["cor"], ascending=False) + res = [ + [each.split("--")[0], each.split("--")[1], predict["cor"][index], i] + for index, each in enumerate(predict.index) + if index < 
n_predict + ] + res = pd.DataFrame(res, columns=["cell_type", "atlas", "score", "input_index"]) return res # return list in the format cell_type, atlas, score, input_index # get a data in the format of cell_type, atlas, score, input_index - predicted_cell = Parallel(n_jobs=ncores, batch_size=32, verbose = 2)(delayed(second_correlation)(int(y), ref_data, rna, ct) for y in range(rna.shape[1])) + predicted_cell = Parallel(n_jobs=ncores, batch_size=32, verbose=2)( + delayed(second_correlation)(int(y), ref_data, rna, ct) + for y in range(rna.shape[1]) + ) # first concatenate in row wise manner predicted_cell = pd.concat(predicted_cell) - pivot = pd.pivot_table(predicted_cell, index='input_index', columns=predicted_cell.groupby('input_index').cumcount()+1, - values=['cell_type', 'atlas', 'score'], aggfunc='first') + pivot = pd.pivot_table( + predicted_cell, + index="input_index", + columns=predicted_cell.groupby("input_index").cumcount() + 1, + values=["cell_type", "atlas", "score"], + aggfunc="first", + ) - pivot = pivot[["cell_type", "atlas", "score"]] + pivot = pivot[["cell_type", "atlas", "score"]] # flatten multi-level column index - pivot.columns = ['_'.join(map(str, col)).strip() for col in pivot.columns.values] + pivot.columns = ["_".join(map(str, col)).strip() for col in pivot.columns.values] # rename columns column_names = {} for i in range(n_predict): - column_names[f'cell_type_{i+1}'] = f'predicted_cell_type_{i+1}' - column_names[f'atlas_{i+1}'] = f'source_atlas_{i+1}' - column_names[f'score_{i+1}'] = f'score_{i+1}' - + column_names[f"cell_type_{i+1}"] = f"predicted_cell_type_{i+1}" + column_names[f"atlas_{i+1}"] = f"source_atlas_{i+1}" + column_names[f"score_{i+1}"] = f"score_{i+1}" + pivot = pivot.rename(columns=column_names) - return pivot# return pandas dataframe in the format the same original R package + return pivot # return pandas dataframe in the format the same original R package + """ Geneset enrichment analysis using DISCO data """ -def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref_path : str = None, ncores : int = 1): + +def CELLiD_enrichment( + input: pd.DataFrame, + reference: pd.DataFrame = None, + ref_path: str = None, + ncores: int = 1, +): """Function to generate enrichment analysis based on the reference gene sets and following the DISCO pipeline. 
Args: @@ -229,7 +320,7 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # check input data # check if the input type is a DataFrame - if not(isinstance(input, pd.DataFrame)): + if not (isinstance(input, pd.DataFrame)): logging.error("The input must be a dataframe") # check if the user provide more than the needed column which are gene and fc @@ -240,7 +331,7 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref if input.shape[1] == 2: input_shape = 2 input.columns = ["gene", "fc"] - + # else we can just take a gene list for the enrichment else: input_shape = 1 @@ -248,7 +339,6 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # if the user does not provide reference geneset list if reference is None: - # check for the data path if it is provided by the user if ref_path is None: ref_path = "DISCOtmp" @@ -256,31 +346,45 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # if the path does not exist, we create a default directory if not os.path.exists(ref_path): os.mkdir(ref_path) - + # check if the file does not exist if not (os.path.exists(ref_path + "/ref_geneset.pkl")): # downloading the geneset data from disco database - response = requests.get(url=prefix_disco_url +"/getGeneSetPkl", timeout=timeout) + response = requests.get( + url=prefix_disco_url + "/getGeneSetPkl", timeout=timeout + ) open(ref_path + "/ref_geneset.pkl", "wb").write(response.content) - reference = pd.read_pickle(ref_path + "/ref_geneset.pkl", compression = {'method':'gzip','compresslevel':6}) + reference = pd.read_pickle( + ref_path + "/ref_geneset.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) else: # read the data into pandas dataframe for subsequent analysis - reference = pd.read_pickle(ref_path + "/ref_geneset.pkl", compression = {'method':'gzip','compresslevel':6}) + reference = pd.read_pickle( + ref_path + "/ref_geneset.pkl", + compression={"method": "gzip", "compresslevel": 6}, + ) # rename the name data to include the reference atlas reference["name"] = reference["name"] + " in " + reference["atlas"] - input["gene"] = input["gene"].str.upper() # convert all the gene to upper string as the reference gene name is in uppercase + input["gene"] = input[ + "gene" + ].str.upper() # convert all the gene to upper string as the reference gene name is in uppercase # condition when the user provide the fold change for the enrichment analysis if input_shape == 2: - input["fc"] = 2 ** input["fc"] # 2 to the power of fc - input = input.set_index("gene", drop = False) # set index to the gene - input = input.loc[list(set(reference["gene"]) & set(input['gene']))] # only get the common genes for comparison + input["fc"] = 2 ** input["fc"] # 2 to the power of fc + input = input.set_index("gene", drop=False) # set index to the gene + input = input.loc[ + list(set(reference["gene"]) & set(input["gene"])) + ] # only get the common genes for comparison # else we only look at the ranked gene list else: - input = input.set_index("gene", drop = False) - input = input.loc[list(set(reference["gene"]) & set(input['gene']))] # only get the common genes for comparison + input = input.set_index("gene", drop=False) + input = input.loc[ + list(set(reference["gene"]) & set(input["gene"])) + ] # only get the common genes for comparison # filter to get only the geneset that contain the input genes from the user # this might good different result from the website so lets remove it for # unique_names = 
reference[reference["gene"].isin(input["gene"])]["name"].unique() @@ -291,8 +395,8 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref # optimise the CELLiD_enrichment function atlas_dict = {} name_dict = {} - reference_atlas = reference['atlas'] - reference_name = reference['name'] + reference_atlas = reference["atlas"] + reference_name = reference["name"] # pre compute the parameters of the fisher exact test for the reference data for rownum in range(len(reference)): @@ -305,57 +409,105 @@ def CELLiD_enrichment(input : pd.DataFrame, reference : pd.DataFrame = None, ref name_dict[reference_name[rownum]] = [rownum] else: name_dict[reference_name[rownum]].append(rownum) - + # apply function to the dataframe - def process_unique_name(reference_filter, input, atlas_dfs, input_shape, atlas_dict): + def process_unique_name( + reference_filter, input, atlas_dfs, input_shape, atlas_dict + ): # use the compile`d pattern to search for matches - unique_name = reference_filter['name'].head(1).values[0] - atlas = reference_filter['atlas'].head(1).values + unique_name = reference_filter["name"].head(1).values[0] + atlas = reference_filter["atlas"].head(1).values # print(unique_name) reference_full = atlas_dfs[list(atlas_dict.keys()).index(atlas)] # reference_filter = reference.iloc[name_dict[unique_name]].copy() # subset the reference data # reference_full = reference.iloc[atlas_dict[atlas]].copy() # full reference to the atlas - reference_filter = reference_filter.set_index(["gene"], drop = False) # set gene as the index + reference_filter = reference_filter.set_index( + ["gene"], drop=False + ) # set gene as the index # reference_filter["gene"] = reference_filter.index - input_filter = input.loc[list(set(reference_full["gene"]) & set(input["gene"]))] # subset the input genes + input_filter = input.loc[ + list(set(reference_full["gene"]) & set(input["gene"])) + ] # subset the input genes # condition for no passed gene set if input_filter.empty: return None - + # different computation to either include the fold change in the fisher exact test if input_shape == 2: - a = (reference_filter.loc[np.intersect1d(reference_filter["gene"], (input_filter["gene"]))].iloc[:, 0] * input_filter.loc[np.intersect1d(input_filter["gene"], reference_filter["gene"]), "fc"]).sum() + 1 - b = input_filter.loc[np.setdiff1d(input_filter["gene"], reference_filter["gene"]), "fc"].sum() + 1 - c = reference_filter.loc[np.setdiff1d(reference_filter["gene"], input_filter["gene"])].iloc[:, 0].sum() + 1 - d = len(set(reference_full["gene"])) - len(set(reference_filter["gene"]).union(set(input_filter["gene"]))) + a = ( + reference_filter.loc[ + np.intersect1d(reference_filter["gene"], (input_filter["gene"])) + ].iloc[:, 0] + * input_filter.loc[ + np.intersect1d(input_filter["gene"], reference_filter["gene"]), "fc" + ] + ).sum() + 1 + b = ( + input_filter.loc[ + np.setdiff1d(input_filter["gene"], reference_filter["gene"]), "fc" + ].sum() + + 1 + ) + c = ( + reference_filter.loc[ + np.setdiff1d(reference_filter["gene"], input_filter["gene"]) + ] + .iloc[:, 0] + .sum() + + 1 + ) + d = len(set(reference_full["gene"])) - len( + set(reference_filter["gene"]).union(set(input_filter["gene"])) + ) else: a = len(np.intersect1d(reference_filter["gene"], input_filter["gene"])) + 1 b = len(np.setdiff1d(input_filter["gene"], reference_filter["gene"])) + 1 c = len(np.setdiff1d(reference_filter["gene"], input_filter["gene"])) + 1 d = len(set(reference_full["gene"])) - a - b - c - + # get both value from the fisher exact 
test # pval = pvalue(a, b, c, d).two_tail _, pval = stats.fisher_exact(np.array([[a, b], [c, d]])) - odds = (a * d)/(b * c) + odds = (a * d) / (b * c) # only get the significant p_value # we can make changes to the p_value as in like we allow the user to specify it so that they have more control on the result they can obtain if pval < 0.01: - return pd.Series({"pval": pval, "or": odds, "name": unique_name, - "gene": ",".join(reference_filter.loc[reference_filter["gene"].isin(input_filter["gene"]), "gene"]), - "background": len(set(reference_full["gene"])), "overlap": len(reference_filter.loc[reference_filter["gene"].isin(input_filter["gene"])]), "geneset": len(reference_filter)}) + return pd.Series( + { + "pval": pval, + "or": odds, + "name": unique_name, + "gene": ",".join( + reference_filter.loc[ + reference_filter["gene"].isin(input_filter["gene"]), "gene" + ] + ), + "background": len(set(reference_full["gene"])), + "overlap": len( + reference_filter.loc[ + reference_filter["gene"].isin(input_filter["gene"]) + ] + ), + "geneset": len(reference_filter), + } + ) else: return None # Split them into cores unique_name_dfs = [reference.iloc[name_dict[key], :] for key in name_dict.keys()] atlas_dfs = [reference.iloc[atlas_dict[key], :] for key in atlas_dict.keys()] - + # return unique_name_dfs, atlas_dfs, input, input_shape # return unique_name_dfs, atlas_dfs # using joblib to apply multiprocessing - results = Parallel(n_jobs=ncores, verbose = 2, batch_size = 64)(delayed(process_unique_name)(unique_name_df, input, atlas_dfs, input_shape, atlas_dict) for unique_name_df in unique_name_dfs) + results = Parallel(n_jobs=ncores, verbose=2, batch_size=64)( + delayed(process_unique_name)( + unique_name_df, input, atlas_dfs, input_shape, atlas_dict + ) + for unique_name_df in unique_name_dfs + ) results = [res for res in results if res is not None] # concatenate the result into the dataframe to the user @@ -363,8 +515,9 @@ def process_unique_name(reference_filter, input, atlas_dfs, input_shape, atlas_d res_df = pd.concat(results, axis=1).transpose() res_df["pval"] = res_df["pval"].astype(float).round(3) res_df["or"] = res_df["or"].astype(float).round(3) - res_df = res_df.sort_values(by=["pval", "or"], ascending=[True, False]).head(min(50, len(results))) + res_df = res_df.sort_values(by=["pval", "or"], ascending=[True, False]).head( + min(50, len(results)) + ) return res_df else: return None - \ No newline at end of file diff --git a/discotoolkit/DiscoClass.py b/discotoolkit/DiscoClass.py index a055cb2..ab3d4fe 100644 --- a/discotoolkit/DiscoClass.py +++ b/discotoolkit/DiscoClass.py @@ -6,10 +6,10 @@ class Filter: """ Filter class object to save the attributes for filtering the dataset from DISCO - sample String e.g. GSM3891625_3; - project String; - tissue String e.g. Lung, Bladder; - disease String e.g. PDAC; + sample_id String e.g. ERX2757110; + project_id String; + tissue String e.g. lung, bladder; + disease String e.g. COVID-19; platform String e.g. 
10x3'; sample_type String; cell_type String; @@ -20,12 +20,12 @@ class Filter: return Class object """ - def __init__(self, sample = None, project = None, tissue = None, disease = None, platform = None, sample_type = None, + def __init__(self, sample_id = None, project_id = None, tissue = None, disease = None, platform = None, sample_type = None, cell_type = None, cell_type_confidence : str = "medium", include_cell_type_children : bool = True, min_cell_per_sample : int = 100): # handling for string and list input - self.sample = self.convert_to_list(sample) # sample id - self.project = self.convert_to_list(project) # project, lab, or dataset from different author + self.sample_id = self.convert_to_list(sample_id) # sample id + self.project_id = self.convert_to_list(project_id) # project, lab, or dataset from different author self.tissue = self.convert_to_list(tissue) # organ tissue self.disease = self.convert_to_list(disease) # cancer or non cancer, or COVID-1e9 disease self.platform = self.convert_to_list(platform) # sequencing platform diff --git a/discotoolkit/DownloadDiscoData.py b/discotoolkit/DownloadDiscoData.py index 273986a..484c7d4 100644 --- a/discotoolkit/DownloadDiscoData.py +++ b/discotoolkit/DownloadDiscoData.py @@ -1,11 +1,11 @@ -''' -Descripttion: -version: +""" +Descripttion: +version: Author: Mengwei Li Date: 2023-04-14 16:33:44 LastEditors: Mengwei Li -LastEditTime: 2023-04-16 12:14:48 -''' +LastEditTime: 2024-10-23 21:30:03 +""" # import libraries import requests @@ -14,14 +14,15 @@ import re import os import hashlib +import scanpy as sc # import variable and class from other script from .GlobalVariable import logging, prefix_disco_url from .DiscoClass import FilterData, Filter from .GetMetadata import check_in_list -def download_disco_data(metadata, output_dir : str = "DISCOtmp"): +def download_disco_data(metadata, output_dir: str = "DISCOtmp"): """function to download the data based on the given filter Args: metadata (FilterData) : FilterData class to filters data from DISCO database @@ -29,116 +30,209 @@ def download_disco_data(metadata, output_dir : str = "DISCOtmp"): Returns: None: This function does not return any object and instead download the data for the user - """ + """ # define a list to store the error sample that can not be download error_sample = [] # create directory known as DISCOtmp or based on user defined directory if it does not exist - if not(os.path.exists(output_dir)): + if not (os.path.exists(output_dir)): os.mkdir(output_dir) + samples = list(metadata.sample_metadata["sample_id"]) # download data when the user does not want to subset to specific cell type - if metadata.filter.cell_type is None: - - # getting the dataframe of the samples - samples = metadata.sample_metadata - - # iterate through all the samples - for i in range(len(samples)): - s = list(samples["sampleId"])[i] - p = list(samples["projectId"])[i] - output_file = "%s/%s.h5ad" % (output_dir, s) # getting the name of the file - - # condition if the file has been downloaded before - if (os.path.exists(output_file)) and \ - hashlib.md5(open(output_file, "rb").read()).hexdigest() == list(samples["md5h5ad"])[i]: - logging.info(" %s has been downloaded before. Ignore ..." 
% (s)) # giving message to the user - else: - logging.info("Downloading data of %s" % (s)) # giving message to the user - - try: - # try to download the data as requested - response = requests.get(url=prefix_disco_url + "getH5adBySample?project="+ p +"&sample=" + s) - - # condition for error in downloading the samples - if response.status_code == 404: - error_sample.append(s) # append the error samples into a list for debugging later on - logging.warning("sample %s download fail" % (s)) # giving message to the user - else: - # write the file to directory - open(output_file, "wb").write(response.content) - - # condition for another error - if hashlib.md5(open(output_file, "rb").read()).hexdigest() != list(samples["md5h5ad"])[i]: - error_sample.append(s) - os.remove(output_file) - logging.warning("sample %s download fail" % (s)) # give message to the user - except: - # except error when the download fail - error_sample.append(s) - logging.warning("sample %s download fail" % (s)) - - # if there are error samples, we set all the attribute for FilterData object to the error samples and return the object in FilterData object - if len(error_sample) > 0: - metadata.sample_metadata = metadata.sample_metadata.set_index("sampleId") # change from inplace to this to increase efficiency - metadata.sample_metadata = metadata.sample_metadata.loc[error_sample] - metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[check_in_list(metadata.cell_type_metadata["sampleId"], error_sample)] - metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() - metadata.sample_count = len(error_sample) - return metadata - - else: - # get the dataframe for the metadata - metadata.sample_metadata = metadata.sample_metadata.set_index("sampleId") - samples = metadata.cell_type_metadata - concat_func = lambda x,y: x + "_" + str(y) - - # create new element by concatenating the sampleId and cluster - samples["sampleId"] = list(map(concat_func, list(samples["sampleId"]), - list(samples["cluster"]))) - - # similarly, iterative through all the sample for downloading - for i in range(len(samples)): - s = list(samples["sampleId"])[i] - sub_s = s.replace("_" + str(list(samples["cluster"])[i]) , "") # adding handler for celltype specific to get the data - p = metadata.sample_metadata.loc[sub_s]["projectId"] - output_file = "%s/%s.h5ad" % (output_dir, s) + + samples = metadata.sample_metadata + + for i in range(len(samples)): + s = list(samples["sample_id"])[i] + p = list(samples["project_id"])[i] + + output_file = "%s/%s.h5ad" % (output_dir, s) + if os.path.exists(output_file): + os.remove(output_file) + + url = prefix_disco_url + "download/getRawH5/" + p + "/" + s + rna = sc.read_10x_h5(filename=output_file, backup_url=url) + + cell = pd.read_csv( + prefix_disco_url + "toolkit/" + "getCellTypeSample?sampleId=" + s, + sep="\t", + header=0, + ) + # cell.set_index("cell_id") + cell = cell.iloc[:, [0, 2, 5]] + if metadata.filter.cell_type is not None: + if metadata.filter.cell_type_confidence == "high": + cell = cell.loc[cell["cell_type_score"] >= 0.8] + + elif metadata.filter.cell_type_confidence == "medium": + cell = cell.loc[cell["cell_type_score"] >= 0.6] + cell = cell.loc[cell.cell_type.isin(metadata.filter.cell_type)] - # checking for file and ignore if it is already exist - if (os.path.exists(output_file)) and \ - hashlib.md5(open(output_file, "rb").read()).hexdigest() == list(samples["h5adMd5"]): - logging.info(" %s has been downloaded before. Ignore ..." 
% (s)) # give message to the user - else: - logging.info("Downloading data of %s" % (s)) # give message to the user - try: - # get the data from the server - response = requests.get(url=prefix_disco_url + "getH5adBySampleCt?project="+ p +"&sample=" + s) - if response.status_code == 404: - error_sample.append(s) # append error samples - logging.warning("sample %s download fail 1" % (s)) # give message to the user - else: - open(output_file, "wb").write(response.content) - - # condition for another error - if hashlib.md5(open(output_file, "rb").read()).hexdigest() != list(samples["h5adMd5"])[i]: - error_sample.append(s) - os.remove(output_file) - logging.warning("sample %s download fail" % (s)) # give message to the user - except: - error_sample.append(s) - logging.warning("sample %s download fail" % (s)) # give message to the user - - # similarly, record the error samples and return to the variable - if len(error_sample) > 0: - metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[check_in_list(metadata.cell_type_metadata["sampleId"], error_sample)] - metadata.sample_metadata = metadata.sample_metadata.iloc[check_in_list(list(metadata.sample_metadata["sampleId"]), metadata.cell_type_metadata["sampleId"])] - metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() - metadata.sample_count = len(error_sample) - sample_cell_count = pd.DataFrame(metadata.cell_type_metadata.groupby(["sampleId"])["cellNumber"].agg("sum")) - sample_cell_count.columns = ["x"] - metadata.sample_metadata["cell_number"] = list(sample_cell_count.loc[list(metadata.sample_metadata["sampleId"])]["x"]) - return metadata - - # else return None as the data has been downloaded - return None \ No newline at end of file + rna = rna[cell.cell_id].copy() + rna.obs["cell_type"] = list(cell.cell_type) + rna.obs["sample_id"] = s + rna.obs["project_id"] = p + rna.write(output_file) + + # if metadata.filter.cell_type is None: + # # getting the dataframe of the samples + # samples = metadata.sample_metadata + + # # iterate through all the samples + # for i in range(len(samples)): + # s = list(samples["sampleId"])[i] + # p = list(samples["projectId"])[i] + # output_file = "%s/%s.h5ad" % (output_dir, s) # getting the name of the file + + # # condition if the file has been downloaded before + # if (os.path.exists(output_file)) and hashlib.md5( + # open(output_file, "rb").read() + # ).hexdigest() == list(samples["md5h5ad"])[i]: + # logging.info( + # " %s has been downloaded before. Ignore ..." 
% (s) + # ) # giving message to the user + # else: + # logging.info( + # "Downloading data of %s" % (s) + # ) # giving message to the user + + # try: + # # try to download the data as requested + # response = requests.get( + # url=prefix_disco_url + # + "getH5adBySample?project=" + # + p + # + "&sample=" + # + s + # ) + + # # condition for error in downloading the samples + # if response.status_code == 404: + # error_sample.append( + # s + # ) # append the error samples into a list for debugging later on + # logging.warning( + # "sample %s download fail" % (s) + # ) # giving message to the user + # else: + # # write the file to directory + # open(output_file, "wb").write(response.content) + + # # condition for another error + # if ( + # hashlib.md5(open(output_file, "rb").read()).hexdigest() + # != list(samples["md5h5ad"])[i] + # ): + # error_sample.append(s) + # os.remove(output_file) + # logging.warning( + # "sample %s download fail" % (s) + # ) # give message to the user + # except: + # # except error when the download fail + # error_sample.append(s) + # logging.warning("sample %s download fail" % (s)) + + # # if there are error samples, we set all the attribute for FilterData object to the error samples and return the object in FilterData object + # if len(error_sample) > 0: + # metadata.sample_metadata = metadata.sample_metadata.set_index( + # "sampleId" + # ) # change from inplace to this to increase efficiency + # metadata.sample_metadata = metadata.sample_metadata.loc[error_sample] + # metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[ + # check_in_list(metadata.cell_type_metadata["sampleId"], error_sample) + # ] + # metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() + # metadata.sample_count = len(error_sample) + # return metadata + + # else: + # # get the dataframe for the metadata + # metadata.sample_metadata = metadata.sample_metadata.set_index("sampleId") + # samples = metadata.cell_type_metadata + # concat_func = lambda x, y: x + "_" + str(y) + + # # create new element by concatenating the sampleId and cluster + # samples["sampleId"] = list( + # map(concat_func, list(samples["sampleId"]), list(samples["cluster"])) + # ) + + # # similarly, iterative through all the sample for downloading + # for i in range(len(samples)): + # s = list(samples["sampleId"])[i] + # sub_s = s.replace( + # "_" + str(list(samples["cluster"])[i]), "" + # ) # adding handler for celltype specific to get the data + # p = metadata.sample_metadata.loc[sub_s]["projectId"] + # output_file = "%s/%s.h5ad" % (output_dir, s) + + # # checking for file and ignore if it is already exist + # if (os.path.exists(output_file)) and hashlib.md5( + # open(output_file, "rb").read() + # ).hexdigest() == list(samples["h5adMd5"]): + # logging.info( + # " %s has been downloaded before. Ignore ..." 
% (s) + # ) # give message to the user + # else: + # logging.info("Downloading data of %s" % (s)) # give message to the user + # try: + # # get the data from the server + # response = requests.get( + # url=prefix_disco_url + # + "getH5adBySampleCt?project=" + # + p + # + "&sample=" + # + s + # ) + # if response.status_code == 404: + # error_sample.append(s) # append error samples + # logging.warning( + # "sample %s download fail 1" % (s) + # ) # give message to the user + # else: + # open(output_file, "wb").write(response.content) + + # # condition for another error + # if ( + # hashlib.md5(open(output_file, "rb").read()).hexdigest() + # != list(samples["h5adMd5"])[i] + # ): + # error_sample.append(s) + # os.remove(output_file) + # logging.warning( + # "sample %s download fail" % (s) + # ) # give message to the user + # except: + # error_sample.append(s) + # logging.warning( + # "sample %s download fail" % (s) + # ) # give message to the user + + # # similarly, record the error samples and return to the variable + # if len(error_sample) > 0: + # metadata.cell_type_metadata = metadata.cell_type_metadata.iloc[ + # check_in_list(metadata.cell_type_metadata["sampleId"], error_sample) + # ] + # metadata.sample_metadata = metadata.sample_metadata.iloc[ + # check_in_list( + # list(metadata.sample_metadata["sampleId"]), + # metadata.cell_type_metadata["sampleId"], + # ) + # ] + # metadata.cell_count = metadata.cell_type_metadata["cellNumber"].sum() + # metadata.sample_count = len(error_sample) + # sample_cell_count = pd.DataFrame( + # metadata.cell_type_metadata.groupby(["sampleId"])["cellNumber"].agg( + # "sum" + # ) + # ) + # sample_cell_count.columns = ["x"] + # metadata.sample_metadata["cell_number"] = list( + # sample_cell_count.loc[list(metadata.sample_metadata["sampleId"])]["x"] + # ) + # return metadata + + # # else return None as the data has been downloaded + # return None diff --git a/discotoolkit/GetMetadata.py b/discotoolkit/GetMetadata.py index d641320..82f681a 100644 --- a/discotoolkit/GetMetadata.py +++ b/discotoolkit/GetMetadata.py @@ -14,7 +14,8 @@ from .GlobalVariable import logging, prefix_disco_url from .DiscoClass import FilterData, Filter -def get_json(url : str, info_msg: str, error_msg: str, prefix : bool = True): + +def get_json(url: str, info_msg: str, error_msg: str, prefix: bool = True): """Funtion to get json data from the server Args: @@ -26,36 +27,37 @@ def get_json(url : str, info_msg: str, error_msg: str, prefix : bool = True): Returns: JSON: return JSON data which will be converted into pandas dataframe later on """ - + # giving log message to the user logging.info(info_msg) # condition for adding prefix - if prefix == False: + if not prefix: response = requests.get(url) else: - response = requests.get(prefix_disco_url + "/" + url) - + response = requests.get(prefix_disco_url + url) + # if the loading of JSON is successful, we get the text if response.status_code == 200: data = json.loads(response.text) - return data # return json data Dictionary datatype in python + return data # return json data Dictionary datatype in python else: - # print informative message to the user logging.error(error_msg) + def get_sample_ct_info(): - """ Get cell type information of sample + """Get cell type information of sample Returns: Pandas DataFrame: return pandas dataframe to the user - """ - temp = get_json(url = "/getSampleCtInfo", info_msg = "Retrieving cell type information of each sample from DISCO database", - error_msg = "Failed to retrieve cell type information. 
Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.") - return pd.DataFrame(temp) # return Dataframe of the JSON data + """ + temp = pd.read_csv( + prefix_disco_url + "toolkit/" + "getCellTypeSummary", sep="\t", header=0 + ) + return temp # return Dataframe of the JSON data -def find_celltype(term : str = "", cell_ontology : dict = None): +def find_celltype(term: str = "", cell_ontology: dict = None): """find the celltype within the disco dataset Args: @@ -68,9 +70,12 @@ def find_celltype(term : str = "", cell_ontology : dict = None): # when the user do not have cell ontology, we get the default one from disco database if cell_ontology is None: - cell_ontology = get_json(url = "/getCellOntology", info_msg = "Retrieving ontology from DISCO database", - error_msg = "Failed to retrieve ontology Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.") - cell_type = pd.DataFrame(cell_ontology)["cell_name"] # convert to dataframe + cell_ontology = get_json( + url="toolkit/getCellOntology", + info_msg="Retrieving ontology from DISCO database", + error_msg="Failed to retrieve ontology Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.", + ) + cell_type = pd.DataFrame(cell_ontology)["cell_name"] # convert to dataframe # convert term to lower term for matching. the same apply for the cell_type term = term.lower() @@ -87,22 +92,30 @@ def find_celltype(term : str = "", cell_ontology : dict = None): # get the index and return the cell type in the correct case in the form of Python List return list(cell_type[idx_list]) -def get_disco_metadata(): +def get_disco_metadata(): """get metadata of the disco into the dataframe Returns: Pandas Dataframe: Disco metadata in the format of dataframe which will be used for the filter class - """ + """ - metadata = get_json(url = "http://www.immunesinglecell.org/api/vishuo/sample/all", info_msg = "Retrieving metadata from DISCO database", - error_msg = "Failed to retrieve metadata. Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.", prefix=False) - metadata = pd.DataFrame(metadata) - metadata = metadata[metadata["processStatus"] == "QC pass"] # filtering to only get the sample that pass quality control - metadata.set_index("sampleId") # setting the index of the metadata to sample_id for consistency + metadata = pd.read_csv( + "https://immunesinglecell.org/disco_v3_api/toolkit/getSampleMetadata", + sep="\t", + header=0, + ) + # metadata = pd.DataFrame(metadata) + # metadata = metadata[ + # metadata["processStatus"] == "QC pass" + # ] # filtering to only get the sample that pass quality control + metadata.set_index( + "sample_id" + ) # setting the index of the metadata to sample_id for consistency return metadata -def get_celltype_children(cell_type: Union[str , list], cell_ontology: dict = None): + +def get_celltype_children(cell_type: Union[str, list], cell_ontology: dict = None): """get the children of the input celltype from the user Args: @@ -113,8 +126,13 @@ def get_celltype_children(cell_type: Union[str , list], cell_ontology: dict = No List of String: return the children of the defined celltype in list of String """ if cell_ontology is None: - cell_ontology = pd.DataFrame(get_json(url="/getCellOntology", info_msg = "Retrieving ontology from DISCO database", - error_msg = "Failed to retrieve ontology Please try again. 
If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.")) + cell_ontology = pd.DataFrame( + get_json( + url="toolkit/getCellOntology", + info_msg="Retrieving ontology from DISCO database", + error_msg="Failed to retrieve ontology Please try again. If the issue persists, please contact us at li_mengwei@immunol.a-star.edu.sg for assistance.", + ) + ) # define an empty list children = [] @@ -129,13 +147,20 @@ def get_celltype_children(cell_type: Union[str , list], cell_ontology: dict = No # while loop to get all the children celltypes iteratively while len(cell_type) > 0: - idx_list = [index for index, each in enumerate(list(cell_ontology["parent"])) if each in cell_type] + idx_list = [ + index + for index, each in enumerate(list(cell_ontology["parent"])) + if each in cell_type + ] children.extend(list(cell_ontology["cell_name"][idx_list])) cell_type = list(cell_ontology["cell_name"][idx_list]) - children = list(set(children)) # ensure that the obtained children of celltype are unique + children = list( + set(children) + ) # ensure that the obtained children of celltype are unique return children + def list_metadata_item(field: str): """List element inside the metadata columns @@ -153,24 +178,25 @@ def list_metadata_item(field: str): if field in metadata.columns: return list(set(metadata[field])) else: - logging.warning("DISCO data don't contain '%s' field. Please check the field name" % (field)) + logging.warning( + "DISCO data don't contain '%s' field. Please check the field name" % (field) + ) return None - -def list_all_columns(): + +def list_all_columns(): """list all the columns found in the metadata of the disco database Returns: List: return the name of the metadata in the form of list of string - """ - + """ + # get the disco metadata metadata = get_disco_metadata() return list(metadata.columns) -def filter_disco_metadata(filter : Filter = Filter()): - +def filter_disco_metadata(filter: Filter = Filter()): """filter function option for the disco data Args: Filter (Class): predefined Filter class with default attribute to filter data for the user @@ -182,15 +208,17 @@ def filter_disco_metadata(filter : Filter = Filter()): # starting with defining variable filter_data = FilterData() - metadata = get_disco_metadata() # get metadata - logging.info("Filtering sample") # producing log message to the user + metadata = get_disco_metadata() # get metadata + logging.info("Filtering sample") # producing log message to the user # condition for each provided attribute by the user or default - if filter.sample is not None: - metadata = metadata.iloc[check_in_list(metadata["sampleId"], filter.sample)] + if filter.sample_id is not None: + metadata = metadata.iloc[check_in_list(metadata["sample_id"], filter.sample_id)] - if filter.project is not None: - metadata = metadata.iloc[check_in_list(metadata["projectId"], filter.project)] + if filter.project_id is not None: + metadata = metadata.iloc[ + check_in_list(metadata["project_id"], filter.project_id) + ] if filter.tissue is not None: metadata = metadata.iloc[check_in_list(metadata["tissue"], filter.tissue)] @@ -202,81 +230,130 @@ def filter_disco_metadata(filter : Filter = Filter()): metadata = metadata.iloc[check_in_list(metadata["disease"], filter.disease)] if filter.sample_type is not None: - metadata = metadata.iloc[check_in_list(metadata["sampleType"], filter.sample_type)] + metadata = metadata.iloc[ + check_in_list(metadata["sample_type"], filter.sample_type) + ] # condition when there is no samples with 
the provided filters if len(metadata) == 0: logging.warn("Sorry, no samples passed the applied filters.") return None - + # get the celltype information sample_ct_info = get_sample_ct_info() # remove the unnecessary fields from the metadata and only keep the below defined columns - retain_field = ["sampleId", "projectId", "sampleType", "anatomicalSite", "disease", - "tissue", "platform", "ageGroup", "age", "gender", "cellSorting", - "diseaseSubtype", "diseaseStage", "treatment", "md5h5ad"] - + # retain_field = [ + # "sample_id", + # "projectId", + # "sample_type", + # "anatomicalSite", + # "disease", + # "tissue", + # "platform", + # "ageGroup", + # "age", + # "gender", + # "cellSorting", + # "diseaseSubtype", + # "diseaseStage", + # "treatment", + # "md5h5ad", + # ] + # subset to the retained field - metadata = metadata[retain_field] + # metadata = metadata[retain_field] # checking for the specific cell type if filter.cell_type is None: - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], metadata["sampleId"])] - sample_cell_count = pd.DataFrame(sample_ct_info.groupby(["sampleId"])["cellNumber"].agg("sum")) # get the total number of cells - sample_cell_count.columns = ["x"] # assigning different column name - filtered_index = np.intersect1d(sample_cell_count.index, list(metadata["sampleId"])) # getting the common sampleId from the ct info and metadata info - metadata = metadata.loc[metadata['sampleId'].isin(filtered_index)] # filtering to only the common sampleId - metadata["cell_number"] = list(sample_cell_count.loc[filtered_index]["x"]) # assigning to the metadata - metadata = metadata[metadata["cell_number"] > filter.min_cell_per_sample] # filter by the minimum cell per sample + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], metadata["sample_id"]) + ] + sample_cell_count = pd.DataFrame( + sample_ct_info.groupby(["sample_id"])["cell_number"].agg("sum") + ) # get the total number of cells + sample_cell_count.columns = ["x"] # assigning different column name + filtered_index = np.intersect1d( + sample_cell_count.index, list(metadata["sample_id"]) + ) # getting the common sample_id from the ct info and metadata info + metadata = metadata.loc[ + metadata["sample_id"].isin(filtered_index) + ] # filtering to only the common sample_id + metadata["cell_number"] = list( + sample_cell_count.loc[filtered_index]["x"] + ) # assigning to the metadata + metadata = metadata[ + metadata["cell_number"] > filter.min_cell_per_sample + ] # filter by the minimum cell per sample # condiition for none if len(metadata) == 0: logging.warn("Sorry, no samples passed the applied filters.") return None - # subsetting based on the sampleId - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], metadata["sampleId"])] - + # subsetting based on the sample_id + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], metadata["sample_id"]) + ] + # now setting the attribute of the FilterData filter_data.sample_metadata = metadata filter_data.cell_type_metadata = sample_ct_info filter_data.sample_count = len(metadata) filter_data.cell_count = sum(metadata["cell_number"]) - filter_data.filter = filter # now set all the changes we made to the Filter object + filter_data.filter = ( + filter # now set all the changes we made to the Filter object + ) # giving the message to the user - logging.info("%s samples and %s cells were found" % (filter_data.sample_count, filter_data.cell_count)) + logging.info( + "%s 
samples and %s cells were found" + % (filter_data.sample_count, filter_data.cell_count) + ) return filter_data - + # condition to get all the data for the sub celltype if filter.include_cell_type_childen: filter.cell_type = get_celltype_children(filter.cell_type) # subset to the given cell type and sample - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["cellType"], filter.cell_type)] - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], list(metadata["sampleId"]))] + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["cell_type"], filter.cell_type) + ] + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], list(metadata["sample_id"])) + ] # condition for different values of the cell_type_confidence if filter.cell_type_confidence not in ["high", "medium", "all"]: - logging.warning("cell_type_confidence can only be high, medium, or all") # give the user an informative message + logging.warning( + "cell_type_confidence can only be high, medium, or all" + ) # give the user an informative message if filter.cell_type_confidence == "high": - sample_ct_info = sample_ct_info[sample_ct_info["cellTypeScore"] >= 0.8] + sample_ct_info = sample_ct_info[sample_ct_info["cell_type_score"] >= 0.8] elif filter.cell_type_confidence == "medium": - sample_ct_info = sample_ct_info[sample_ct_info["cellTypeScore"] >= 0.6] + sample_ct_info = sample_ct_info[sample_ct_info["cell_type_score"] >= 0.6] # again condition for no sample found and return None if len(sample_ct_info) == 0: logging.warn("Sorry, no samples passed the applied filters.") return None - + # follow from previous block of code - metadata = metadata.iloc[check_in_list(list(metadata["sampleId"]), sample_ct_info["sampleId"])] - sample_cell_count = pd.DataFrame(sample_ct_info.groupby(["sampleId"])["cellNumber"].agg("sum")) # total cell numbers + metadata = metadata.iloc[ + check_in_list(list(metadata["sample_id"]), sample_ct_info["sample_id"]) + ] + sample_cell_count = pd.DataFrame( + sample_ct_info.groupby(["sample_id"])["cell_number"].agg("sum") + ) # total cell numbers sample_cell_count.columns = ["x"] - - metadata["cell_number"] = list(sample_cell_count.loc[list(metadata["sampleId"])]["x"]) # count number of cell - metadata = metadata[metadata["cell_number"] > filter.min_cell_per_sample] # filter based on min cell per sample + + metadata["cell_number"] = list( + sample_cell_count.loc[list(metadata["sample_id"])]["x"] + ) # count number of cell + metadata = metadata[ + metadata["cell_number"] > filter.min_cell_per_sample + ] # filter based on min cell per sample # condition for None if len(metadata) == 0: @@ -284,7 +361,9 @@ def filter_disco_metadata(filter : Filter = Filter()): return None # subsetting before assign attribute for the FilterData object - sample_ct_info = sample_ct_info.iloc[check_in_list(sample_ct_info["sampleId"], metadata["sampleId"])] + sample_ct_info = sample_ct_info.iloc[ + check_in_list(sample_ct_info["sample_id"], metadata["sample_id"]) + ] # now assign all the values to the attribute of the object filter_data.sample_metadata = metadata @@ -292,9 +371,13 @@ def filter_disco_metadata(filter : Filter = Filter()): filter_data.sample_count = len(metadata) filter_data.cell_count = sum(metadata["cell_number"]) filter_data.filter = filter - logging.info("%s samples and %s cells were found" % (filter_data.sample_count, filter_data.cell_count)) + logging.info( + "%s samples and %s cells were found" + % (filter_data.sample_count, 
filter_data.cell_count) + ) return filter_data + # utils function for getting the index of the element on the left list for subsetting if the element is exist in the right list. def check_in_list(var, whitelist): - return [index for index, each in enumerate(list(var)) if each in list(whitelist)] \ No newline at end of file + return [index for index, each in enumerate(list(var)) if each in list(whitelist)] diff --git a/discotoolkit/GlobalVariable.py b/discotoolkit/GlobalVariable.py index b737a21..ec626ab 100644 --- a/discotoolkit/GlobalVariable.py +++ b/discotoolkit/GlobalVariable.py @@ -1,3 +1,11 @@ +''' +Descripttion: +version: +Author: Mengwei Li +Date: 2023-07-06 16:59:59 +LastEditors: Mengwei Li +LastEditTime: 2024-10-23 15:52:29 +''' """ Global variable file to import for the subsequent script. @@ -17,9 +25,9 @@ timeout = 600 # Define package-level variable -response = requests.get("http://www.immunesinglecell.org/api/vishuo/getToolkitUrl") +# response = requests.get("http://www.immunesinglecell.org/api/vishuo/getToolkitUrl") -if response.status_code == 200: - prefix_disco_url = json.loads(response.text)["url"] -else: - prefix_disco_url = "http://www.immunesinglecell.org/toolkitapi" \ No newline at end of file +# if response.status_code == 200: +# prefix_disco_url = json.loads(response.text)["url"] +# else: +prefix_disco_url = "https://immunesinglecell.org/disco_v3_api/" \ No newline at end of file diff --git a/discotoolkit/Utilities.py b/discotoolkit/Utilities.py new file mode 100644 index 0000000..1fd4b9c --- /dev/null +++ b/discotoolkit/Utilities.py @@ -0,0 +1,74 @@ +import h5py +import numpy as np +from scipy.sparse import csr_matrix +from pathlib import Path + + +def write_10X_h5(adata, file): + """Writes adata to a 10X-formatted h5 file. + + Note that this function is not fully tested and may not work for all cases. + It will not write the following keys to the h5 file compared to 10X: + '_all_tag_keys', 'pattern', 'read', 'sequence' + + Args: + adata (AnnData object): AnnData object to be written. + file (str): File name to be written to. If no extension is given, '.h5' is appended. + + Raises: + FileExistsError: If file already exists. 
+ + Returns: + None + """ + + if ".h5" not in file: + file = f"{file}.h5" + if Path(file).exists(): + raise FileExistsError(f"There already is a file `{file}`.") + + def int_max(x): + return int(max(np.floor(len(str(int(max(x)))) / 4), 1) * 4) + + def str_max(x): + return max([len(i) for i in x]) + + w = h5py.File(file, "w") + grp = w.create_group("matrix") + grp.create_dataset( + "barcodes", + data=np.array(adata.obs_names, dtype=f"|S{str_max(adata.obs_names)}"), + ) + grp.create_dataset( + "data", data=np.array(adata.X.data, dtype=f"
[remainder of Utilities.py and the trailing GIT binary patch hunks for the new binary files are omitted]
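A short usage sketch for the new write_10X_h5 helper, based only on the docstring above (the tail of the function body is not visible in this patch); the AnnData source and output name are placeholders:

import scanpy as sc
from discotoolkit.Utilities import write_10X_h5

# placeholder input; any AnnData whose .X is a sparse CSR matrix should be suitable
adata = sc.read_h5ad("DISCOtmp/ERX2757110.h5ad")

# ".h5" is appended when missing; writing over an existing file raises FileExistsError
write_10X_h5(adata, "DISCOtmp/ERX2757110_10x")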
z*1V7?84RPa(54C|2P7z5)+}27wMLfGHr6XZ%WrII`)Gw+ON>GKTv<<{OEjCP`JT$!VuPWeG!TsZn3q~n z2lU5L+_IMQAt@Y+seNW5jS4&1=$~HJUD>jNFpCkF$XgDjH}Ejhv{F|r>Dm8`=&?g^ z#<*{6Tt(H?$p>O;+XvzMVcFlP_!}4e&GY_d?Cga9+%$LbPaVH-$Sp^dmLs2arTxva z|FYu0oGg3dsZQIfS>KR>mH59p0F7iKjX~$=CT&gnEtJE?+479@U*YviJ&0{;8q!NH zkrxf68`Lm(u{l%H2WE|0&+8T(#YJ05FQy8M18u>gMV~WfoV7D+90*5_feBL|!mu0b z2}MLOk1r?-)*4wOHYl4c5*cz}PQD6lE^C6ac%AjP)I-6RV2oM{GZ*`eKqY{GhkL*u zsqT5(u$OGZ@#tW8JJ1O1^~(LdBi9i4tjUVTVe zedtkee)VzLcS7-<_{^*LUP)R%_jo6Ur>kU7z2d1~@HEbQ8fPkGPpjf-P1{=4DOqSj z=el zII6=GBuZ9guwfMJnvl5mBE8zJw{g36$0K1XZpG>CD@B^0g)&itThP}LmRF5cI7O|y zY0!kaSM6tdZWaC`+Ri)D#h?@`97`BpSN%CSQDn@H!hP_*m#V$ zN^HM(Z1G~XxS~yZd`Udf)W?t8x=GZqYuuJ{=l$9{#S_h?TdsF(u&|URZM{a(3L8rS zIE4{rTdAi?Z40B8|IGf#zL?dh+4fys}QTHzeNeLeQ=<= z+`Te&^|9caDx0o<&pG8>5bEcJ`j6Iq*feugUiYH1?!~#?X`x;gjw!;i&-gEmhAM1x z!5QOJaS1jr9c$vH>&Q;mJ%#1OPw?ihF}IvJQ&>*$<0eYOaX)qZCQFNFPNQK<<7UxV zoG}Rv3~k)lXTnnCEOnIB4x*UAV;jKI_vvv59Ed(zlt{FVt1d}~$@eOptTm_2#HiZP zWyZ#|OO*Y;S#Mz*mhBt!&<0|?$XH#zcxd^Oa|+8B|K#wzCy@38uugIN-tD^6HEohz zYZTX->7e2Yq}Kn@=0Ds#b3yIX**0Ze`|QoRp`X3|D5UH>^;t0OJo|(tM-HrGvGrR< zMg~>!p|uzG!D$hlbug8jM0e;dsC>uqs!6~eH1@kyT@TQLdp&qggvF6Mgks4c`q|d9`V_Cs|m373TQoNh3v}4(_5X>PAWz^)5So^CC2&l<2v8loq@Px>yE#?}R+@mN!^^eschOgSN@CDa?Mc%Kj*Op=PUEhS7hgD#d-R( zUd4G4Q_^L}lyupXJhrl3!h*AQ-dX$Le5(Aznh)N%|HjNY+1aW%Ta$do?VaemD^9n) z*EiLtxYsVY8|K{&vU`K#-jFmt5z42m4@}eNKCs@mepH@1_oK=WE2lk*(6}Hp&kN16 z(5eWnN%Iq5#l5EYn-&cQmwOTAYMd{PrgG2%Cu#l0&Xre8ZJoX;`!*@QO-XBJbzmB| z2~WmXv*6n>@7s_XO7+cEq*Er4kLS6GW)T3&AM>5i$%(5-190*@XcYzgE;%&(r3rH0g_GkyU_UvL zQMQ)^C+yVwN9tD^(1j=tVl=b>`_qD`*#GX^7#p`0e8^fqqHwj=!jzO1=7EX4pk zQMoKJm#if>^{v6azTTlokFHY+y7~_hH78{Djsmux!t>Hg+ZAz*%m+A5r&;Rt&S%ko zAwKp&eDGPm8@$tW4QNg*sQ`ZbZBRkTZz(*8o@5f_dkE4liJsYzwU#=FvH*g}e^LF- zl~10(clrIx4|+aoNX0%FxIZ9QY*i|@E>yfYU-6<`v0bUyUWlv;ZGU0y-Cf}E-C4WF z?}|l-q@Iw9?dis{jxbHQGZD}&y#o@OHcK=kvi5K!Or|87H})`gR{(Mc5KPr*#t}o+ zbF74BhxJs2ESW=uW0h!MRN6z%UUEn$k@k~wkem*3?Bo#BmJX3~n4BZz93$tSlS89K zqW)I}%|9a_328_b0H3TkgHi+wS?M-~pCBhm4z1R*W~^UfZzsTE)m?$&pP6r(^hqao|WA{v;iZYnCKiaVN~>da^Jy&R8k(nSI-cO5)< zP(@+6^)p@~1I6q3kM*MRB&+Cx;bc2!78KK}RBzhUESs7YQ!_jyOsXbh4-VZwB|DoG zXA>}4=2!SnyXK64{^~Ef`qgZ7(afm(#YFfz+~j)7ILI!$0}G zJNuIR9+O1N2(Od0A_X!pH7s1ss^m#TFZ03zGu`(=^#5aAn3HpsCl6@RYZ19&uk7Ea z`1cWdtuv2a+a(LT6#?k7I9Le@~MsAHvUYNJmq^&hNqXtnnkYQRCARz z_rCM~cQUrxnZ`NqBXkK@SqDU6r3D05TE;q24>b1csTR4SO{r)b>%{o3U9YU(GS;b> zsu%4h^9yLryyN8T)eB{7=F8Tk*S5-KFDPX%j2&9E)DR&nIXgIWRoYZJ`Mv4J)ZrPQ zY}%@rwx&&6pHv*XdpUJ!&hV%sU2#mVIHpt_(@Nf_`#<+qPR6E&Q{JhB>|GBWDxi&! 
z{}>!_F*;N~Rny3->7;6YXZmf7Jf)&#tn+i=H9Ir5%8Yw;Mp(NbY@8Q1rbhEaa{s)5 zp+~69M{TljB;#pGvtLH2T69=kCwL;@Ht!r}5HXId?3H9cm^t7RVG`@0I@pxq+f1of$4{(@* zzMMOJr1m#WInImTqNzn>=t+c?7sIG6C>AMxQUxSxArp8oI{gnTN?L&CNM$}&oOVWXSG+CnW3|vw zuWx%@JY6mI@kXH7>qCmv-{B%%K=u7UN41W76sjXu>g&mKVeXC-|8D*G7&Yt15G=@K zn9E&pf(qoWQhTgEe<0O<59c7w32T7_ZrqvD%))#=r6cPGFS!y{@Fu4|HI1T3eQ9Vh z_tctVjzwR>s;3LbuPi#6+|NamTc4U}niv`{34MN=Chmz5b_GtHD8m-*T0Lh-1@d%h?|RNOY6s?4gFH>uYvbnqaazpg zE3XC6{n48Qq@h^qX}iJjV5lDm${cb{kHcgZ z@@2t@7mL_SkwG+793&1IQ`Ue#0wPA}n>QPYgsp+zo~Acj0&mj8n<|o#0DFu)vI#Kq z-t6tsyu&8~h+sejfk^7pVIU!ApgOouIF`5bU)hvZzP>KQGg@KK61#DPQ>SLps1{NiXsW8!z zH5V}E)UjFwKT?CSL8NFy{QZUxW~ zR%xQ;VaY*j0{9ir`Dv-up4CfOwfeoDLR~zd4~n$t<2_A79);X5t_~#wZJa<~u&L{T zNnUyzN3;v8&Uj&cft6ZuRmf?}PS~_hdf8K?uy~44#RG{(E#?C>{kk%BrC_vZYmc=| z5pE}zX>+FUdn|u$QLllB23lo7w^@l2SAG=f>-+4KJO>r_75kt_g{}qkH2r3M@Er6x zv?nQbqw*=ES3gEq%t2Q#eVUXf-v`>eu6*4n(Wide6Z-;;GxBeBRQ&_TzP`Nt_*$`q zV$bU8@U-!@Yzg*fC_&f8Pa8YSmhg|z`-w7Y@r0eZ4z=~QAAcy?XLKBh_MX_4C`;J; z3<*cV*=Oi0v1RFLW*{2Zs2eGq#0wJkgoAza2m55B9-N`FUuQW=e-Yo5%N)1jIo*Ui z;lU|f$MQA>?L5`@RH|SLDdvjOb$zR+R5WY(mj0ctPc^gPY0_fl9R4w>E^_#%m8X9v zI`5aP@UbGHFCe)(r9VS+sp=FNsMIY&(vprz|AwAP$CCaOPT%+ae|-FJMs6MWmfzZB z#TOs2^NnvmNNNU5uL4S;=Q{04M#QW&hw;ZIC24i|UPD#ZbQM}I+oH??udj!OW9n{b z8Dglg9d#>|P&rw1XkaittZvZ)*^|TNw25p#6(4~Z8pu*QG2GjWz#p?DLlMI5kS>yS z_J(5s0(Xbr!WjYpc-Ri_QM^&HY!4`oi8h4+5fTc@SU1#fl#~O4t9#30Z!8Zj%6;!B z7miOKWF06HAGcuppse|_ZcS!G{r!X~!eS(sah4Be`72qzC(Db{eWKPfZ>pt2(WB;k=Cmy+R zm(sXPZrH6f>|SU%INxyaQH9)aT*0sAMAA*0OIsIKx6iL`&$!ByC#Ku6AyvDBK5;=i zX0>)~tb&y4HB-Znoxp1JqIqkoUzw}tW)yWQQQ+n4ZLV$9?w#!Vf zQs185dHgervh$4WKdbo9reD33_FrN|{7+xS`wj1HnA(u>m49h9di`i3Y(cv@d~-#u z!Krk8+w4ZAe%IVyWU(ig1xx(3g+m>J^f&FD6u(CL+F#l@WLf@R#Z(1l=`iQA{6^rs z{=0{#-RSq~7TMpb_*-S+1x0w_=i43&CjjbICNJMEKi$JVUD>rQqgJ8--KSs&80_3LP|eYqx+h4YGVK7HvmS$Hkk zfuay|`DT%YbBb^-ec_@kTr9*~w)RV7E?2Qx?B;Es#n^pm4!*sN(DG`q%`OvubUoc2lOd?hj7<{)tR=qf)&!Q@eHMl2W^Up?24N?XJ14k8UWt zJLTFlO6{5Sx!2OQuc7N|RsoQSaCGkX5F0SeAL0ho6k&t1 zM(})mI0nmRd>|5-$bXH%+zBWhEOobq(2;ZQmS4mMEIw#wDh=GW$C}eFU^zl53B6|(N-3-g#G4>lQmOoQ}&r{A9`lJ z^p_62sCc)jZ{f1aesPfKF-q?f6;tH=3pl!dhk-#w5E4@8uDO693@kwqmMqW{KueDs z#?3fH33ab{c`J)sf)VNV0CRS4!iZ+Du*KlgasSaALI4%TUYt z0dfxkBkP|wKuG5xT;bnSlLZmI02(wOB+wvpZJcZMqLnM#K(N6EY@{pLOLwlCKAPG! 
zbA4{N2B~4xq*Bu^ahHxM(m~inBE>W}jV$!#uRq3DMKM8O5mqM%oi1h2yYC|6Qs~{Y z%>=+axfo4JOa%8X^*0%tDW9oJ#%Aj1k5Py|_$@?YItPRLOONn)0hoTnm0<>y|Kddi zG;NY-wXVU*(XO^MLk_J8R9M?T1RVF<$R%qY8pO#p=|518x5+s{4(Wa>`i^!|nO-OT z6?{PYAv(?@V3={zLkcM?d=Nkyq-S$4J`EX{-l7P1$oWfhh;KCsD&m@^Ap2UC2L0wK z>T67|(<*Spsj?OcADx7@>Cq3_7=00oPT<85BNWpQVm5yNno47Xm^3>L5DqZ7JP^*i zWFSlb8+=$50ZDiRJ1?wdvey$wCMa=%}8Zc&_D2;jCehi_|_Z95d(j-&>!x89!SHAtm&{A3l%Y-ub=T&u6iO2r+=}kLj z|IWXv0*;KnS_TJN27R@RP+H!}>U3?3>?I(Xvt4nvFF5ziJ99uX202eVPd~0+b>9sj zS$Z`f;mvv^Sq&k{Y6wYI<4>p8Uqm)vSKT*o#iCW?gEdQr$FjOs;-$_B(Som3BhC zjh#&P%BG5pzb0dILlRe5{?<}P-~AllG_eWC9nALR#&=GmC2i%C!Fg*HJ}!rbo^DV1 z@9&W-Hzv(VaU804#XZmao|I+g;-p6wwkg6koYoa8rpi>M_&d`#l!}eAca!4X^l8IP zX!h8{0eSOY1?}kFD?9fo&V8f|+L4yy%EqxXnToZHLhKKX(BzFS4tFLGsO^fR2KCF58%y+d)V zo7w!+)}OS>oA)T2_oN;4%WT^<+n9WL@&`~(t6Fd>Anj~@;;hU)#x=ITXG;{wK11z1;JuYl*#$^Y2-?>prq7nCCx z@F-VaR4Ol`{&{JflgT50@q<76fqdYsa^NgNR8@8CBvve*gZ%Gao$j3p&K*j-56JEV ziu*uL8ZD71D~8*+x|YQ<5=mdOL|-!(eh(mOaK?^f5O{V&22A(Ck=?r#_bvot`g*7z z{EsmgYo>S3c+*}iwYDhUEeqcEd2hSy-KltYqL;u0Ece$g)U?dkw0t@w*R(4&?F%(~ z=46=(8s+AYZ6Vx}JE-lW|p)89F3sFEX}xq1&%nCJ zqLHjc{c47KddCb-KA5vA7^Dz`cxSCwgR!gq;VX2ID2%tvfKbK!E$qYSEdXH0v73YM zhz?6wLPHwiWCl{G(ufor6>@G>KP!VzRSrbOP}Ya7PHe@B-N6{`kI@cq)_uG`dL`I@ zDJX>r$IVdZMJ##NeuTc9ei)$km_%SLi7b^_$MI17bSNIAJuvmW(mHzANzOPqBvIKp z3!oxbNg5NWw_1h<$f7%V_N@PG#Pl)qP>Pc$OF$>ba$`i1B;yAImUBq*JZHY^HyXk zDl`7-jL*O5Ic?;fvqL~F)I6S<*Xk`yJ&goe!V_rKalrK7%m#hYhF6MOu zf~?I+%`fM^X(1BbTwMbzvV5`ZbtCUz0^RMl#;J9ZYt(~y%3W)6q7(6DF%nP7g)P(s+ zk|c?i*5Wj(buu=o3m4QQvRm4QmGnL)-}}7*=Au&a1k0U zlnc^FpWU6fbn(*o__+^!=_<3(jDy6vG+*lIwq2v08j!O`i%~Pnqtb$*Xc}c)TGMR` zFL___Jr{YRr^)aP!FW`7>U;LN&csnfp(JF!qkNnG*Wp>|Ol{j-31QYTN4+tKR6Q<#4a4{voMw~e2)*t`0=*dY@p`+<5Q9WrR4 z-wu?CcF45K&cXh~I%HyR)D!QJiN8^%t3#$$nMgeWZ!r<>kOT_KCvWu>P<~lOS|hXM znJ|-2d%Y094=8hF$C3@*cD!U*K)lpWHSpp*-My|LalJ$}NJKB)mRw#kj3|b_c>M>& z5;YlG3Km3dYLr+v5UdB`5JE4)TL^C>97e$X7&_}q+8A_UCP>ojTgA_9%Iu<{SEyNj zNcG~h_0TjdMYD^3eRLqj?(2?RT^g$>x>*EIR4ZT>r;+gUrBGe+60)qDuvA{|)mVo5^iu zMm94e>ysOqiAH7um}xgVu$4W#nLWFHVRPms=yUsr_ECQltY{?Oar0BypK{-Y9U2Mgh5z3M%ZVCh z4GgR%p%<|omKWJk znDHVH6lUX2lEjWd$MEQuGr^nP6M&~+&XxdH$lvK>kJMkzZ={DC>ER}cB(v_(?4yfo zxBoekS-ZvXDDtTp-Wd8Z$1vyeNjyJOsZd$=QnFmJv>JR7Wv^S7zpW`oFw-T=nx)Dz z%Ylp+U4ZEDBD(3Y4A8!b&xo&s+l=psA)pGo3a}FXnqqG((If~b$oIkzLQ^E4igRK! 
z(i6MXBmnM}4}HQd-{(w`d?L=YE$~-yI3A5c_>thX^~Z7KIUIdqF#!h#WY-emVflW6kjr>3SXSo<-~-GNma5+{=Fo)x9r?l z%G7DyM^4+4U8__SwgkPpC@Hpt6+u$18hor2Q-kXbUQpT?c$=wEN5LBn|4OMXpU_?m z9CUj7HUNB31Sn)M;;k+M6^3j*0#q2X^(as%>9=|eC=3$6VFBb}8}oXD`qi?*I~>+9 zIplQ*i+n%4l)r+H<0b!Emyi3w&MtfKoL~2@m#k(en#fA<9t{1h< z~-QQPugqjEUd@#weJBmBZ2_AB??h63?T8I0GZMWFOm0ZXJST331CnTd=LxRb#7Y?Dm5|L7aFe@~Gn- zvv;&Dm18Z zq^C2}Y5oPV#o`MF`lQbd>1Lsz83lKwg4Jd1ZcfpxyqT+F9TxfTYr3ss&_Z^DT{y|O z(R@+OujDi%r)!q&a`Ou}ul~p`S6tq#+LfveEO6dOr0kU-jKl*kZI)QSfYyDsw+3U0 zs6xxm;4Tk|YRm(mc+{h^#_S#pyFK5&M#msK++%`js1)#K3Uj~%(J=3I{~+DE-;VYO zksa&9HUIFw;n_TDFhBd-Iy)ra|7NK*wy)26;9I+V{k35!L%V`C<}*g|*8fX^TG z{JtKx`^QD4p7n?1gs23rFF^?_{Fjjj5DAZZ(Xh_f_>$Wn8c_6e6>-YRh!fJN*oqlrmC(CgpY6 zvWTQE6RRs^8JlH^CQLdfGJ8z>*;9JwrVTlnaD-f{<{`_kO3NmeEP!C4tMuLpKxv}N zX=z?vmaDqmWe@q7r<^}h?J70~bYCV`mNQ9dXq|+>k3*>F1}*I znA}QW{m2Ft3V zRm+7mEnm;M;T~ynzw0wCm#xTl(G9A1;pwy7KzRimlS=U!lT#h30)@_&7bTjV;yMcqr^>fF1dgSt^v8*Yo zktdvij@2y9a6^;;jLzoG%Bt%(mq5EaShLk+#N|AdF!>_6f{L+;b6s`aPj5{XM(Kls7*V?zecU^a`SZ*m3p%BvrzegeL?QaXG%=)Wa3zZaNRs{1qOx-eH+ zJyn)9BSk+Lo?kuMdGhNZ|BTb1e8P*6z0U{nQKiIg| z38L7zBTj8xdmbJAO}cUC(+gV{T2aY~N*#14F!>GXHkn;vq%ppI(1{=K3fTGB5t1Hy z!ii6{gcFW%;vwS*?`|^BrlxmFJEg~mNp0?%*^KN)qV@OhU%Gc``(!IJZ{vv zS{JrSop54Pn`}FhsLwf(W6j8PJ2qbb#EGRJ-+mgq*o<9lk00E+yTdk*pL3-5pQN8k zmz?pr2HRkt#Sd&vI`P>j`=7=yH{+MzK-f-)1qsSits zDJ-*C?&_-P%fvK`CN`%w~wnT zxx{SAGU0+&Ak)bfJ`QxQ53Bes0)&dPRn)a5H)!Q$qq2&R60CSlHN^(CY-(DOd^pR_-ePi#*z~natdCJB#r2mXCdDCzPx>wwVvZ+*c^%7oy zDf#4De}}H)XBejA3o&d5k!}nzBO5VN+bFU@Ya1n+UTdT92CV~txIyb68ALyGu8C$} fZ*BB$^S`w=nraTWPH=_+Geh?l^pyVrQ*>UZ literal 0 HcmV?d00001 diff --git a/discotoolkit/__pycache__/GeneSearch.cpython-311.pyc b/discotoolkit/__pycache__/GeneSearch.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35e1c64907d3e2154c8e4172c22ddc7472ac44c1 GIT binary patch literal 6477 zcmcIITWs4_mLyF{lw@17Wh<5wJG33gR+`v(G<7G_CX>WTYInw&#%Vfn>u4xlTB1vf z8j^}1OQGr@i}4l#>H-~99i&)&%w!$JgUJ9pK=x~cL4hf-Kcqpx0)YYr7K=sxGTZ4- ze!Az9q8?70#R9uDb?>?Np2t0pd+wqCsjV$Q!1LiR@1m{_g7|kla36ma_<99^PY6UH zQY7Y_o}44?F*Vm>_ws)Jft!=Iy`4P>BkcV&OLwtCF zin;O#oalE^1PW9r0nCvBtckon$Pl?+GM(>XM(K z@_#Rt_c>I7@1<&xg?wwE9>{OtglC@FL-4z~HCfXQ_V2SX^Z#$--TU4kZ!>Cl-Ghrk z7uI}QK=W%svl{D!EiQ=qoy{a{SgR`*#MpRH#e zwZA^qOpmEwfklmc|AdBlg&KH;>r{0}Bbzxi;6ti<_StpD+SCJeoW+O>7ur#{5K`mr zm>?EwHbxywh+&0L;0DuhvY zp+gI69Sc-}Qcu)5F7~>xMr}o#c@#RE_P@7=3A;ex2NXs@p;cmGPKj*RaX5CR^->i3G58M_#+MY>b{#+rZ^?Y}B z(_-a;MjkHQ%(K5PY{EV0;%Z%sBQA{gZoTkas?i$e;oX`vaj)efbmX~s zhg^OdfyTDRpDVi-h0XqkGOQhns-xU#2>pxsY)XZaid7|6;W;^#VHYHsRWm$`%PW>& z$;rH;U<{GJ$&2=i!Y%PEr`WKN&E-`Gw@RPnWLC(slT)+jFSE;hQV~>sED&JfIV-1? zQ#OQ0wt&%DRTi@8_$fAvxk?Ll*}x>l!FG zJ}hkh$AY3_Mh@r0?ByKRkP|`1n+h+ph+h!0JYpR}tnN)g$#bHha2Rono#Yp|yr^QE z%t&B_X4`P0$ezVTC$H2uQ7u2&Q8k|v`6ijOP?k38C37kW2ZFrJV?)^#&j_uvhJuzI z6|!m*)5)JsvEUd<%-h~`edcg&Vs_%_(W>>Y@M>PpHaGw)cZ$855fpa8jRPd{3Y(Qw z;Eu6bF1xB`z(H)1R|LefIZ;wo7~AGwyO2@UoN{V>e0h0!OjugVX8|jl7Wot}ier+T z9tQ`GgLyM~X&n3e!b%Q$^$NcL&>1}Q>adckRnHLJxa!3>_18zC`GiOl8ZjR!kQ(_1 zy1905J|`bhx4UMEd&E8XNLUahPQ6E2)Ywt0acCUwP*tr&+ltctB2`ori6;2qiiyyWE z0%Qe61%a>!MU_Dp{ug;(Q5CBNvVlb}DqveL2TEj-MP*?poGf#zmPg5nf@(*FMdgs? 
zp9FVZl;NhZf{G-o2|J`9j^LgWCBy>{1xJ>Qzh^8Oi?!&Klv}l^RLTmZA!{f3)kNkd zY$FT86%g)+CIM8~K9FX?NPO06$%(4vb)w7)+!Q2mGj_e@!w$lLcSRLa?iAvJfBuWb6R3S^$>ucM^`c#Bs}4wIdkNF{B@Y$}y_thtpOC zX`V}E6nxroHSyh#oztvfLxPqIK!Ed~@^3WG$lvr6Oy|b%XA#5KXZre{5TvK4(h=S8 zt$Ut$iQpAdfAhK#m@A(DGPLKmRyuElSTn>HU#kQoAHMp*tN(E3_L<@&V8-_9ba$mA zQk<@|hl`ggp$@>oLs;vKJ|R4w6!}-XUH4R&*bc*PGwkm=f7|0^{zu)9e(`zFU;6*t zum5z`ID6GRdleRpL+_Z-4$PYa^9FOnWNzrpjSADb!^F0kSZTOS-WfHRA(I)>nV||3 z`mp_j_Kke$jgMChhBX;hXV?l8{xI}GsN^YecbM{tPun*=o7@&-^c*#qV{eBmm!diH^v}t@_BzUoiSS(sOHi-`~M&gf%m)6{jlU*iLwOJ3L%oDK9>}pofQz z@HsPluDUX?9Udr;7~w-^_z+CR2Fnv>?1i1!@$K00KbR7THAPYB{Ya*BfP8a0o*8x)7TH>o65=^xxoJv{xSg$%q7F@)Q{ z*=_M0uxkz7{$Xj~=AH+_BSF8O)+JTf-UG_Z)OD)TJ6Jwj>f1aHw5{Vn+By!Dt>eYX zZy2oobg8B6)#(v~9x>?=06@__v7PAg?dWkM`l1fAoc4e~X80ER*M`ZwTUb zY6f?gsVi8kS!#~@D;~W~{mtoM^g8tp9^tMy1!K=V9zqKQxW%Eh6CT_S4;tYEX7~V( znNB-!@S%O#Zr14v)3L+!Y%@Kju7B+NP2Z>c@9f`+jcv!qHWedw!i=4GaMp~yWH1vZ zGojlrD7QmLx9RA{szDE!^ngwefH05m*3>$0(p@_=yG^s@?wXN+0G84w9o?Y^x9P$1 zNrR4?bX=$7PUu`NMars9A2#U2CVdzHXZp4Et7cnY`HD`D8uX}1j{@MVOs{JOz0aig z>GZxYY2U4BJv3m@gC-4eIfy5w)=}xEP9HSrgC>0tfGWrLr*BOcr@sn@)?akWtd^M> z8S)wt?~>0$CtrZZ^5)=T;uLur4?31O72jZ@kc2|kxyzhh{uy5PK_Q!zwobK)i$FUYKV@E7 zl2BgcPs_i8Ib7i?{|XI+C`nd`P|^OY5W%ATS0N(0`&Nkkdh=T$x^?%h5S_aFR)`+G s`EA5};uT3U{Di>mhT`<)3!6im>So+L@S-#EDDU)8)jGiQTig8q4O%uFH2?qr literal 0 HcmV?d00001 diff --git a/discotoolkit/__pycache__/GetMetadata.cpython-311.pyc b/discotoolkit/__pycache__/GetMetadata.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5678d4c13a2b32d510d96cd783adaec76ec0c1da GIT binary patch literal 13674 zcmeHOYiu0Xb)MPhEO*IWkxPmXQ5-%baczo{tXP&M>PcFUNTwW9QXM+VX0Y|J)&_@L~W0E&>oW1k9_uR)h_uTKCbNP3*wKW_aziA%-kFRmu@7O~r36!p1uHoiw zPU1#6iI-eyew1fd*QkqK-J>3M^^SVk)i>(L)tweHfzbfZNgm0YuE_*PgPG81h&}t# zwVArnx=eU9jOR8VIs)}PdMbxgz;#exJ$%-tAiL4}MbJ@xZ z%B;#-&F56daEv#-m$Io`_H!POF_-C2=f=lV*>N*6p~z#Y+i@wSCUfygC5?|Yrx@8G zkT8AL{YBG_Z>EsSj^*MRb=(Zdijq^yY>xF3PX8 zX|x=+lp3KQw|E$A1AA@O+Th~&_YvQd`IxSw6#bhpvQP3dWsVwMCK*kd)%Q&VaGP0OXK=#F4mUaB|kHN9w zSdBuQH9a&2rl82*os?BgHQg9!#YbO)s+Q0u)p#-|$)<l=lps z9T~i!Vd!q8wD`o-?o1+;-NPna>E#|w@s~?8ynkZK6wur_wM}V74fXh`KSVOeeYK7A zhVGnSXx2TghNrdQX?LD~K(Dv3q-mN$E7!5u1!o2Vqz4$kA@A7_KJfJtc zU^KiiFCf1VG5l?d>WaUs;O|-uH{73I?0sjq9_}^5z4M-8xM4o{m5&QV?$<7)bU`!( zu~0t0L@T%?CFO&Ln@#kuHc7aTeuH$qVo3_=uTvx`wxn$kA&3>J(Mhn5k(w6D z95L&`gF)1O5tWZjW-JcAMNLjq^GTo{?VfwDg2}xFBZqqd(k#HoWcv8K|1PvM& z0+pFiNxO*(Ee(uh+-i6{sl{23cHqN4{M7wOz~CA#RCoXIs?fZ6NEdb#dIxl2;9<|l z-(7RNLcT9JB)_L*ZipBknLDSn5oJg`=0e7-rSXdAFkPVEr;<#w;X55Rqh3Xeegsy{ z@N@OmdRcNwo3Vq}Y}P;@+Z5O9Zpl|!{bpTvxqMYAs{xccSG{|jG+U!Wm%E)hbweqSMl-c^%y^u+r9er~H*K!xr5a~`Xlx$ZeM^}h>UYg}Dt&b3 zhQJ0B$1$*DDJ;Y!Qp5-*x3^N-wG@;+-mxjq6d0|{OyHc_op@NKi`gTVXJ|PHs>-y! 
z2-z>wXA^^wAr<6cz1TaHN@_9j$hz0bM3z+Rgp#|NlAu|yPT3OxGmPC^(~RnoKmscZ zB%d|9G!4axDeYP=>s0o`x$I+=Jb630`TMMT@U9!U2{}Hnwal+uXL$6keS}l?f)Mpfkxic}CzRy1m`8aI zFHJs$^&Yxb*@Y*SMiR4Oj;UM))&%zyJ-tLF`;q(sT@7Ed&Ns&w5{uD)QvPx3>|D_@OBQ?Z?+^%?;OrCNrnVWW~F|yTF$aL%vj7}%M{fN3Z>8z z08E{o%*YC0jphMXA%IUF5Qk(36&K8 zcQ7KQMX1)ncI70#4&bN$772R4*31cw1^>3yX0aHF6&sJPxwx%K*P7qcQu76ekL$t- zLpV_oP7qAdkTDcl2rTv*k#0TMV+4B&o*qjW(Hr})m%eY~)#ES<8I6+rL(fJulDEat zridHK>s$h0yjZy911PSFy;f4!mg;9XmaPPUCJe+r1|L@;7?$mjI7IeYh3rOMU8X2H zK{TL^w9Z_#Y)A|53|`Z;3H8vPJxra+Dg-SplPvY;lyM8A#&7PACni#Rs`T;*17Bst zs%*z_uvwsV;LZ$`AWlk(xtX9V-PUV>TBscP_WQgklnvSq_=t@Zw5(}^7__T z0(ptfU9hI@F%WbhB(nJkwS3tRhL;dh&*qw{+Vb2SH#vo4mLKP!^=zw>mHZgx~`PvDM2%VrqIrcu2g`!P)C&&dv~4XEBS7acI1?M3}5+yufp_f z9S-ya^z}Hb*m&J2t_AwlWucnb+oZ zU_Y@B=tI@6!04tbN+c~fCj(<*fhi+Ih2^Bmtv8Q@h=lJP);z1%##a9mSn)i%NDUuh zVAnR>HXBq4eyeO1ekA-#BUCAFAxh;LFj9Gzl0HgyQ$i};0a$2aFu>A<8ym0$%YbFZ zb=Q59`*nc9kx&VaSeT@QLcC11Df@{!sg})vNDKmEBd&O|Bqf_cufHS;gl=d_<~-4w zO7icKwgvBkh6Qk8st^`&u6eoI`cK&{o|twseG>@?pr#P0fmeijr5vJ!JPKw_3Yy-+ zs4!(UML9~J2nQ&K>AMfE5IHN6nX3V2QGYDRA(R`foWMKQN5U4CecQNc_Y{JWH!1gF zOcCl&v7mCqs^MU!mXMu)=v+rtppvMUGtZ_WPH*A|102YFE^QpW2 z_xl(3t<-EQ)NEUGIYyR^@n;>qzl<(r9-J}yU(#czjMyo?;>fr?qV)wgkBJ4sH>B3UX zPyY?$TY@}?U`J`3EW@nqbk=mGrHYz`=|A;&*ZE)fa16sh9UYB0=rqz5~VU}wS8Sy}?D=#WEI5He!} zF2Ps!!~u8s{nn`wSGCfp#Au|A@f_|tEv)SL!Oj_1MLASsvKbg&6;%V~+GU&ww~_~_ zi>t(Fo=CaMXIEY3iIi>p`b5g!N_#-rF7rgnwxWC@Juqf*$Zg1UFPwWwMtVq#LKF@wPd`Pj_bBI%zw5J?q6Rwj1j#mnUqw;h0(H zD6tEZwOZWB^P*2Q4w}n7H{Pn>a0BcndI)zD@*7N zAKzAP|BM??wp>4{?Z`)t7fQE5fjyKov0FQPZf6YsEc(%*q z8Bo|sA5JkuQNRzfKiHwPXoN~iNA;|9mYm_@_I!Q9_(m$sbkKr*SMiKg@<0zf)x3Ae zrBesMhWP+L>u}BuM?Kv2o^mc}%S~R1I!1Jd{jJ{FZ*n76m29|yo&X>;Kn~nev{HlA zc!O!i%9~ktw%t*-sZ_RpL)onx%Q_@$U8L<#)5ecm?VomR(cdam@Ys=d{V&wLkI`yR z+w%%f|6lk0U)O!H=EM)7E-WZ6j!kA+NVT+u#&*nC%u{A6I-|nY4lJ>EkhXro+{I>` zvhS<($=Vb~k!BHJXa^30mGGcloz!4b=U@!eHZc|chUf5cj`AvC5UfWzYJw^PWGJSB55F_B|RYVk1lGN#{cp>6Siah>TKcnOG zV~T{&>(ef*^oiiY#_jv(&id7T;f&PCO7LVLuJ1Ul`9=ZsKi__r4P-9_veJMv!p((MO9W6#jTG2_%_z4I>&oDHwd3!nOH4S#dN z-@~%4hFTVT^8-ff-sPRfbAx(l$OsMn-tA(pQ2`|H%)((*p(q%8PCPvD(Gk7#8wSpX zR~f-O-^U)bN~RiP&)~z$AI0^~O9sw{%T}g03jWSYrhu{M+|HkLaO&MrhyiZXu z_RjWVsND#4zjtw|clogK%rSk(abw4Elxd|hR2hmyx4kt`3`G}SH$t7Cgm$ikcIMyE zLkEn|fpV{XT=y;<~+Tw2S+@5WLZkoy3LUC00w;KM|#l(ugqu}qLS~m(hx)N!=Ka1Y} zR9ZZkKWDV<)tmNFua7=_wz?-2iU3^*WvD1LeIjgM5w_=pi?h10+YokxS;D|4!toX1 z_`^nBIBf{0(YqFkVYK)Po<>yDwIlDvo#AQ!Y83ZLMj7CAe6Kt0SRr|^+6~@TMM2h%QenyJt%NGNar(8?M2-D z2s^0edF+Yg5wo|gPsG$IN9kC)e2s+ z&Npu?$cq;rT>S0Dk7NHF|0rHKauMhJOV$-B8()M)4x^yeFzn&InNXBB@WKj_e8<`` zc8zjS@&;8zUNlrxELVi;-uvb+wdKM0r#_t0cf4rqcoDDk;3We&JeRC$_=~7%CraY~ zE{0?6ravB+a>;mHi4biXV+zmGZgwj!dw||}fea_6lr;Maqye0&5O>YG)9KvRMEY_< zNhPkPWh=@<%ra|+X#Hnqfd$jZ2dxa#ClBq^x3-!Q+`@i!#QK$!^!*Ga1fv*)Q7HVw zq6-W@u%5HHHtPx-l1IWoCMQj%<)g|xGL!hJzd{Om=J_?3i}xVNgQSMzBXeQ;D{{d( z_E+Q@3gxrN1?Jdak*hD1&mz}caGsc?{DA`Z{Km7$JyY21T=Q+?5eG)H(9JIS26kDt zE)OL3v_3g1@R2pnPMSLT9cvttMK`-FN7-e4a)xi<@x@O1o~1`5`AbXtmpw}mi09e! 
zL)2RSR(1C+KFUWIlWQC<>y!Qf-?hfsNi)~?+|q&f4}N%X4HC+4wbJ~~>LljA6nDo;pEY>~Qk;%qkq@7=xky?6J$dwTc1(Xc?t=;BeN>i~a>U@0mt<>o-S z&w#;)dw>zv81hl+HD4>e?&}&5{n)r;6u7p;KlN`2;6m+65MaY!!8PB++CDfd=bx-T z5tEcAQc@m!wlyQEsL;zy9`V$(-;g*uCDa~Js~<|@+1++`v(e}qu{V4@%1xpd(VM@a>BOA%tZT;Dl6$8E{D}yZ zS?CC%g=QjS0flKA%G?LkoUavn4Xg7~ui?SG)Z%b`ZeGE|FS}m{m*D&Y=L#GJep`*X RxoE=r3v>z1YNZYJ#y`%duHOIv literal 0 HcmV?d00001 diff --git a/discotoolkit/__pycache__/__init__.cpython-311.pyc b/discotoolkit/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b82f3d3003d824bd4f5685a4acaafc916254dbe3 GIT binary patch literal 375 zcmZ3^%ge<81pQ6RQsw~Z#~=<2FhLogjev~l3@HpLj5!P;5SkH6GeK!)D9r+-nW9)z zn1dNKSza;%HEJ^6V$`Z)HPkcIGxpPDyT$FETH>2pl9-ZMl6Z^DC9^m=-#I6-xcHW! zOMZD?PJUtvSj+_^&F1XtoD!1`tKq}y$yO7J#nX9zi*i^`RFncz;97y( z+-|)8Khb}g|F22fNdYPFuN1J=?s>Q6D^+iuy`1;jMt`Au&6)1Tbx;_h9TTG+bK~v! dA&RoD`I_gwa7qk1^Fb%-XTWulNrAss;2Q^Y6 📦 - 1.1.3 + 1.1.4 diff --git a/docs/scEnrichment.ipynb b/docs/scEnrichment.ipynb old mode 100644 new mode 100755 diff --git a/setup.py b/setup.py index d425795..5732d1c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='discotoolkit', - version="1.1.3", + version="1.1.4", url='http://www.immunesinglecell.org/', author='Li Mengwei, Rom Uddamvathanak', author_email='uddamvathanak_rom@immunol.a-star.edu.sg',