-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathutils.py
61 lines (41 loc) · 1.76 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# %%
import scanpy as sc
import scanpy.external as sce
import pandas as pd
import numpy as np
import seaborn as sns
# %%
def qc(fdir, countfile, metafile, MTcount = 10, geneLB=200, cellLB=6, filtercellLB=3):
if countfile.endswith("h5"):
adata = sc.read_10x_h5(fdir + countfile).T
elif countfile.endswith("tsv") or countfile.endswith("csv"):
adata = sc.read_csv(fdir + countfile, delimiter='\t', first_column_names=None, dtype='float32').T
adata.obs = pd.read_csv(fdir + metafile,sep='\t')
mito_genes = adata.var_names.str.startswith('mt-')
adata.obs['percent_mito'] = np.sum(adata[:, mito_genes].X, axis=1) / np.sum(adata.X, axis=1) * 100
adata = adata[adata.obs.percent_mito <= MTcount, :]
adata = adata[:, np.invert(mito_genes)]
sc.pp.calculate_qc_metrics(adata, inplace=True)
## remove cells with <200 genes per cells
adata = adata[adata.obs['n_genes_by_counts'] >= geneLB, :] ## at least 200 genes per cell
## remove cells with too few counts
adata = adata[adata.obs['log1p_total_counts'] >= cellLB, :] ## at least 200 genes per cell
## fitler genes
sc.pp.filter_genes(adata, min_cells=filtercellLB)
return adata
def preprocessing(adata, maxV=10):
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata)
# sc.pl.highly_variable_genes(adata)
adata = adata[:, adata.var.highly_variable]
sc.pp.scale(adata, max_value = maxV)
sc.tl.pca(adata)
# sc.pl.pca_variance_ratio(adata, log=True)
return adata
def clustering(adata, desc, nb=15, np=50):
sc.pp.neighbors(adata, n_neighbors = nb, n_pcs = np)
sc.tl.umap(adata)
sc.tl.tsne(adata)
adata.obs['>Description'] = [desc]*adata.n_obs
return adata