forked from gcyuan/SCUBA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
EstimatePseudotime.m
97 lines (79 loc) · 2.97 KB
/
EstimatePseudotime.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
function EstimatePseudotime(dataset, pcvmethod, anchorGene)
% options for pcvmethod are 'Rprincurve' and 'ksegments'
% use anchorGene to order pseudotime so that the average value of
% anchorGene in the first 1000 cells is higher than for the last 1000 cells
% set default pcvmethod
if ~exist('pcvmethod', 'var')
pcvmethod = 'Rprincurve';
end
% set default anchorGene
if ~exist('anchorGene', 'var')
anchorGene = false;
end
[~, processDataMat, ~, ~, ~, ~, intermediate_filesDir, ~] = initialization(dataset);
load(processDataMat);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%step 1: dimension reduction by using tSNE %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
ndim = 3;
method = 'tSNE';
[mappedX, ~] = compute_mapping(pro.expr, method, ndim);
pro.tsne = mappedX;
save(processDataMat, 'pro');
if strcmp(pcvmethod, 'Rprincurve')
% save csv file to process with R's princurve
ncell = size(pro.expr, 1);
csvout = fullfile(intermediate_filesDir, [dataset '_tsne_d' num2str(ndim) '.csv']);
fout = fopen(csvout, 'w+');
ntsne = size(pro.tsne, 2);
for k = 1:ntsne-1,
fprintf(fout, '%s,', ['T', int2str(k)]);
end
fprintf(fout, '%s\n', ['T', int2str(ntsne)]);
for k = 1:ncell,
for j = 1:ntsne-1,
fprintf(fout, '%f,', pro.tsne(k, j));
end
fprintf(fout, '%f\n', pro.tsne(k, end));
end
fclose(fout);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% step 2: Fit tSNE data by Principal curve analysis %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
switch pcvmethod
case 'Rprincurve'
% Calling R's principal curve
system(['Rscript principal_curve_analysis_tsne_d3.R ' dataset]);
% pcvin = fullfile(intermediate_filesDir, [dataset '_tsne_d' num2str(ndim) '_pcv.csv']);
parin = fullfile(intermediate_filesDir, [dataset '_tsne_d' num2str(ndim) '_lambda.csv']);
fin3 = fopen(parin);
s = fgetl(fin3);
c3 = textscan(fin3, '%s%f', 'delimiter', ',');
fclose(fin3);
pro.pseudotime = c3{2};
case 'ksegments'
k_max = 10;
alpha = 2;
lambda = 1;
INT_PLOT = 1;
[edges,vertices,of,y]=k_seg_soft(pro.tsne,k_max,alpha,lambda,INT_PLOT);
pro.pseudotime = y(:, 1);
end
if anchorGene
% We use anchorGene to find beginning of pseudotime
[~, loc] = ismember(anchorGene, pro.gname);
[~, pseudotimeSortIndices] = sort(pro.pseudotime);
nCellsAnchor = 1000;
anchorGeneAverageBeginning = mean(pro.expr(pseudotimeSortIndices(1:nCellsAnchor),loc));
anchorGeneAverageEnd = mean(pro.expr(pseudotimeSortIndices(end-nCellsAnchor+1:end),loc));
if anchorGeneAverageEnd > anchorGeneAverageBeginning
% We flip time
pro.pseudotime = max(pro.pseudotime) - pro.pseudotime;
end
end
tmin = min(pro.pseudotime);
tmax = max(pro.pseudotime);
tbin = 8;
pro.cell_stage = ceil(tbin*(pro.pseudotime - tmin + 0.0001)/(tmax + 0.0002 - tmin));
save(processDataMat, 'pro');