.Rhistory

b <- RealignEnds(b, ref, aln[i, 10]);
b <- FinalizePositionIndex(b);
b <- TrimEnds(b);
b;
});
names(b2b) <- aln[, 1];
###############################################################################################
ind <- cbind(
first0=sapply(b2b, function(b) b[4, 3]), first1=sapply(b2b, function(b) b[4, 4]),
last0=sapply(b2b, function(b) b[nrow(b), 3]), last1=sapply(b2b, function(b) b[nrow(b), 4])
);
if (verbose) cat("merge base-to-base alignment\n");
mtx <- MergeBaseToBaseWithInsertion(b2b, ref, read.id = names(b2b));
frq <- CalculateBaseFrequencyFromMatrix(mtx[[1]][, -1]);
smm <- data.frame(frq, check.names = FALSE, stringsAsFactors = FALSE);
smm$ref <- mtx[[1]][, 1];
smm$con <- colnames(frq)[max.col(frq)];
smm$ind <- mtx[[2]][, 1];
smm$cmplx <- as.vector(CalculateMaxComplexity(ref)[smm$ind]);
smm$polyn <- as.vector(CountMaxPolyN(ref)[smm$ind]);
smm$max <- apply(frq, 1, max);
smm$total <- rowSums(frq);
###############################################################################################
dim(smm)
out$summary   <- smm;
out$base2base <- mtx[[1]];
out$snp       <- FindLinkedSnp(out);    if (verbose) cat("link SNPs done\n");
out$indel     <- FindLinkedIndel(out);  if (verbose) cat("link INDELs done\n");
out$merged    <- MergeLinkage(out);     if (verbose) cat("merge SNPs/INDELs done\n");
out$assembled <- AssembleAllele(out);   if (verbose) cat("create contigs done\n");
######################################################################
}
# Console history: run DeNovoSeq2Seq() on inputs 27-30, then execute the body of
# the function statement by statement on a single alignment file (fns[28]).
o <- lapply(27:30, function(i) {print(i); DeNovoSeq2Seq(fns[i]); })
faln <- fns[28]
aln <- readRDS(faln);
# The row whose column 1 equals column 3 supplies the reference sequence
# (column 10) and ID (column 1); every other row is kept as an alignment.
ref <- aln[aln[, 1]==aln[, 3], 10][1];
id  <- aln[aln[, 1]==aln[, 3], 1][1];
aln <- aln[aln[, 1]!=aln[, 3], , drop=FALSE];
out <- list();
out$id        <- id;
out$sequence  <- ref;
out$filter    <- list();
###############################################################################################
# remove reads multiple alignment
# (any ID that occurs more than once in column 1 is moved to out$filter$multiple)
mlt <- unique(aln[duplicated(aln[, 1]), 1]);
out$filter$multiple <- aln[aln[, 1] %in% mlt, , drop=FALSE];
aln <- aln[!(aln[, 1] %in% mlt), , drop=FALSE];
###############################################################################################
# remove reads whose longest INDEL reaches lcf or whose INDEL/match ratio reaches pcf
lcf <- 12;   ###### Length cutoff of individual INDELs
pcf <- 0.05; ###### Percent cutoff of total INDELs/total matches
ndl <- cigarRangesAlongPairwiseSpace(aln[, 6], ops=c('D', 'I'));
mth <- cigarRangesAlongReferenceSpace(aln[, 6], pos=aln[, 4], ops='=');
lmx <- max(width(ndl));
ct0 <- sum(width(mth));
ct1 <- sum(width(ndl));
pct <- ct1/ct0;
out$filter$indel <- aln[lmx>=lcf | pct>=pcf, , drop=FALSE];
aln <- aln[lmx<lcf & pct<pcf, , drop=FALSE];
###############################################################################################
# remove reads whose mismatch/match ratio reaches pcf; what is left is stored as "remaining"
pcf <- 0.01; ###### Percent cutoff of total mismatches/total matches
mth <- cigarRangesAlongReferenceSpace(aln[, 6], pos=aln[, 4], ops='=');
mis <- cigarRangesAlongReferenceSpace(aln[, 6], pos=aln[, 4], ops='X');
pct <- sum(width(mis))/sum(width(mth));
out$filter$mismatch <- aln[pct>=pcf, , drop=FALSE];
aln <- aln[pct<pcf, , drop=FALSE];
out$filter$remaining <- aln;
###############################################################################################
# Base-to-base alignment of all reads
# NOTE(review): `verbose` is not assigned anywhere in the visible history -- it
# must already exist in the workspace for these lines to run at top level.
if (verbose) cat("base-to-base alignment\n");
b2b <- lapply(1:nrow(aln), function(i) { # print(i);
b <- AlignBaseToBaseViaCigar(ref, aln[i, 10], aln[i, 6], aln[i, 4]);
b <- RealignEnds(b, ref, aln[i, 10]);
b <- FinalizePositionIndex(b);
b <- TrimEnds(b);
b;
});
names(b2b) <- aln[, 1];
###############################################################################################
# Columns 3 and 4 of row 4 and of the last row of every base-to-base table.
# NOTE(review): `ind` is not read again in the visible history -- confirm it is still needed.
ind <- cbind(
first0=sapply(b2b, function(b) b[4, 3]), first1=sapply(b2b, function(b) b[4, 4]),
last0=sapply(b2b, function(b) b[nrow(b), 3]), last1=sapply(b2b, function(b) b[nrow(b), 4])
);
###############################################################################################
# Merge the per-read tables and build the per-position summary table `smm`
if (verbose) cat("merge base-to-base alignment\n");
mtx <- MergeBaseToBaseWithInsertion(b2b, ref, read.id = names(b2b));
frq <- CalculateBaseFrequencyFromMatrix(mtx[[1]][, -1]);
smm <- data.frame(frq, check.names = FALSE, stringsAsFactors = FALSE);
smm$ref <- mtx[[1]][, 1];
smm$con <- colnames(frq)[max.col(frq)];
smm$ind <- mtx[[2]][, 1];
smm$cmplx <- as.vector(CalculateMaxComplexity(ref)[smm$ind]);
smm$polyn <- as.vector(CountMaxPolyN(ref)[smm$ind]);
smm$max <- apply(frq, 1, max);
smm$total <- rowSums(frq);
###############################################################################################
######################################################################
# Linkage and assembly steps; each one receives the growing `out` list
if (verbose) cat("De Novo assembly ... \n");
out$summary   <- smm;
out$base2base <- mtx[[1]];
out$snp       <- FindLinkedSnp(out);    if (verbose) cat("link SNPs done\n");
out$indel     <- FindLinkedIndel(out);  if (verbose) cat("link INDELs done\n");
out$merged    <- MergeLinkage(out);     if (verbose) cat("merge SNPs/INDELs done\n");
out$assembled <- AssembleAllele(out);   if (verbose) cat("create contigs done\n");
i
# output summary
# Console history: build the named count vector one entry at a time; a NULL
# component contributes 0.
cnt <- c(read=nrow(aln));
if (is.null(out$snp$selected)) cnt <- c(cnt, snp=0) else cnt <- c(cnt, snp=nrow(out$snp$selected));
if (is.null(out$snp$linkage)) cnt <- c(cnt, set=0) else cnt <- c(cnt, set=length(out$snp$linkage));
if (is.null(out$snp$weaklink)) cnt <- c(cnt, weak=0) else cnt <- c(cnt, weak=nrow(out$snp$weaklink));
if (is.null(out$snp$orphan)) cnt <- c(cnt, orphan=0) else cnt <- c(cnt, orphan=length(out$snp$orphan));
if (is.null(out$indel$final)) cnt <- c(cnt, indel=0) else cnt <- c(cnt, indel=length(out$indel$final));
if (is.null(out$merged$set)) cnt <- c(cnt, merged=0) else cnt <- c(cnt, merged=length(out$merged$set));
# First version of the contig count: sum of the consensus lengths over out$assembled
if (is.null(out$merged$set)) cnt <- c(cnt, contig=0) else
cnt <- c(cnt, contig=sum(sapply(out$assembled, function(o) length(o$consensus))));
cnt
# Several alternative ways of counting the consensus entries are tried below
sum(sapply(out$assembled, function(o) length(o$consensus)
)
)
sapply(out$assembled, function(o) length(o$consensus)
)
unlist(sapply(out$assembled, function(o) length(o$consensus)))
length(sapply(out$assembled, function(o) o$consensus))
# Second version: length of the sapply() result. Note this appends another
# `contig` entry to the `cnt` built above rather than replacing the first one.
cnt <- c(cnt, contig=length(sapply(out$assembled, function(o) o$consensus)));
cnt
out$merged$set
o <- lapply(27:29, function(i) {print(i); DeNovoSeq2Seq(fns[i]); })
# Summarize alignment of consensus sequences to one consensus sequence
# require(GenomicAlignments);
# require(ShortRead);
# source('../AssembleHaplotype.R');
# source('FindLinkedSnp.R');
# source('FindLinkedIndel.R');
# source('MergeLinkage.R');
# source('AssembleAllele.R');
# fns <- readRDS('../../seq2seq/file_seq.rds');
# faln <- "/Users/zhangz/Google Drive/Projects/Chou/2021-05_ABO/PAClindrome/seq2seq/m54215_191214_162956/10027854.rds";
# De novo assembly from the alignments of query reads to one reference read.
#
# Args:
#   faln:    Path of an RDS file holding the alignment table. Columns are read
#            by position: 1 (read ID), 3 (ID of the read aligned to),
#            4 (alignment start), 6 (CIGAR string) and 10 (read sequence).
#            NOTE(review): this matches the SAM column layout -- confirm.
#   verbose: If TRUE, progress messages are printed with cat().
#
# Returns:
#   A list with elements id, sequence, filter and count. The elements summary,
#   base2base, snp, indel, merged and assembled are added only when more than
#   10 reads survive every filter. Each filter stage runs only if more than 10
#   reads are left after the previous one, so `filter` may be partly filled.
DeNovoSeq2Seq <- function(faln, verbose=FALSE) {
  min.reads <- 10  # a stage runs only while more than this many reads remain

  aln <- readRDS(faln)
  # The row aligned to itself (column 1 == column 3) supplies the reference
  self <- aln[, 1] == aln[, 3]
  ref  <- aln[self, 10][1]
  id   <- aln[self, 1][1]
  aln  <- aln[!self, , drop=FALSE]

  out <- list()
  out$id       <- id
  out$sequence <- ref
  out$filter   <- list()

  proceed <- nrow(aln) > min.reads

  ###############################################################################################
  # remove reads with multiple alignments
  if (proceed) {
    mlt    <- unique(aln[duplicated(aln[, 1]), 1])
    is.mlt <- aln[, 1] %in% mlt
    out$filter$multiple <- aln[is.mlt, , drop=FALSE]
    aln <- aln[!is.mlt, , drop=FALSE]
    proceed <- nrow(aln) > min.reads
  }

  ###############################################################################################
  # remove reads with long indels
  if (proceed) {
    lcf <- 12    ###### Length cutoff of individual INDELs
    pcf <- 0.05  ###### Percent cutoff of total INDELs/total matches
    ndl <- cigarRangesAlongPairwiseSpace(aln[, 6], ops=c('D', 'I'))
    mth <- cigarRangesAlongReferenceSpace(aln[, 6], pos=aln[, 4], ops='=')
    lmx <- max(width(ndl))
    pct <- sum(width(ndl)) / sum(width(mth))
    # 0/0 (no '=' and no indel bases) gives NaN, which would index all-NA rows
    # into both tables below; such a read is treated as failing the filter.
    pct[is.nan(pct)] <- Inf
    out$filter$indel <- aln[lmx>=lcf | pct>=pcf, , drop=FALSE]
    aln <- aln[lmx<lcf & pct<pcf, , drop=FALSE]
    proceed <- nrow(aln) > min.reads
  }

  ###############################################################################################
  # remove reads with high mismatch percentages
  if (proceed) {
    pcf <- 0.01  ###### Percent cutoff of total mismatches/total matches
    mth <- cigarRangesAlongReferenceSpace(aln[, 6], pos=aln[, 4], ops='=')
    mis <- cigarRangesAlongReferenceSpace(aln[, 6], pos=aln[, 4], ops='X')
    pct <- sum(width(mis)) / sum(width(mth))
    pct[is.nan(pct)] <- Inf  # same 0/0 guard as in the indel filter
    out$filter$mismatch <- aln[pct>=pcf, , drop=FALSE]
    aln <- aln[pct<pcf, , drop=FALSE]
    out$filter$remaining <- aln
    proceed <- nrow(aln) > min.reads
  }

  if (proceed) {
    ###############################################################################################
    # Base-to-base alignment of all reads
    if (verbose) cat("base-to-base alignment\n")
    b2b <- lapply(seq_len(nrow(aln)), function(i) {
      b <- AlignBaseToBaseViaCigar(ref, aln[i, 10], aln[i, 6], aln[i, 4])
      b <- RealignEnds(b, ref, aln[i, 10])
      b <- FinalizePositionIndex(b)
      TrimEnds(b)
    })
    names(b2b) <- aln[, 1]

    ###############################################################################################
    # Merge the per-read tables and summarise every position
    if (verbose) cat("merge base-to-base alignment\n")
    mtx <- MergeBaseToBaseWithInsertion(b2b, ref, read.id = names(b2b))
    frq <- CalculateBaseFrequencyFromMatrix(mtx[[1]][, -1])
    smm <- data.frame(frq, check.names = FALSE, stringsAsFactors = FALSE)
    smm$ref   <- mtx[[1]][, 1]
    smm$con   <- colnames(frq)[max.col(frq)]
    smm$ind   <- mtx[[2]][, 1]
    smm$cmplx <- as.vector(CalculateMaxComplexity(ref)[smm$ind])
    smm$polyn <- as.vector(CountMaxPolyN(ref)[smm$ind])
    smm$max   <- apply(frq, 1, max)
    smm$total <- rowSums(frq)

    ######################################################################
    # Each step reads the results the previous steps stored in `out`
    if (verbose) cat("De Novo assembly ... \n")
    out$summary   <- smm
    out$base2base <- mtx[[1]]
    out$snp       <- FindLinkedSnp(out);    if (verbose) cat("link SNPs done\n")
    out$indel     <- FindLinkedIndel(out);  if (verbose) cat("link INDELs done\n")
    out$merged    <- MergeLinkage(out);     if (verbose) cat("merge SNPs/INDELs done\n")
    out$assembled <- AssembleAllele(out);   if (verbose) cat("create contigs done\n")
  }

  ###############################################################################################
  # output summary: a component that was never produced counts as 0
  size.or.zero <- function(x, size) if (is.null(x)) 0 else size(x)
  out$count <- c(
    read   = nrow(aln),
    snp    = size.or.zero(out$snp$selected, nrow),
    set    = size.or.zero(out$snp$linkage, length),
    weak   = size.or.zero(out$snp$weaklink, nrow),
    orphan = size.or.zero(out$snp$orphan, length),
    indel  = size.or.zero(out$indel$final, length),
    merged = size.or.zero(out$merged$set, length),
    # Total number of consensus sequences over all assembled sets. Summing the
    # per-set lengths gives the same answer whether or not the sets hold equal
    # numbers of sequences (length(sapply(...)) did not).
    contig = if (is.null(out$merged$set)) 0 else
      sum(vapply(out$assembled, function(o) length(o$consensus), integer(1)))
  )
  out
}
###############################################################################################
### Utility functions
# Score each base by sequence complexity.
#
# Every window of `ws` consecutive bases is scored with dustyScore(); a base
# receives the largest score among the windows that cover it.
#
# Args:
#   seq: A single sequence; coerced with as.character().
#   ws:  Window size in bases. When the sequence is shorter than `ws`, the
#        whole sequence is scored as one window.
#
# Returns:
#   A numeric vector with one score per base (length 0 for an empty sequence).
CalculateMaxComplexity <- function(seq, ws=7) { # score each base by sequence complexity
  seq <- as.character(seq)
  len <- nchar(seq)
  if (len == 0) return(numeric(0))
  # Without this clamp 1:(len - ws + 1) would count downward for short input
  ws <- min(ws, len)

  # Calculate Dusty score for each subsequence
  starts  <- seq_len(len - ws + 1)
  windows <- substring(seq, starts, starts + ws - 1)
  dsc <- dustyScore(DNAStringSet(windows))

  # Max score each base got: the window starting at s covers bases s..(s+ws-1),
  # so offset k maps the window scores onto the bases starts + (k - 1)
  best <- rep(-Inf, len)
  for (k in seq_len(ws)) {
    covered <- starts + (k - 1)
    best[covered] <- pmax(best[covered], dsc)
  }
  best
}
# Count max number of repetitive bases next to each base.
#
# Each base is first given the length of the run of identical bases it belongs
# to; the result for a base is the largest of that value and the values of its
# left and right neighbours.
#
# Args:
#   seq: A single sequence; coerced with as.character(), as in
#        CalculateMaxComplexity().
#
# Returns:
#   A numeric vector with one value per base (length 0 for an empty sequence).
CountMaxPolyN <- function(seq) { # Count max number of repetitive bases next to each base
  bases <- strsplit(as.character(seq), '')[[1]]
  if (length(bases) == 0) return(numeric(0))
  run <- rle(bases)$lengths  # base R run-length encoding
  m <- rep(run, run)         # run length repeated for every base of the run
  m <- pmax(m, c(m[-1], 0))           # compare with the right neighbour
  m <- pmax(m, c(0, m[-length(m)]))   # compare with the left neighbour
  m
}
# Group linked SNPs.
#
# Args:
#   prs: A named list; each element holds the IDs linked to the SNP the element
#        is named after (length 0 when the SNP has no link).
#
# Returns:
#   A list with `grouped` (a list of ID vectors named set1, set2, ...) and
#   `orphan` (names of the elements of `prs` that have no link).
GroupLinked <- function(prs) { # Group linked SNPs
  n.link  <- sapply(prs, length)
  orphans <- names(prs)[n.link == 0]
  members <- prs[n.link > 0]

  # Nothing is linked: only orphans to report
  if (length(members) == 0) return(list(grouped=list(), orphan=orphans))

  # Seed every group with its own name followed by its direct partners
  for (k in seq_along(members)) members[[k]] <- c(names(members)[k], members[[k]])

  # Replace each group by the partners of all of its members; one pass per group
  expand <- function(ids) {
    partners <- unlist(prs[as.character(ids)], use.names = FALSE)
    sort(unique(partners))
  }
  for (pass in seq_along(members)) members <- lapply(members, expand)

  # Keep one copy of each group and order its members numerically
  sets <- members[!duplicated(members)]
  sets <- lapply(sets, function(s) s[order(as.numeric(s))])
  names(sets) <- paste0('set', seq_along(sets))
  list(grouped=sets, orphan=orphans)
}
###############################################################################################
# DeNovoSeq2Seq
#   - id                    # Read ID of reference
#   - sequence              # Read full sequence of reference
#   - filter                # Removed alignment due to different reasons
#     - multiple            # Query sequences have multiple alignment to reference
#     - indel               # Alignment with long stretch or high percentage of INDELs
#     - mismatch            # Alignment with high mismatch/match percentage
#     - remaining           # Alignment kept after all filters (set once the mismatch filter has run)
#   - summary               # Summary matrix of base-to-base alignment
#   - base2base             # Full matrix of base-to-base alignment
#   - snp                   # SNPs in the alignment
#     - selected            # Summary of potential SNPs selected based on read counts
#     - pair                # Statistics table of all pairs of SNPs
#     - linkage             # A list of groups of linked SNPs
#       - set1              # First set of linked SNPs, 3-column table (ref/A/B), bases of SNP
#       - [set2]            # [Optionally], 2nd set, and so on ...
#     - weaklink            # Statistics table of linked pair of SNPs (non-significant due to small N)
#     - orphan              # Individual SNPs not linked to any other SNPs
#   - indel                 # INDELs in the alignment
#     - selected            # Summary of potential INDELs selected based on read counts
#     - final               # Final list of validated, high quality INDELs
#       - indel_id1         # First validated INDEL, named by the index before it in the summary matrix
#         - index           # The indexes before/after the INDEL in the summary matrix
#         - msa             # Multiple sequence alignment (MUSCLE) of the Ref and 2 alleles (to refine )
#         - seq             # The INDEL sequences (ref, A and B)
#         - read            # Read IDs
#           - A             # ID of reads containing allele A
#           - B             # ID of reads containing allele B
#         - linkage         # Linkage of INDEL to other variants
#           - snp           # Statistics table of the INDEL to all SNPs
#           - indel         # Statistics table of the INDEL to other INDELs
#       - [indel_id2]       # [Optionally], 2nd validated INDEL, and so on ...
#   - merged                # SNPs and INDELs merged
#     - pair                # Statistics table of all pairs of SNPs/INDELs
#     - set                 # A list of groups of linked SNPs/INDELs
#       - set1              # First set of linked SNPs, 6-column table
#       - [set2]            # [Optionally], 2nd set, and so on ...
#       - weaklink          # Statistics table of linked pair of SNPs/INDELs (non-significant due to small N)
#       - orphan            # Individual SNPs/INDELs not linked to any other SNPs
#   - count                 # Summary of output
#     - read                # Number of total reads after filtering
#     - snp                 # Number of selected het SNPs
#     - set                 # Number of SNP sets
#     - weak                # Number of SNP pairs with weak link (non-significant)
#     - orphan              # Number of orphan SNPs
#     - indel               # Number of selected INDELs linked to other SNPs/INDELs
#     - merged              # Number of SNP/INDEL sets after merging
#     - contig              # Number of assembled consensus sequences (0 when there is no merged set)
# ---END OF DOCUMENT---
# Console history: re-run three inputs and inspect the pieces of `out` left in
# the workspace by the earlier manual step-through.
o <- lapply(27:29, function(i) {print(i); DeNovoSeq2Seq(fns[i]); })
sapply(out$assembled, function(o) length(o$consensus))
out$assembled
out$merged$set
out$snp$selected
out$snp$pair
# Load the packages and source files, then read the vector of input file names
require(GenomicAlignments);
require(ShortRead);
source('../AssembleHaplotype.R');
source('FindLinkedSnp.R');
source('FindLinkedIndel.R');
source('MergeLinkage.R');
source('AssembleAllele.R');
fns <- readRDS('../../seq2seq/file_seq.rds');
source('DeNovoSeq2Seq.R')
ls()
# Run the first 200 inputs, save the results, and tabulate the per-input counts
o <- lapply(1:200, function(i) {print(i); DeNovoSeq2Seq(fns[i]); }); saveRDS(o, '../../seq2seq/ex_first200.rds')
cnt <- sapply(o, function(o) o$count)
cnt <- t(cnt)
cnt
head(cnt)
dim(cnt)
# Columns 7 and 8 of `cnt` are the 7th and 8th entries of out$count
table(cnt[, 8])
table(cnt[1:100, 8])
table(cnt[1:100, 7])
table(cnt[1:200, 7])
which(cnt[, 7]==4)
x <- o[[172]]
x$merged$set
x$sequence
# Consensus2Fasta() is called here before its source file is loaded on the next
# line; the call is then repeated.
seq <- Consensus2Fasta(o, '../../seq2seq/contig200.fasta')
source('Consensus2Fasta.R');
seq <- Consensus2Fasta(o, '../../seq2seq/contig200.fasta')
length(seq)
# Length distribution of the returned sequences
nc <- nchar(seq)
summary(nc)
hist(nc)
seq[nc==max(nc)][1]
source('~/Google Drive/Projects/Chou/2021-05_ABO/PAClindrome/R/DeNovo/DeNovoSeq2Seq.R', echo=TRUE)
source('DeNovoSeq2Seq.R')
# Summarize alignment of consensus sequences to one consensus sequence
require(Biostrings);
require(GenomicRanges);
# require(GenomicAlignments);
# require(ShortRead);# source('../AssembleHaplotype.R');
require(msa);
# Console history: switch to the mounted project directory and load the list of seed files
setwd("/Volumes/chou_lab/2021-05_ABO_Assembly/denovo/round1/Control1")
fseed <- readRDS('file_seed.rds');
length(fseed)
f <- fseed[1:10]
f
file.exists(f)
fseed <- readRDS('file_seed.rds')[1:10];
getwd()
# Rewrite the stored path prefix to the local mount point, then re-check the files
fseed <- sub('/mnt/isilon/chou_lab/2021-05_ABO_Assembly/consensus2consensus/Control1/split', '/Volumes/chou_lab/2021-05_ABO_Assembly/denovo/round1/Control1', fseed)
file.exists(fseed)
# Run every seed file, save each result under the part of its path that follows
# '/split/', and collect the count vectors.
# NOTE(review): the sub() above removes '/split' from the paths, so the
# strsplit() on '/split/' would return the whole path -- confirm the intended
# output file name.
cnt <- sapply(1:length(fseed), function(i) {
print(i);
out <- DeNovoSeq2Seq(fseed[i]);
fout <- sapply(strsplit(fseed[i], '/split/'), function(f) rev(f)[1]);
saveRDS(out, fout);
out$count;
});
cnt <- t(cnt);
rownames(cnt) <- sapply(strsplit(fseed, '/split/'), function(f) rev(f)[1]);
# Re-run the first seed file by hand and look at its alignment table directly
i
i <- 1
i
print(i);
out <- DeNovoSeq2Seq(fseed[i]);
fseed[i]
file.exists('/Volumes/chou_lab/2021-05_ABO_Assembly/denovo/round1/Control1/m54215_191214_162956/10027393.rds')
out <- DeNovoSeq2Seq(fseed[i]);
faln <- '/Volumes/chou_lab/2021-05_ABO_Assembly/denovo/round1/Control1/m54215_191214_162956/10027393.rds'
aln <- readRDS(faln);
dim(aln)
aln
fseed <- readRDS('file_seed.rds')[1:10];
fseed
getwd()
# Console history: lengths of two literal sequences and other scratch calculations.
# NOTE(review): `l` is not assigned anywhere in the visible history.
nchar('NGGCTGAAAGATGAAACACTGCCCCCCTCCCACTCACCCCTCAAAGGGAGGGACAGAAGCTGCTGCAGGGAAGGAGGGGAAACCCTCTAGAACCTCTTTCC')
nchar('NGGGATTTCACAGCTCCAGTTCCAGACCTCATCACAAGGAGAGTGGCTCCCTCCTTGCCAGAAATGGGGACACAGGGAGGATGAGGAGGTGGCCTGGAGGG')
l0 <- l[14:93]
3000*4
# Load the hg38 genome package and extract 1001 overlapping 150-base windows
# starting at chr1 positions 10^6 .. 10^6+1000
library("BSgenome.Hsapiens.UCSC.hg38", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
names(Hsapiens)
gr <- GRanges('chr1', IRanges(10^6:(10^6+1000), width=150))
gr
seq <- getSeq(Hsapiens, gr)
seq
# Trial-and-error with the Biostrings pattern-counting functions on these sequences
?countMatches
?countMatches
args(countMatches)
require(Biostrings)
?vmatchPDict
vcountPDict(gr, Hsapiens)
vcountPDict(gr, Hsapiens[[1]])
chr1 <- Hsapiens$chr1
chr1
cnt <- countPDict(seq, chr1)
cnt
table(cnt)
cnt <- countPDict(seq[1:10], chr1)
cnt
cnt <- countPDict(seq[1:10], Hsapiens)
cnt <- vcountPDict(seq[1:10], Hsapiens)
dim(cnt)
head(cnt)
table(cnt[, 1])
cnt2 <- vcountPDict(seq[11:20], Hsapiens)
cnt2
length(Hsapiens)
table(cnt2$index)
split(cnt2$count, cnt2$index)
sum(split(cnt2$count, cnt2$index))
seq
10^9/1000
10^9/10000
gr
# Same count using only the first 20 bases of each of the first ten sequences
cnt <- vcountPDict(substr(seq[1:10], 1, 20), Hsapiens)
10*300
# Console history: inspect the structure of a saved plot object
p <- readRDS("/Users/zhangz/Google Drive/Projects/Chou/manuscript/table/linkage/plot.rds")
length(p)
names(p)
p[[1]]
names(p[[1]])
head(p[[1]])
head(p[[1]][[1]])
p[[1]][[1]][1]
str(p[[1]][[1]])
# Load a saved allele-frequency table and look up individual rs IDs; column 2
# is matched against the rs ID and column 3 against a source label
af <- readRDS("/Users/zhangz/Google Drive/Projects/Chou/manuscript/figure/linkage/af_all.rds")
names(af)
head(af)
af[af[, 2]=='rs61740966', ]
af[af[, 2]=='rs61740966' & af[, 3]=='1000Genomes', ]
af[af[, 2]=='rs61740966' & af[, 3]=='gnomAD - Exomes', ]
dim(af)
table(af[, 3])
sort(table(af[, 3]))
af[af[, 2]=='rs61740966' & af[, 3]=='gnomAD - Genomes', ]
af[af[, 2]=='rs28453868' & af[, 3]=='gnomAD - Exomes', ]
af[af[, 2]=='rs28453868', ]
af[af[, 2]=='rs61740966' & af[, 3]=='gnomAD - Exomes', ]
af[af[, 2]=='rs61740966' & af[, 3]=='gnomAD - Exomes', ]
# grep() returns matching positions rather than a logical mask; the following
# line repeats the query with grepl()
af[af[, 2]=='rs61740966' & grep('gnomAD', af[, 3]), ]
af[af[, 2]=='rs61740966' & grepl('gnomAD', af[, 3]), ]
af[af[, 2]=='rs28453868' & grepl('gnomAD', af[, 3]), ]
# Sort the table by its Population column before repeating the look-ups
af <- af[order(af$Population), ]
af[af[, 2]=='rs61740966' & grepl('gnomAD', af[, 3]), ]
af[af[, 2]=='rs28453868' & grepl('gnomAD', af[, 3]), ]
af[af[, 2]=='rs28654325' & grepl('gnomAD', af[, 3]), ]
af[af[, 2]=='rs1801096' & grepl('gnomAD', af[, 3]), ]
# Console history: compare the lengths of two literal strings
nchar('AGTCGGTCTCTCAAAGGCAACAACACCGGAGGAGGAGGAAAAAGAGAGAGAT')
nchar('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
# Split two comma-separated number strings and count their fields
x <- '44,111,0,70,0,7,2,35,75,15,31,4,4,70,0,116,1,40,13,26,48,14,30,71,39,26,50,40,13,98,8,57,54,8,19,17,19,128,55,105,22,39,65,86,19,29,133,3,36,31,3,112'
x <- strsplit(x, ',')[[1]]
x
length(x)
y <- '24,4,35,32,5,17,6,42,4,52,8,17,9,8,9,11,7,15,4,5,2,3,15,3,17,2,3,8,10,11,15,16,15,3,10,16,9,5,27,10,7,16,27,7,6,6,12,12,13,6,7,17'
(strsplit(y, ','))
(strsplit(y, ','))[[1]]
length((strsplit(y, ','))[[1]])
# Check which index which.max()/which.min() report when the extreme value is tied
?which.max
x <- rep(1:2, each=3)
x
which.max(x)
which.min(x)
which.min(rev(x))
rev(x)
350*17
# Console history: load annotation and sample tables; the first path (under R/)
# is immediately retried under result/
ann <- readRDS('/Users/zhangz/Google Drive/Projects/Chou/2021-08_AMKL_TMD/R/deseq/anno.rds')
ann <- readRDS('/Users/zhangz/Google Drive/Projects/Chou/2021-08_AMKL_TMD/result/deseq/anno.rds')
dim(ann)
# `head9ann` is a mistyped `head(ann)`, which follows on the next line
head9ann
head(ann)
smp <- readRDS('/Users/zhangz/Google Drive/Projects/Chou/2021-08_AMKL_TMD/result/deseq/sample.rds')
dim(smp)
smp
getwd()
# List every file under the RoCA directory with its size to find the largest ones
setwd('~/Development/RoCA/')
f <- dir(rec=TRUE)
sz <- file.size(f)
head(sz)
# which() is given a character vector here; the next line repeats the
# selection without it
which(f[sz==max(sz)])
f[sz==max(sz)]
names(sz) <- f
sz <- sz[order(sz)]
sz <- rev(sz)
head(sz)
sz[7:12]
# Delete large .rds files; two of the paths are mistyped ('.rd', and a trailing
# space inside the quotes) and are each followed by a corrected call
file.remove('template/rh/evaluate_primer/random10k.rd')
dir()
file.remove('template/rh/evaluate_primer/random10k.rds')
file.remove('template/rh/evaluate_primer/examples/UPID_380/input/read.rds')
file.remove('template/rh/evaluate_primer/examples/UPID_380/input/random10k.rds ')
file.remove('template/rh/evaluate_primer/examples/UPID_380/input/random10k.rds')