forked from bioxfu/circRNAFinder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTutorial.sh
executable file
·114 lines (92 loc) · 5.02 KB
/
Tutorial.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#! /usr/bin/bash
# This tutorial helps you get started with circRNAFinder by demonstrating how to detect, annotate and visualize circRNAs using public RNA-Seq data. It also could be used as a template to build a pipeline for your own data.
# Step1: Create a working directory
mkdir example
cd example
# Step2: Download and build human genome/transcript sequences and gene annotations. (You only have to do it once.)
# The genome/transcript sequences are downloaded from USCS Genome Browser or Ensembl database in FASTA format and built into Bowtie indices. The latest version of gene annotations are downloaded from Ensembl database in GTF format. The longest transcript for each gene are extracted and converted into a custom format. Currently, SeqAnnoDownloadBuild.py supports the genome of human(hg19), mouse(mm10) and Arabidopsis(TAIR10).
mkdir build
cd build
SeqAnnoDownloadBuild.py hg19 --download --genome
SeqAnnoDownloadBuild.py hg19 --build --genome
SeqAnnoDownloadBuild.py hg19 --download --transcriptome
SeqAnnoDownloadBuild.py hg19 --build --transcriptome
SeqAnnoDownloadBuild.py hg19 --download --annotation
SeqAnnoDownloadBuild.py hg19 --build --annotation
cd ..
## Step3: Create fasta folder, download the public RNA-Seq data from SRA database
mkdir fasta
cd fasta
## Download .sra files manually at
# http://www.ncbi.nlm.nih.gov/sra/SRX218203 (RIBOMINUS_HEK293)
# http://www.ncbi.nlm.nih.gov/sra/SRX105931 (CD19)
# http://www.ncbi.nlm.nih.gov/sra/SRX105932 (CD34)
# http://www.ncbi.nlm.nih.gov/sra/SRX105933 (neutrophils)
# Or if you've installed the Aspera, you can download them using the following commands in terminal:
# ~/.aspera/connect/bin/ascp -QTr -k1 -l640M -L log --retry-timeout=10 -i ~/.aspera/connect/etc/asperaweb_id_dsa.putty [email protected]:sra/sra-instant/reads/ByRun/sra/SRR/SRR650/SRR650317/SRR650317.sra .
# ~/.aspera/connect/bin/ascp -QTr -k1 -l640M -L log --retry-timeout=10 -i ~/.aspera/connect/etc/asperaweb_id_dsa.putty [email protected]:sra/sra-instant/reads/ByRun/sra/SRR/SRR364/SRR364679/SRR364679.sra .
# ~/.aspera/connect/bin/ascp -QTr -k1 -l640M -L log --retry-timeout=10 -i ~/.aspera/connect/etc/asperaweb_id_dsa.putty [email protected]:sra/sra-instant/reads/ByRun/sra/SRR/SRR364/SRR364680/SRR364680.sra .
# ~/.aspera/connect/bin/ascp -QTr -k1 -l640M -L log --retry-timeout=10 -i ~/.aspera/connect/etc/asperaweb_id_dsa.putty [email protected]:sra/sra-instant/reads/ByRun/sra/SRR/SRR364/SRR364681/SRR364681.sra .
# ~/.aspera/connect/bin/ascp -QTr -k1 -l640M -L log --retry-timeout=10 -i ~/.aspera/connect/etc/asperaweb_id_dsa.putty [email protected]:sra/sra-instant/reads/ByRun/sra/SRR/SRR384/SRR384963/SRR384963.sra .
# ~/.aspera/connect/bin/ascp -QTr -k1 -l640M -L log --retry-timeout=10 -i ~/.aspera/connect/etc/asperaweb_id_dsa.putty [email protected]:sra/sra-instant/reads/ByRun/sra/SRR/SRR384/SRR384964/SRR384964.sra .
# ~/.aspera/connect/bin/ascp -QTr -k1 -l640M -L log --retry-timeout=10 -i ~/.aspera/connect/etc/asperaweb_id_dsa.putty [email protected]:sra/sra-instant/reads/ByRun/sra/SRR/SRR384/SRR384965/SRR384965.sra .
## Step4: Convert .sra files into FASTA format, collapse the identical reads
# HEK293 (Paired-end reads) #
sra2fasta.py -p SRR650317.sra
fastx_collapser.py -f SRR650317_1.fasta.gz,SRR650317_2.fasta.gz HEK293.fa
# CD19 (Single-end reads) #
sra2fasta.py -s SRR364679.sra
sra2fasta.py -s SRR384963.sra
fastx_collapser.py -f SRR364679.fasta.gz,SRR384963.fasta.gz CD19.fa
# CD34 (Single-end reads) #
sra2fasta.py -s SRR364680.sra
sra2fasta.py -s SRR384964.sra
fastx_collapser.py -f SRR364680.fasta.gz,SRR384964.fasta.gz CD34.fa
# neutrophils (Single-end reads) #
sra2fasta.py -s SRR364681.sra
sra2fasta.py -s SRR384965.sra
fastx_collapser.py -f SRR364681.fasta.gz,SRR384965.fasta.gz neutrophils.fa
cd ..
## Step5: Edit configure files
# Example circRNAFind.cfg file
#[lib1]
#sample_name = HEK293
#reads_file = ./fasta/HEK293.fa
#genome_index = ./build/hg19_dna
#trans_index = ./build/hg19_trans
#genome_seq = ./build/hg19_dna.fa
#[lib2]
#sample_name = CD34
#reads_file = ./fasta/CD34.fa
#genome_index = ./build/hg19_dna
#trans_index = ./build/hg19_trans
#genome_seq = ./build/hg19_dna.fa
#[lib3]
#sample_name = CD19
#reads_file = ./fasta/CD19.fa
#genome_index = ./build/hg19_dna
#trans_index = ./build/hg19_trans
#genome_seq = ./build/hg19_dna.fa
#[lib4]
#sample_name = neutrophils
#reads_file = ./fasta/neutrophils.fa
#genome_index = ./build/hg19_dna
#trans_index = ./build/hg19_trans
#genome_seq = ./build/hg19_dna.fa
# Example circRNAAnno.cfg file
#[group1]
#sample_names = CD19,CD34,neutrophils,HEK293
#genome_index = ./build/hg19_dna
#genome_seq = ./build/hg19_dna.fa
#gene_bed = ./build/hg19.gtf.build
#id_prefix = hsa
# Example circRNAView.cfg file
#[group1]
#gff_file = ./group1/table/combLib_annoTab.gff
#png_folder = ./group1/IGV/
#Genome = hg19
## Step6: Run circRNAFinder
circRNAFind.py circRNAFind.cfg -s 0
circRNAAnno.py circRNAAnno.cfg -s 0
igv.sh &
circRNAView.py circRNAView.cfg