-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
106 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Generated by Django 3.2.1 on 2021-08-24 11:21 | ||
|
||
from django.db import migrations, models | ||
import django.db.models.deletion | ||
|
||
|
||
class Migration(migrations.Migration):
    """Redefine ``geneversion.gene_symbol`` as a nullable, cascading foreign key.

    The field points at ``genes.genesymbol``; rows are removed together with
    the symbol they reference (``on_delete=CASCADE``).
    """

    # Must be applied after the previous migration of the ``genes`` app.
    dependencies = [
        ('genes', '0032_alter_geneannotationrelease_version'),
    ]

    operations = [
        migrations.AlterField(
            model_name='geneversion',
            name='gene_symbol',
            field=models.ForeignKey(
                to='genes.genesymbol',
                on_delete=django.db.models.deletion.CASCADE,
                null=True,
            ),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
genes/scripts/ensembl/download_ensembl_gene_annotation_grch38.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
#!/bin/bash
#
# Download Ensembl GRCh38 gene annotation files for Homo sapiens.
# Releases 76-80 are fetched as GTF, releases 81 onwards as GFF3.

# NOTE(review): release 78 is also fetched by the loop below, so this file is
# downloaded twice (wget stores the second copy with a ".1" suffix) - confirm
# whether this explicit download is still wanted.
wget ftp://ftp.ensembl.org/pub/release-78/gtf/homo_sapiens/Homo_sapiens.GRCh38.78.gtf.gz

for release in 76 77 78 79 80; do
    wget "ftp://ftp.ensembl.org/pub/release-${release}/gtf/homo_sapiens/Homo_sapiens.GRCh38.${release}.gtf.gz"
done

# 81 is first GFF3 for GRCh38
for release in 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104; do
    wget "ftp://ftp.ensembl.org/pub/release-${release}/gff3/homo_sapiens/Homo_sapiens.GRCh38.${release}.gff3.gz"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#!/bin/bash
#
# Download gene annotation releases, convert them to genePred and load them
# through the `import_gene_annotation` management command.
#
# Requires:
#   VG_DIR - directory containing manage.py

# Abort on the first failing command.
set -e

# VG dir
if [ -z "${VG_DIR}" ]; then
    echo "You need to define 'VG_DIR'"
    exit 1
fi

# Locate the converter on PATH. The lookup exits non-zero when the tool is
# missing, which under `set -e` would abort the script before the download
# fallback below could run - hence the `|| true`.
GFF3_TO_GENEPRED=$(command -v gff3ToGenePred || true)
if [ -z "${GFF3_TO_GENEPRED}" ]; then
    echo "Downloading gff3ToGenePred command line tool"
    wget hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/gff3ToGenePred
    chmod a+x gff3ToGenePred
    # Store an absolute path: the script later changes into sub-directories,
    # where a relative "./gff3ToGenePred" would no longer resolve.
    GFF3_TO_GENEPRED="$(pwd)/gff3ToGenePred"
fi
|
||
|
||
# Ensembl
echo "Downloading ENSEMBL"
mkdir -p ensembl
cd ensembl

# The Ensembl import is currently switched off by the `if false` guard;
# change it to `if true` to re-enable.
if false; then
    for release in 102 103 104; do
        gff="Homo_sapiens.GRCh38.${release}.gff3.gz"
        # Strip the real ".gff3.gz" suffix (stripping ".gff.gz" never matched,
        # leaving names such as "....gff3.gz.genePred").
        GENEPRED="$(basename "${gff}" .gff3.gz).genePred"
        echo "GenePred = ${GENEPRED}"

        # Only download when the annotation file is not already present.
        if [[ ! -e "${gff}" ]]; then
            wget "ftp://ftp.ensembl.org/pub/release-${release}/gff3/homo_sapiens/${gff}"
        fi

        # Only convert when the genePred file is not already present.
        if [[ ! -e "${GENEPRED}" ]]; then
            "${GFF3_TO_GENEPRED}" -processAllGeneChildren "${gff}" "${GENEPRED}"
        fi

        echo "Inserting gene annotation"

        python3.8 "${VG_DIR}/manage.py" import_gene_annotation --genome-build=GRCh38 --replace --annotation-consortium=Ensembl \
            --gff "${gff}" \
            --genePred "${GENEPRED}"

    done
fi

cd ..
|
||
# RefSeq
echo "Downloading RefSeq"

mkdir -p refseq
cd refseq

for release in 109.20210226 109.20210514; do
    # The upstream file name carries no release, so it is stored locally
    # under a release-specific name.
    gff="GCF_000001405.39_GRCh38.p13_genomic.${release}.gff.gz"
    GENEPRED="$(basename "${gff}" .gff.gz).genePred"

    # Only download when the annotation file is not already present.
    if [[ ! -e "${gff}" ]]; then
        echo "Downloading '${gff}'"
        # FTP is corrupt, trying http

        wget "http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/annotation_releases/${release}/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.gff.gz"
        mv GCF_000001405.39_GRCh38.p13_genomic.gff.gz "${gff}"
    fi

    # Only convert when the genePred file is not already present.
    if [[ ! -e "${GENEPRED}" ]]; then
        "${GFF3_TO_GENEPRED}" -processAllGeneChildren -maxParseErrors=-1 -geneNameAttr=Name -rnaNameAttr=transcript_id "${gff}" "${GENEPRED}"
    fi

    # Quoted so that a VG_DIR containing whitespace is passed as one argument.
    python3.8 "${VG_DIR}/manage.py" import_gene_annotation --genome-build=GRCh38 --replace --annotation-consortium=RefSeq \
        --gff "${gff}" \
        --genePred "${GENEPRED}"

done

cd ..