Skip to content

Commit

Permalink
#51 - disable by default for now
Browse files (browse the repository at this point in the history)
  • Loading branch information
davmlaw committed Aug 14, 2023
1 parent b28a84d commit fe2c079
Showing 1 changed file with 25 additions and 21 deletions.
46 changes: 25 additions & 21 deletions generate_transcript_data/refseq_transcripts_grch38.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,31 +29,35 @@ if [[ ! -z ${UTA_TRANSCRIPTS} ]]; then
merge_args+=(${uta_cdot_file})
fi

# Historical - these are stored in separate files for annotation/alignments
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_genomic.gff.gz
annotation_filename=$(basename $url)
if [[ ! -e ${annotation_filename} ]]; then
wget ${url} --output-document=${annotation_filename}
fi
if [[ -z ${GRCH38_REFSEQ_HISTORICAL} ]]; then
echo "Not including RefSeq GRCh38 historical transcripts. Set env variable GRCH38_REFSEQ_HISTORICAL=True to do so"
else
echo "Adding RefSeq GRCh38 historical transcripts"
# Historical - these are stored in separate files for annotation/alignments
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_genomic.gff.gz
annotation_filename=$(basename $url)
if [[ ! -e ${annotation_filename} ]]; then
wget ${url} --output-document=${annotation_filename}
fi

url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_knownrefseq_alns.gff.gz
alignments_filename=$(basename $url)
if [[ ! -e ${alignments_filename} ]]; then
wget ${url} --output-document=${alignments_filename}
fi
url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/RefSeq_historical_alignments/GCF_000001405.40-RS_2023_03_knownrefseq_alns.gff.gz
alignments_filename=$(basename $url)
if [[ ! -e ${alignments_filename} ]]; then
wget ${url} --output-document=${alignments_filename}
fi

filename=GCF_000001405.40-RS_2023_03_combined_annotation_alignments.gff.gz
cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz
filename=GCF_000001405.40-RS_2023_03_combined_annotation_alignments.gff.gz
cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz

if [[ ! -e ${filename} ]]; then
echo "Combining historical annotations and alignments..."
cat ${annotation_filename} ${alignments_filename} > ${filename}
fi
if [[ ! -e ${cdot_file} ]]; then
${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=GRCh38 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" --skip-missing-parents
if [[ ! -e ${filename} ]]; then
echo "Combining historical annotations and alignments..."
cat ${annotation_filename} ${alignments_filename} > ${filename}
fi
if [[ ! -e ${cdot_file} ]]; then
${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=GRCh38 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" --skip-missing-parents
fi
merge_args+=(${cdot_file})
fi
merge_args+=(${cdot_file})


filename=ref_GRCh38_top_level.gff3.gz
url=http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.106/GFF/${filename}
Expand Down

0 comments on commit fe2c079

Please sign in to comment.