-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update findorfstest.jl and runtests.jl
- Loading branch information
1 parent
4def50e
commit 49e5493
Showing
5 changed files
with
177 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
>ORF01 id=01 start=5 stop=22 strand=- frame=2 | ||
MKQND* | ||
>ORF02 id=02 start=37 stop=156 strand=+ frame=1 | ||
MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF* | ||
>ORF03 id=03 start=107 stop=136 strand=- frame=2 | ||
MYPTISVPL* | ||
>ORF04 id=04 start=140 stop=160 strand=- frame=2 | ||
MIRKSL* | ||
>ORF05 id=05 start=194 stop=223 strand=- frame=2 | ||
MKSVIWLPV* | ||
>ORF06 id=06 start=194 stop=247 strand=- frame=2 | ||
MNFRSIVRMKSVIWLPV* | ||
>ORF07 id=07 start=194 stop=268 strand=- frame=2 | ||
MLPVICIMNFRSIVRMKSVIWLPV* | ||
>ORF08 id=08 start=194 stop=283 strand=- frame=2 | ||
MPQSSMLPVICIMNFRSIVRMKSVIWLPV* | ||
>ORF09 id=09 start=249 stop=347 strand=+ frame=3 | ||
MHITGSILLCGMSDLPQRNIYDLSEKDQNDQG* | ||
>ORF10 id=10 start=282 stop=347 strand=+ frame=3 | ||
MSDLPQRNIYDLSEKDQNDQG* | ||
>ORF11 id=11 start=334 stop=375 strand=+ frame=1 | ||
MIRDSRNRIARVI* | ||
>ORF12 id=12 start=383 stop=415 strand=- frame=2 | ||
MIFRNPVTTC* | ||
>ORF13 id=13 start=426 stop=590 strand=+ frame=3 | ||
MDDYIAFYLVVGGNGSKYSKYIRIKITEEITNNVIGAAGYGRIKILNDNHILHI* | ||
>ORF14 id=14 start=430 stop=441 strand=+ frame=1 | ||
MTT* | ||
>ORF15 id=15 start=434 stop=439 strand=- frame=2 | ||
M* | ||
>ORF16 id=16 start=446 stop=490 strand=- frame=2 | ||
MYLLYLLPLPPTTR* | ||
>ORF17 id=17 start=466 stop=528 strand=+ frame=1 | ||
MGANTVSTSESKLPKKSPTM* | ||
>ORF18 id=18 start=523 stop=528 strand=+ frame=1 | ||
M* | ||
>ORF19 id=19 start=542 stop=577 strand=- frame=2 | ||
MWLSLRILIRP* | ||
>ORF20 id=20 start=542 stop=586 strand=- frame=2 | ||
MWRMWLSLRILIRP* | ||
>ORF21 id=21 start=565 stop=657 strand=+ frame=1 | ||
MTTTFSTSKGLLVSSTSRPEGLMMRISEFR* | ||
>ORF22 id=22 start=631 stop=657 strand=+ frame=1 | ||
MMRISEFR* | ||
>ORF23 id=23 start=634 stop=657 strand=+ frame=1 | ||
MRISEFR* | ||
>ORF24 id=24 start=650 stop=727 strand=- frame=2 | ||
MPADCTPVKQLWRPLKCSWTPWRST* | ||
>ORF25 id=25 start=786 stop=872 strand=+ frame=3 | ||
MFFGHIGNKSCNNDETKYTKNKICEYKF* | ||
>ORF26 id=26 start=823 stop=876 strand=+ frame=1 | ||
MMKPNTPKIKYASINFR* | ||
>ORF27 id=27 start=826 stop=876 strand=+ frame=1 | ||
MKPNTPKIKYASINFR* | ||
>ORF28 id=28 start=854 stop=865 strand=+ frame=2 | ||
MRV* | ||
>ORF29 id=29 start=887 stop=976 strand=- frame=2 | ||
MPTGRSAPSGASAINSKYVIKSYQKQDLL* | ||
>ORF30 id=30 start=910 stop=918 strand=+ frame=1 | ||
MT* | ||
>ORF31 id=31 start=943 stop=978 strand=+ frame=1 | ||
MPQMVLIFQLA* | ||
>ORF32 id=32 start=952 stop=978 strand=+ frame=1 | ||
MVLIFQLA* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
>ORF01 id=01 start=5 stop=22 strand=- frame=2 | ||
ATGAAACAGAACGACTGA | ||
>ORF02 id=02 start=37 stop=156 strand=+ frame=1 | ||
ATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGA | ||
>ORF03 id=03 start=107 stop=136 strand=- frame=2 | ||
ATGTATCCCACGATTTCCGTTCCCCTCTGA | ||
>ORF04 id=04 start=140 stop=160 strand=- frame=2 | ||
ATGATCAGAAAAAGTTTGTAG | ||
>ORF05 id=05 start=194 stop=223 strand=- frame=2 | ||
ATGAAATCGGTTATCTGGCTTCCTGTCTGA | ||
>ORF06 id=06 start=194 stop=247 strand=- frame=2 | ||
ATGAATTTCAGGTCGATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA | ||
>ORF07 id=07 start=194 stop=268 strand=- frame=2 | ||
ATGCTGCCGGTAATATGCATTATGAATTTCAGGTCGATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA | ||
>ORF08 id=08 start=194 stop=283 strand=- frame=2 | ||
ATGCCACAGAGCAGTATGCTGCCGGTAATATGCATTATGAATTTCAGGTCGATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA | ||
>ORF09 id=09 start=249 stop=347 strand=+ frame=3 | ||
ATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAG | ||
>ORF10 id=10 start=282 stop=347 strand=+ frame=3 | ||
ATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAG | ||
>ORF11 id=11 start=334 stop=375 strand=+ frame=1 | ||
ATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAG | ||
>ORF12 id=12 start=383 stop=415 strand=- frame=2 | ||
ATGATATTCAGAAATCCTGTAACAACCTGCTGA | ||
>ORF13 id=13 start=426 stop=590 strand=+ frame=3 | ||
ATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAA | ||
>ORF14 id=14 start=430 stop=441 strand=+ frame=1 | ||
ATGACTACATAG | ||
>ORF15 id=15 start=434 stop=439 strand=- frame=2 | ||
ATGTAG | ||
>ORF16 id=16 start=446 stop=490 strand=- frame=2 | ||
ATGTACTTACTGTATTTGCTCCCATTACCTCCAACTACAAGGTAA | ||
>ORF17 id=17 start=466 stop=528 strand=+ frame=1 | ||
ATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAA | ||
>ORF18 id=18 start=523 stop=528 strand=+ frame=1 | ||
ATGTAA | ||
>ORF19 id=19 start=542 stop=577 strand=- frame=2 | ||
ATGTGGTTGTCATTGAGAATTTTGATTCGACCGTAA | ||
>ORF20 id=20 start=542 stop=586 strand=- frame=2 | ||
ATGTGGAGAATGTGGTTGTCATTGAGAATTTTGATTCGACCGTAA | ||
>ORF21 id=21 start=565 stop=657 strand=+ frame=1 | ||
ATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAG | ||
>ORF22 id=22 start=631 stop=657 strand=+ frame=1 | ||
ATGATGCGAATCTCGGAATTCAGGTAG | ||
>ORF23 id=23 start=634 stop=657 strand=+ frame=1 | ||
ATGCGAATCTCGGAATTCAGGTAG | ||
>ORF24 id=24 start=650 stop=727 strand=- frame=2 | ||
ATGCCGGCGGATTGTACACCAGTGAAACAACTTTGGAGACCGTTAAAATGCAGCTGGACTCCCTGGCGGTCTACCTGA | ||
>ORF25 id=25 start=786 stop=872 strand=+ frame=3 | ||
ATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAG | ||
>ORF26 id=26 start=823 stop=876 strand=+ frame=1 | ||
ATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAA | ||
>ORF27 id=27 start=826 stop=876 strand=+ frame=1 | ||
ATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAA | ||
>ORF28 id=28 start=854 stop=865 strand=+ frame=2 | ||
ATGCGAGTATAA | ||
>ORF29 id=29 start=887 stop=976 strand=- frame=2 | ||
ATGCCAACTGGAAGATCAGCACCATCTGGGGCATCGGCTATAAATTCGAAGTACGTAATTAAGTCATACCAAAAGCAGGATTTGTTATGA | ||
>ORF30 id=30 start=910 stop=918 strand=+ frame=1 | ||
ATGACTTAA | ||
>ORF31 id=31 start=943 stop=978 strand=+ frame=1 | ||
ATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGA | ||
>ORF32 id=32 start=952 stop=978 strand=+ frame=1 | ||
ATGGTGCTGATCTTCCAGTTGGCATGA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Round-trip checks for the write_orfs_* writers: each writer dumps the ORFs of a
# sequence to a FASTA file, which is then parsed back and compared record by record.
@testset "write_orfs_f* " begin

    # Currently unused fixtures (kept commented out):
    # seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
    # seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC"

    # Input sequence taken from pyrodigal issue #13:
    # https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
    # Pyrodigal predicts 2 genes from this sequence:
    #   1) a sequence with an alternative start codon (GTG) at 48:347
    #   2) a sequence with a common start codon at 426:590
    # The NCBI ORFfinder program, on the other hand, predicts 9 ORFs longer than
    # 75 nt, of which one has an "outbound" stop.
    seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"

    # Test case 1: nucleotide output (.fna)

    fna_path = "data/out-seq03.fna"
    open(io -> write_orfs_fna(seq03, io), fna_path, "w")

    # Parse the freshly written file back into a vector of FASTA records.
    fna_records = open(FASTAReader, fna_path) do reader
        collect(reader)
    end

    @test fna_records[1] == FASTX.FASTA.Record("ORF01 id=01 start=5 stop=22 strand=- frame=2", "ATGAAACAGAACGACTGA")
    @test length(fna_records) == 32
    @test identifier(fna_records[1]) == "ORF01"
    @test description(fna_records[1]) == "ORF01 id=01 start=5 stop=22 strand=- frame=2"
    @test sequence(fna_records[1]) == "ATGAAACAGAACGACTGA"

    # Test case 2: protein output (.faa)

    faa_path = "data/out-seq03.faa"
    open(io -> write_orfs_faa(seq03, io), faa_path, "w")

    faa_records = open(FASTAReader, faa_path) do reader
        collect(reader)
    end

    @test faa_records[2] == FASTX.FASTA.Record("ORF02 id=02 start=37 stop=156 strand=+ frame=1", "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*")
    @test length(faa_records) == 32
    @test identifier(faa_records[2]) == "ORF02"
    @test description(faa_records[2]) == "ORF02 id=02 start=37 stop=156 strand=+ frame=1"
    @test sequence(faa_records[2]) == "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*"

    # Test case 3: a reverse-strand ORF in the protein output

    @test faa_records[3] == FASTX.FASTA.Record("ORF03 id=03 start=107 stop=136 strand=- frame=2", "MYPTISVPL*")

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters