Skip to content

Commit

Permalink
Update findorfstest.jl and runtests.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
camilogarciabotero committed Jan 11, 2024
1 parent 4def50e commit 49e5493
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 2 deletions.
64 changes: 64 additions & 0 deletions test/data/out-seq03.faa
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
>ORF01 id=01 start=5 stop=22 strand=- frame=2
MKQND*
>ORF02 id=02 start=37 stop=156 strand=+ frame=1
MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*
>ORF03 id=03 start=107 stop=136 strand=- frame=2
MYPTISVPL*
>ORF04 id=04 start=140 stop=160 strand=- frame=2
MIRKSL*
>ORF05 id=05 start=194 stop=223 strand=- frame=2
MKSVIWLPV*
>ORF06 id=06 start=194 stop=247 strand=- frame=2
MNFRSIVRMKSVIWLPV*
>ORF07 id=07 start=194 stop=268 strand=- frame=2
MLPVICIMNFRSIVRMKSVIWLPV*
>ORF08 id=08 start=194 stop=283 strand=- frame=2
MPQSSMLPVICIMNFRSIVRMKSVIWLPV*
>ORF09 id=09 start=249 stop=347 strand=+ frame=3
MHITGSILLCGMSDLPQRNIYDLSEKDQNDQG*
>ORF10 id=10 start=282 stop=347 strand=+ frame=3
MSDLPQRNIYDLSEKDQNDQG*
>ORF11 id=11 start=334 stop=375 strand=+ frame=1
MIRDSRNRIARVI*
>ORF12 id=12 start=383 stop=415 strand=- frame=2
MIFRNPVTTC*
>ORF13 id=13 start=426 stop=590 strand=+ frame=3
MDDYIAFYLVVGGNGSKYSKYIRIKITEEITNNVIGAAGYGRIKILNDNHILHI*
>ORF14 id=14 start=430 stop=441 strand=+ frame=1
MTT*
>ORF15 id=15 start=434 stop=439 strand=- frame=2
M*
>ORF16 id=16 start=446 stop=490 strand=- frame=2
MYLLYLLPLPPTTR*
>ORF17 id=17 start=466 stop=528 strand=+ frame=1
MGANTVSTSESKLPKKSPTM*
>ORF18 id=18 start=523 stop=528 strand=+ frame=1
M*
>ORF19 id=19 start=542 stop=577 strand=- frame=2
MWLSLRILIRP*
>ORF20 id=20 start=542 stop=586 strand=- frame=2
MWRMWLSLRILIRP*
>ORF21 id=21 start=565 stop=657 strand=+ frame=1
MTTTFSTSKGLLVSSTSRPEGLMMRISEFR*
>ORF22 id=22 start=631 stop=657 strand=+ frame=1
MMRISEFR*
>ORF23 id=23 start=634 stop=657 strand=+ frame=1
MRISEFR*
>ORF24 id=24 start=650 stop=727 strand=- frame=2
MPADCTPVKQLWRPLKCSWTPWRST*
>ORF25 id=25 start=786 stop=872 strand=+ frame=3
MFFGHIGNKSCNNDETKYTKNKICEYKF*
>ORF26 id=26 start=823 stop=876 strand=+ frame=1
MMKPNTPKIKYASINFR*
>ORF27 id=27 start=826 stop=876 strand=+ frame=1
MKPNTPKIKYASINFR*
>ORF28 id=28 start=854 stop=865 strand=+ frame=2
MRV*
>ORF29 id=29 start=887 stop=976 strand=- frame=2
MPTGRSAPSGASAINSKYVIKSYQKQDLL*
>ORF30 id=30 start=910 stop=918 strand=+ frame=1
MT*
>ORF31 id=31 start=943 stop=978 strand=+ frame=1
MPQMVLIFQLA*
>ORF32 id=32 start=952 stop=978 strand=+ frame=1
MVLIFQLA*
64 changes: 64 additions & 0 deletions test/data/out-seq03.fna
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
>ORF01 id=01 start=5 stop=22 strand=- frame=2
ATGAAACAGAACGACTGA
>ORF02 id=02 start=37 stop=156 strand=+ frame=1
ATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGA
>ORF03 id=03 start=107 stop=136 strand=- frame=2
ATGTATCCCACGATTTCCGTTCCCCTCTGA
>ORF04 id=04 start=140 stop=160 strand=- frame=2
ATGATCAGAAAAAGTTTGTAG
>ORF05 id=05 start=194 stop=223 strand=- frame=2
ATGAAATCGGTTATCTGGCTTCCTGTCTGA
>ORF06 id=06 start=194 stop=247 strand=- frame=2
ATGAATTTCAGGTCGATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA
>ORF07 id=07 start=194 stop=268 strand=- frame=2
ATGCTGCCGGTAATATGCATTATGAATTTCAGGTCGATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA
>ORF08 id=08 start=194 stop=283 strand=- frame=2
ATGCCACAGAGCAGTATGCTGCCGGTAATATGCATTATGAATTTCAGGTCGATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA
>ORF09 id=09 start=249 stop=347 strand=+ frame=3
ATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAG
>ORF10 id=10 start=282 stop=347 strand=+ frame=3
ATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAG
>ORF11 id=11 start=334 stop=375 strand=+ frame=1
ATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAG
>ORF12 id=12 start=383 stop=415 strand=- frame=2
ATGATATTCAGAAATCCTGTAACAACCTGCTGA
>ORF13 id=13 start=426 stop=590 strand=+ frame=3
ATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAA
>ORF14 id=14 start=430 stop=441 strand=+ frame=1
ATGACTACATAG
>ORF15 id=15 start=434 stop=439 strand=- frame=2
ATGTAG
>ORF16 id=16 start=446 stop=490 strand=- frame=2
ATGTACTTACTGTATTTGCTCCCATTACCTCCAACTACAAGGTAA
>ORF17 id=17 start=466 stop=528 strand=+ frame=1
ATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAA
>ORF18 id=18 start=523 stop=528 strand=+ frame=1
ATGTAA
>ORF19 id=19 start=542 stop=577 strand=- frame=2
ATGTGGTTGTCATTGAGAATTTTGATTCGACCGTAA
>ORF20 id=20 start=542 stop=586 strand=- frame=2
ATGTGGAGAATGTGGTTGTCATTGAGAATTTTGATTCGACCGTAA
>ORF21 id=21 start=565 stop=657 strand=+ frame=1
ATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAG
>ORF22 id=22 start=631 stop=657 strand=+ frame=1
ATGATGCGAATCTCGGAATTCAGGTAG
>ORF23 id=23 start=634 stop=657 strand=+ frame=1
ATGCGAATCTCGGAATTCAGGTAG
>ORF24 id=24 start=650 stop=727 strand=- frame=2
ATGCCGGCGGATTGTACACCAGTGAAACAACTTTGGAGACCGTTAAAATGCAGCTGGACTCCCTGGCGGTCTACCTGA
>ORF25 id=25 start=786 stop=872 strand=+ frame=3
ATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAG
>ORF26 id=26 start=823 stop=876 strand=+ frame=1
ATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAA
>ORF27 id=27 start=826 stop=876 strand=+ frame=1
ATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAA
>ORF28 id=28 start=854 stop=865 strand=+ frame=2
ATGCGAGTATAA
>ORF29 id=29 start=887 stop=976 strand=- frame=2
ATGCCAACTGGAAGATCAGCACCATCTGGGGCATCGGCTATAAATTCGAAGTACGTAATTAAGTCATACCAAAAGCAGGATTTGTTATGA
>ORF30 id=30 start=910 stop=918 strand=+ frame=1
ATGACTTAA
>ORF31 id=31 start=943 stop=978 strand=+ frame=1
ATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGA
>ORF32 id=32 start=952 stop=978 strand=+ frame=1
ATGGTGCTGATCTTCCAGTTGGCATGA
2 changes: 0 additions & 2 deletions test/findorfstest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
# findorfs (GeneFinder.jl) --> 885
# NCBI ORFfinder --> 375 ORFs
# orfipy --> 375 (`orfipy NC_001416.1.fasta --start ATG --include-stop --min 75`)
# NC_001416 = fasta_to_dna("../../test/data/NC_001416.1.fasta")[1]
NC_001416 = fasta_to_dna("data/NC_001416.1.fasta")[1]
NC_001416_orfs = findorfs(NC_001416, min_len=75)
@test length(NC_001416_orfs) == 885
Expand All @@ -42,7 +41,6 @@ end
@testset "get_orfs_dna" begin

seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
sseq01 = "ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
orfseqs = get_orfs_dna(seq01)

@test length(orfseqs) == 5
Expand Down
48 changes: 48 additions & 0 deletions test/iotest.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Test the write_orfs_* methods
#
# Each case writes the ORFs found in `seq03` to a FASTA file, reads the file
# back with `FASTAReader`, and checks the number of records plus the content
# of selected records.
@testset "write_orfs_f* " begin

    # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
    # Pyrodigal predicts 2 genes from this sequence:
    # 1) An alternative start codon (GTG) sequence at 48:347
    # 2) A common start codon sequence at 426:590
    # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length
    # is greater than 75 nt, of which one has an "outbound" stop
    seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"

    # Write into a scratch directory instead of the tracked `data/` folder:
    # the package source tree may be read-only when the package is installed
    # through Pkg, and a test run should not rewrite files under version
    # control. `mktempdir()` removes the directory when the process exits.
    outdir = mktempdir()

    # Test case 1: nucleotide output (write_orfs_fna)

    seq03fna = joinpath(outdir, "out-seq03.fna")
    open(seq03fna, "w") do io
        write_orfs_fna(seq03, io)
    end

    # Read back from the very path that was written.
    seq03fnarecords = open(collect, FASTAReader, seq03fna)

    @test seq03fnarecords[1] == FASTX.FASTA.Record("ORF01 id=01 start=5 stop=22 strand=- frame=2", "ATGAAACAGAACGACTGA")
    @test length(seq03fnarecords) == 32
    @test identifier(seq03fnarecords[1]) == "ORF01"
    @test description(seq03fnarecords[1]) == "ORF01 id=01 start=5 stop=22 strand=- frame=2"
    @test sequence(seq03fnarecords[1]) == "ATGAAACAGAACGACTGA"

    # Test case 2: protein output (write_orfs_faa)

    seq03faa = joinpath(outdir, "out-seq03.faa")
    open(seq03faa, "w") do io
        write_orfs_faa(seq03, io)
    end

    seq03faarecords = open(collect, FASTAReader, seq03faa)

    @test seq03faarecords[2] == FASTX.FASTA.Record("ORF02 id=02 start=37 stop=156 strand=+ frame=1", "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*")
    @test length(seq03faarecords) == 32
    @test identifier(seq03faarecords[2]) == "ORF02"
    @test description(seq03faarecords[2]) == "ORF02 id=02 start=37 stop=156 strand=+ frame=1"
    @test sequence(seq03faarecords[2]) == "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*"

    # Test case 3: a reverse-strand ORF from the protein output

    @test seq03faarecords[3] == FASTX.FASTA.Record("ORF03 id=03 start=107 stop=136 strand=- frame=2", "MYPTISVPL*")

end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ using GeneFinder
using Aqua

include("findorfstest.jl")
include("iotest.jl")
include("aquatest.jl")

end
Expand Down

0 comments on commit 49e5493

Please sign in to comment.