Skip to content

Commit

Permalink
Merge pull request #44 from jakobnissen/noautoma
Browse files Browse the repository at this point in the history
Removed Automa parsing
  • Loading branch information
jakobnissen authored May 9, 2021
2 parents 6f0180f + 460ad17 commit 54bff1a
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 72 deletions.
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
name = "BioSymbols"
uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
authors = ["Ben J. Ward <[email protected]>"]
version = "4.0.4"
version = "4.0.5"

[deps]
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"

[compat]
Automa = "0.8"
julia = "1"

[extras]
Expand Down
2 changes: 0 additions & 2 deletions src/BioSymbols.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ export
encoded_data,
encode

import Automa
import Automa.RegExp: @re_str

"""
The BioSymbol type is an abstract type that represents
Expand Down
78 changes: 11 additions & 67 deletions src/aminoacid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,73 +142,6 @@ const threeletter_to_aa = Dict(
"PYL" => AA_O, "SEC" => AA_U,
)

# Generate an amino acid parser.
#
# This block builds an Automa regular expression describing one amino acid
# token and uses `@eval` to define `Base.tryparse(::Type{AminoAcid},
# ::AbstractString)` from the code Automa generates for it. The `let` keeps
# the helper names (`re`, `aapat`, `aminoacids`, `machine`, ...) local.
let
# Short alias for the Automa regular-expression combinator module.
re = Automa.RegExp
# Build the pattern for a single amino acid: either its three-letter code,
# with every letter accepted in lower or upper case, or its one-letter code
# (obtained via `convert(Char, aa)`) in lower or upper case. The pattern's
# exit action is named after the three-letter code so it can be looked up in
# `actions` below.
function aapat(three, aa)
one = convert(Char, aa)
pat = re.alt(
re.cat([re.alt(lowercase(x), uppercase(x)) for x in three]...),
lowercase(one),
uppercase(one))
pat.actions[:exit] = [Symbol(three)]
return pat
end
# Table of (three-letter code, amino acid constant) pairs fed to `aapat`.
aminoacids = [
("ALA", AA_A),
("ARG", AA_R),
("ASN", AA_N),
("ASP", AA_D),
("CYS", AA_C),
("GLN", AA_Q),
("GLU", AA_E),
("GLY", AA_G),
("HIS", AA_H),
("ILE", AA_I),
("LEU", AA_L),
("LYS", AA_K),
("MET", AA_M),
("PHE", AA_F),
("PRO", AA_P),
("SER", AA_S),
("THR", AA_T),
("TRP", AA_W),
("TYR", AA_Y),
("VAL", AA_V),
("ASX", AA_B),
("XLE", AA_J),
("GLX", AA_Z),
("XAA", AA_X),
("PYL", AA_O),
("SEC", AA_U),
]
# A literal `*` triggers the `:Term` action.
aa_term = re"\*"
aa_term.actions[:exit] = [:Term]
# A literal `-` triggers the `:Gap` action.
aa_gap = re"-"
aa_gap.actions[:exit] = [:Gap]
# Zero or more of: space, tab, carriage return, newline.
whitespaces = re"[ \t\r\n]*"
# Full grammar: optional whitespace, exactly one amino acid / `*` / `-`
# alternative, optional whitespace.
aminoacid = re.cat(
whitespaces,
re.alt(vcat((aapat(three, aa) for (three, aa) in aminoacids)..., aa_term, aa_gap)...),
whitespaces)
machine = Automa.compile(aminoacid)
# Map every action name to the expression that is spliced into the generated
# code; each one assigns the matched symbol to the local variable `aa`.
actions = Dict(Symbol(three) => :(aa = $(aa)) for (three, aa) in aminoacids)
actions[:Term] = :(aa = AA_Term)
actions[:Gap] = :(aa = AA_Gap)

# Code generation context with bounds checking switched off.
ctx = Automa.CodeGenContext(checkbounds=false)
# Define the string method from the generated init/exec code.
@eval function Base.tryparse(::Type{AminoAcid}, data::AbstractString)
$(Automa.generate_init_code(ctx, machine))
# Both end markers are set to the byte size of the input.
p_end = p_eof = sizeof(data)
# Value of `aa` before any action has run.
aa = AA_INVALID
$(Automa.generate_exec_code(ctx, machine, actions))
# NOTE(review): `cs` is presumably the state variable introduced by the
# generated init code, with a nonzero value meaning the input was not
# accepted - confirm against the Automa documentation.
if cs != 0
return nothing
end
return aa
end
end

function Base.tryparse(::Type{AminoAcid}, c::Char)
@inbounds aa = c <= '\x7f' ? char_to_aa[Int(c)+1] : AA_INVALID
if aa == AA_INVALID
Expand All @@ -218,6 +151,17 @@ function Base.tryparse(::Type{AminoAcid}, c::Char)
end
end

"""
    tryparse(::Type{AminoAcid}, s::AbstractString)

Try to interpret `s` as a single amino acid and return `nothing` on failure.

A string whose byte size is exactly one is forwarded, unstripped, to the
`Char` method. Any other string is first passed through `strip`; a stripped
string of one byte is forwarded to the `Char` method, a stripped string of
three bytes is upper-cased and looked up in `threeletter_to_aa`, and every
other size yields `nothing`.
"""
function Base.tryparse(::Type{AminoAcid}, s::AbstractString)
    # Fast path: a one-byte input goes straight to the `Char` method.
    sizeof(s) == 1 && return tryparse(AminoAcid, first(s))

    trimmed = strip(s)
    nbytes = sizeof(trimmed)
    if nbytes == 1
        # One-letter code surrounded by whitespace.
        return tryparse(AminoAcid, first(trimmed))
    elseif nbytes == 3
        # Three-letter code; upper-casing makes the lookup case-insensitive.
        return get(threeletter_to_aa, uppercase(trimmed), nothing)
    else
        return nothing
    end
end

function Base.parse(::Type{AminoAcid}, c::Union{AbstractString,Char})
aa = tryparse(AminoAcid, c)
if aa === nothing
Expand Down

0 comments on commit 54bff1a

Please sign in to comment.