diff --git a/Project.toml b/Project.toml
index 9ee6311..b891a7c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,9 +1,7 @@
 name = "BioSymbols"
 uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
 authors = ["Ben J. Ward "]
-version = "5.0.0"
-
-[deps]
+version = "5.1.0"
 
 [compat]
 julia = "1"
diff --git a/src/BioSymbols.jl b/src/BioSymbols.jl
index 8172f66..20ae7bd 100644
--- a/src/BioSymbols.jl
+++ b/src/BioSymbols.jl
@@ -116,7 +116,8 @@ export
     compatbits,
     alphabet,
     encoded_data,
-    encode
+    encode,
+    stringbyte
 
 
 """
@@ -146,6 +147,47 @@ Base.broadcastable(x::BioSymbol) = (x,)
 include("nucleicacid.jl")
 include("aminoacid.jl")
 
+# Generic function declaration only: the concrete methods for DNA, RNA and
+# AminoAcid are generated by the loop below.
+"""
+    stringbyte(::BioSymbol)::UInt8
+
+For biosymbol types that can be represented as ASCII characters, `stringbyte(x)`
+returns the printable ASCII byte that represents the symbol in a string.
+
+# Examples
+```julia
+julia> stringbyte(DNA_A) == UInt8('A')
+true
+
+julia> stringbyte(AA_Gap) == UInt8('-')
+true
+```
+"""
+function stringbyte end
+
+# Create a lookup table from biosymbol to the UInt8 for the character that would
+# represent it in a string, e.g. DNA_G -> UInt8('G')
+for alphabettype in ("DNA", "RNA", "AminoAcid")
+    tablename = Symbol(uppercase(alphabettype), "_TO_BYTE")
+    typ = Symbol(alphabettype)
+    @eval begin
+        const $(tablename) = let
+            alph = alphabet($(typ))
+            bytes = zeros(UInt8, length(alph))
+            # Bounds-checked on purpose: this runs once at load time, and the check
+            # catches any symbol whose encoding falls outside 0:length(alph)-1.
+            for letter in alph
+                bytes[reinterpret(UInt8, letter) + 1] = UInt8(Char(letter))
+            end
+            # Freeze the finished table as an immutable Tuple.
+            Tuple(bytes)
+        end
+        # The lookup skips bounds checks, so `x` must be a valid symbol of its alphabet.
+        stringbyte(x::$(typ)) = @inbounds $(tablename)[reinterpret(UInt8, x) + 1]
+    end
+end
+
 """
     isgap(symbol::BioSymbol)
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 4877d8f..505e7a2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -89,6 +89,14 @@ end
             @test encoded_data(RNA_B) === 0b1110
             @test encoded_data(RNA_N) === 0b1111
         end
+
+        @testset "stringbyte" begin
+            for T in (DNA, RNA)
+                @test all(alphabet(T)) do i
+                    UInt8(Char(i)) == stringbyte(i)
+                end
+            end
+        end
     end
 
     @testset "Char" begin
@@ -409,6 +417,12 @@ end
             @test_throws InexactError convert(AminoAcid, '亜')
         end
 
+        @testset "stringbyte" begin
+            @test all(alphabet(AminoAcid)) do i
+                UInt8(Char(i)) == stringbyte(i)
+            end
+        end
+
         @testset "isvalid" begin
             for aa in alphabet(AminoAcid)
                 @test isvalid(aa)