Skip to content

Commit

Permalink
Add stringbyte function (#48)
Browse files Browse the repository at this point in the history
Add stringbyte function
  • Loading branch information
jakobnissen authored Oct 30, 2021
1 parent f07b9d7 commit c8e893b
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
name = "BioSymbols"
uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
authors = ["Ben J. Ward <[email protected]>"]
version = "5.0.0"

[deps]
version = "5.1.0"

[compat]
julia = "1"
Expand Down
39 changes: 38 additions & 1 deletion src/BioSymbols.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ export
compatbits,
alphabet,
encoded_data,
encode
encode,
stringbyte


"""
Expand Down Expand Up @@ -146,6 +147,42 @@ Base.broadcastable(x::BioSymbol) = (x,)
include("nucleicacid.jl")
include("aminoacid.jl")

# Generic function declaration only: no fallback method is defined here, so calling
# `stringbyte` on a type that has no method of its own throws a `MethodError`.
"""
    stringbyte(::BioSymbol)::UInt8

For biosymbol types that can be represented as ASCII characters, `stringbyte(x)`
returns the printable ASCII byte that represents the character in a string.

# Examples
```julia
julia> stringbyte(DNA_A) == UInt8('A')
true

julia> stringbyte(AA_Gap) == UInt8('-')
true
```
"""
function stringbyte end

# For each symbol type, build a lookup table from the symbol's bit pattern to the
# UInt8 of the character that represents it in a string, e.g. DNA_G -> UInt8('G'),
# and define the `stringbyte` method that reads from that table.
# The tables are named DNA_TO_BYTE, RNA_TO_BYTE and AMINOACID_TO_BYTE.
for alphabettype in ("DNA", "RNA", "AminoAcid")
    tablename = Symbol(uppercase(alphabettype), "_TO_BYTE")
    typ = Symbol(alphabettype)
    @eval begin
        const $(tablename) = let
            alph = alphabet($(typ))
            bytes = zeros(UInt8, length(alph))
            # This loop runs once at load time, so bounds checks stay enabled:
            # if a symbol's bit pattern ever exceeds the alphabet length, this
            # raises a BoundsError instead of silently writing out of bounds.
            for letter in alph
                bytes[reinterpret(UInt8, letter) + 1] = UInt8(Char(letter))
            end
            # Freeze into an immutable tuple so the table is a constant value.
            Tuple(bytes)
        end
        # Hot path: index the table by the symbol's bit pattern (1-based).
        # NOTE(review): `@inbounds` assumes `x` carries a bit pattern smaller than
        # the table length; confirm invalid symbols cannot reach this method.
        stringbyte(x::$(typ)) = @inbounds $(tablename)[reinterpret(UInt8, x) + 1]
    end
end

"""
isgap(symbol::BioSymbol)
Expand Down
14 changes: 14 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ end
@test encoded_data(RNA_B) === 0b1110
@test encoded_data(RNA_N) === 0b1111
end

@testset "stringbyte" begin
    # Check every symbol of each nucleotide alphabet. The alphabet must come from
    # the loop variable `T`; hard-coding DNA would leave RNA untested.
    for T in (DNA, RNA)
        @test all(alphabet(T)) do i
            UInt8(Char(i)) == stringbyte(i)
        end
    end
end
end

@testset "Char" begin
Expand Down Expand Up @@ -409,6 +417,12 @@ end
@test_throws InexactError convert(AminoAcid, '')
end

@testset "stringbyte" begin
    # Every amino acid's string byte must equal the byte of its Char conversion.
    matches_char(aa) = stringbyte(aa) == UInt8(Char(aa))
    @test all(matches_char, alphabet(AminoAcid))
end

@testset "isvalid" begin
for aa in alphabet(AminoAcid)
@test isvalid(aa)
Expand Down

2 comments on commit c8e893b

@jakobnissen
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/47793

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v5.1.0 -m "<description of version>" c8e893b3577763a0e65cf0807ca4fc3f0c2e85f2
git push origin v5.1.0

Please sign in to comment.