From 2d309fa765834c9d38d51348717427c75ab05e09 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 22 Oct 2024 07:58:11 +0200 Subject: [PATCH] More consistently throw EncodeError Add a fallback method to `encode`, such that encoding into an incompatible alphabet will more consistently throw EncodeError. This will help users catch these errors. --- src/alphabet.jl | 25 +++++++++++++++++++++---- test/alphabet.jl | 12 +++++++++++- test/runtests.jl | 2 +- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/alphabet.jl b/src/alphabet.jl index 06b7ffa6..aaa269d0 100644 --- a/src/alphabet.jl +++ b/src/alphabet.jl @@ -79,14 +79,31 @@ iscomplete(A::Alphabet) = Val(length(symbols(A)) === 1 << bits_per_symbol(A)) ## Encoders & Decoders """ - encode(::Alphabet, x::S) + encode(::Alphabet, s::BioSymbol) - -Encode BioSymbol `S` to an internal representation using an `Alphabet`. +Internal function, do not use in user code. +Encode BioSymbol `s` to an internal representation using an [`Alphabet`](@ref). This decoding is checked to enforce valid data element. +If `s` cannot be encoded to the given alphabet, throw an `EncodeError`. + """ -function encode end +encode(A::Alphabet, s::BioSymbol) = throw(EncodeError(A, s)) +""" + EncodeError + +Exception thrown when a `BioSymbol` cannot be encoded to a given [`Alphabet`](@ref). 
+ +# Examples +``` +julia> try + BioSequences.encode(DNAAlphabet{2}(), DNA_N) + catch err + println(err isa BioSequences.EncodeError) + end +true +``` +""" struct EncodeError{A<:Alphabet,T} <: Exception val::T end diff --git a/test/alphabet.jl b/test/alphabet.jl index e95028bd..f74edaa4 100644 --- a/test/alphabet.jl +++ b/test/alphabet.jl @@ -48,6 +48,16 @@ encode = BioSequences.encode EncodeError = BioSequences.EncodeError decode = BioSequences.decode +@testset "EncodeError" begin + @test_throws EncodeError encode(DNAAlphabet{4}(), RNA_U) + @test_throws EncodeError encode(DNAAlphabet{2}(), DNA_M) + @test_throws EncodeError encode(DNAAlphabet{4}(), AA_C) + @test_throws EncodeError encode(AminoAcidAlphabet(), DNA_C) + @test_throws EncodeError encode(AminoAcidAlphabet(), RNA_N) + @test_throws EncodeError encode(RNAAlphabet{2}(), DNA_C) + @test_throws EncodeError encode(RNAAlphabet{2}(), RNA_K) +end + # NOTE: See the docs for the interface of Alphabet struct ReducedAAAlphabet <: Alphabet end @@ -185,4 +195,4 @@ end @test BioSequences.has_interface(Alphabet, RNAAlphabet{2}()) @test BioSequences.has_interface(Alphabet, RNAAlphabet{4}()) @test BioSequences.has_interface(Alphabet, AminoAcidAlphabet()) -end \ No newline at end of file +end diff --git a/test/runtests.jl b/test/runtests.jl index 54f298b1..b21b0c84 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,7 +6,7 @@ using Documenter using Random using StableRNGs using LinearAlgebra: normalize -import BioSymbols +using BioSymbols using BioSequences using StatsBase using YAML