From 8993aba545dfa296db8a4484ff5cae25a0eda1a4 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 22 Mar 2024 14:22:28 +0100 Subject: [PATCH 1/2] Update `ispalindromic` Expand the documentation to explain that it is palindromic with respect to the reverse-complement. Also implement a fast-path for 2-bit nucleotides. --- src/biosequence/predicates.jl | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/biosequence/predicates.jl b/src/biosequence/predicates.jl index 37c2e9d9..e75b9161 100644 --- a/src/biosequence/predicates.jl +++ b/src/biosequence/predicates.jl @@ -59,20 +59,36 @@ end """ - ispalindromic(seq::BioSequence) + ispalindromic(seq::NucSeq) -> Bool + +Check if `seq` is palindromic. A palindromic sequence is identical to its +reverse-complement, so this is equivalent to checking whether +`seq == reverse_complement(seq)`. + +# Examples +```jldoctest +julia> ispalindromic(dna"TGCA") +true + +julia> ispalindromic(dna"TCCT") +false + +julia> ispalindromic(rna"ACGGU") +false +``` Return `true` if `seq` is a palindromic sequence; otherwise return `false`. """ function ispalindromic(seq::BioSequence{<:NucleicAcidAlphabet}) - for i in 1:cld(length(seq), 2) - if seq[i] != complement(seq[end - i + 1]) - return false - end - end + _ispalindromic(seq) +end - return true +# For two-bit alphabets, no odd-length sequence is palindromic: the middle base never equals its own complement. +function ispalindromic(seq::BioSequence{<:Union{DNAAlphabet{2}, RNAAlphabet{2}}}) + isodd(length(seq)) ? 
false : _ispalindromic(seq) end +@inline _ispalindromic(seq) = all(seq[i] == complement(seq[end - i + 1]) for i in 1:cld(length(seq), 2)) """ hasambiguity(seq::BioSequence) From 8ebf46109e6615af2e305d3c5b08d67b0ad13156 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 22 Mar 2024 14:34:47 +0100 Subject: [PATCH 2/2] Fixup: Performance --- src/biosequence/predicates.jl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/biosequence/predicates.jl b/src/biosequence/predicates.jl index e75b9161..c8b3cfea 100644 --- a/src/biosequence/predicates.jl +++ b/src/biosequence/predicates.jl @@ -80,15 +80,21 @@ false Return `true` if `seq` is a palindromic sequence; otherwise return `false`. """ function ispalindromic(seq::BioSequence{<:NucleicAcidAlphabet}) - _ispalindromic(seq) + _ispalindromic(seq) end # For two-bit alphabets, no odd-length sequence is palindromic: the middle base never equals its own complement. function ispalindromic(seq::BioSequence{<:Union{DNAAlphabet{2}, RNAAlphabet{2}}}) - isodd(length(seq)) ? false : _ispalindromic(seq) + isodd(length(seq)) ? false : _ispalindromic(seq) end -@inline _ispalindromic(seq) = all(seq[i] == complement(seq[end - i + 1]) for i in 1:cld(length(seq), 2)) +@inline function _ispalindromic(seq) + L = lastindex(seq) + for i in 1:cld(length(seq), 2) + seq[i] == complement(seq[L - i + 1]) || return false + end + true +end """ hasambiguity(seq::BioSequence)