Skip to content

Commit

Permalink
WIP: Tryparse
Browse files Browse the repository at this point in the history
Still needs to implement tryconvert of biosymbol, then trysetindex! in order
to have tryparse work generically.
  • Loading branch information
jakobnissen committed Jan 21, 2024
1 parent 97112a3 commit c24e4aa
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 13 deletions.
27 changes: 21 additions & 6 deletions src/longsequences/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,32 @@ end

# Fallback for arbitrary string types: materialise the input as a `String`
# and re-dispatch to the `parse` method that accepts that representation.
function Base.parse(::Type{T}, s::AbstractString) where {T <: LongSequence}
    return parse(T, String(s))
end

function Base.parse(::Type{LongSequence{A}}, seq::ASCIILike) where {A<:Alphabet}
_parse(LongSequence{A}, seq, codetype(A()))
# Parse `s` into a `LongSequence{A}`, raising an encoding error on failure.
#
# The work is delegated to `_tryparse`, selected by the alphabet's code type.
# `_tryparse` hands back either the parsed sequence or an `Int`, which is
# forwarded to `throw_encode_error` as the position to report.
function Base.parse(T::Type{LongSequence{A}}, s::ASCIILike) where {A<:Alphabet}
    code = codetype(A())
    result = _tryparse(T, s, code)
    # Success path: anything that is not an `Int` is the parsed sequence.
    result isa Int || return result
    # Failure path: ASCII-coded alphabets report the error against the raw
    # code units, every other alphabet against the input itself.
    src = code isa AsciiAlphabet ? codeunits(s) : s
    throw_encode_error(A(), src, result)
end

function _parse(::Type{LongSequence{A}}, s::ASCIILike, ::AlphabetCode) where {A<:Alphabet}
# Fallback for arbitrary string types: materialise the input as a `String`
# and re-dispatch to the `tryparse` method that accepts that representation.
function Base.tryparse(::Type{T}, s::AbstractString) where {T <: LongSequence}
    return tryparse(T, String(s))
end

Check warning on line 89 in src/longsequences/constructors.jl

View check run for this annotation

Codecov / codecov/patch

src/longsequences/constructors.jl#L89

Added line #L89 was not covered by tests

# Non-throwing counterpart of `Base.parse` for `LongSequence{A}`.
# Delegates to `_tryparse`, selected by the alphabet's code type, and maps its
# `Int` failure result to `nothing`; any other result is returned unchanged.
function Base.tryparse(::Type{LongSequence{A}}, s::ASCIILike) where {A <: Alphabet}
n = _tryparse(LongSequence{A}, s, codetype(A()))
# An `Int` from `_tryparse` signals failure; it is not exposed to the caller.
n isa Int ? nothing : n

Check warning on line 93 in src/longsequences/constructors.jl

View check run for this annotation

Codecov / codecov/patch

src/longsequences/constructors.jl#L91-L93

Added lines #L91 - L93 were not covered by tests
end

# Generic `_tryparse` for alphabets without a specialised code type:
# allocate an uninitialised sequence of `length(s)` symbols and fill it
# from `s` with `copyto!`.
function _tryparse(::Type{LongSequence{A}}, s::ASCIILike, ::AlphabetCode) where {A<:Alphabet}
    nsymbols = length(s)
    # TODO! This path does not yet report failure as an `Int` the way the
    # AsciiAlphabet method does; presumably `copyto!` throws on symbols it
    # cannot encode -- confirm before relying on `tryparse` here.
    return copyto!(LongSequence{A}(undef, nsymbols), 1, s, 1, nsymbols)
end

# NOTE(review): this span interleaves the pre-commit `_parse` lines with the
# post-commit `_tryparse` lines of the diff.
# `_tryparse` for ASCII-coded alphabets: allocates an uninitialised sequence
# with one symbol per code unit of `s` and encodes the code units into it.
function _parse(::Type{LongSequence{A}}, s::ASCIILike, ::AsciiAlphabet) where {A<:Alphabet}
function _tryparse(::Type{LongSequence{A}}, s::ASCIILike, ::AsciiAlphabet) where {A<:Alphabet}
seq = LongSequence{A}(undef, ncodeunits(s))
# Pre-commit form: throwing encoder.
return encode_chunks!(seq, 1, codeunits(s), 1, ncodeunits(s))
end
# Post-commit form: the result of `try_encode_chunks!` (the sequence, or an
# `Int` on failure) is the value of the function.
try_encode_chunks!(seq, 1, codeunits(s), 1, ncodeunits(s))
end
44 changes: 37 additions & 7 deletions src/longsequences/copying.jl
Original file line number Diff line number Diff line change
Expand Up @@ -165,34 +165,64 @@ end
@assert false "Expected error in encoding"
end

# NOTE(review): this span interleaves the pre-commit and post-commit lines of
# the diff (old one-line signature and throwing return vs. new multi-line
# signature and `Int`-returning failure path).
# Encode `N` bytes of `src`, starting at index `soff`, into a single `UInt64`
# chunk. Symbol `i` is placed at bit offset `bits_per_symbol(A) * (i-1)`.
@inline function encode_chunk(A::Alphabet, src::AbstractArray{UInt8}, soff::Integer, N::Integer)
@inline function encode_chunk(
A::Alphabet,
src::AbstractArray{UInt8},
soff::Integer,
N::Integer
)::Union{UInt64, Int}
chunk = zero(UInt64)
# Accumulates the OR of every encoded byte so validity is tested once, after
# the loop, instead of branching per symbol.
check = 0x00
@inbounds for i in 1:N
enc = ascii_encode(A, src[soff+i-1])
check |= enc
chunk |= UInt64(enc) << (bits_per_symbol(A) * (i-1))
end
# Presumably `ascii_encode` marks an un-encodable byte by setting the high bit
# (0x80) -- TODO confirm against its definition.
# Pre-commit form: throw on failure.
check & 0x80 == 0x00 || throw_encode_error(A, src, soff)
return chunk
# Post-commit form: on failure return `soff`, the start of this chunk (not
# the exact index of the offending byte), as an `Int`.
check & 0x80 == 0x00 || return Int(soff)::Int
return chunk::UInt64
end

# Use this for AsciiAlphabet alphabets only, internal use only, no boundschecks.
# This is preferential to `copyto!` if none of the sequence's original content
# needs to be kept, since this is faster.
# NOTE(review): this span interleaves the pre-commit `encode_chunks!` lines
# with the post-commit `try_encode_chunks!` lines of the diff.
# Encode `N` bytes of `src`, starting at `soff`, into `dst.data`, starting at
# data element `startindex` (an index into `dst.data`, not a symbol position).
# Post-commit form returns `dst` on success, or the `Int` produced by
# `encode_chunk` for the first chunk that failed. Chunks before the failing
# one have already been written to `dst.data` at that point.
function encode_chunks!(dst::SeqOrView{A}, startindex::Integer, src::AbstractVector{UInt8},
soff::Integer, N::Integer) where {A <: Alphabet}
function try_encode_chunks!(
dst::SeqOrView{A},
startindex::Integer,
src::AbstractVector{UInt8},
soff::Integer,
N::Integer
)::Union{Int, SeqOrView} where {A <: Alphabet}
# `chunks` full data elements, then `rest` leftover symbols for a partial one.
chunks, rest = divrem(N, symbols_per_data_element(dst))
@inbounds for i in startindex:startindex+chunks-1
dst.data[i] = encode_chunk(A(), src, soff, symbols_per_data_element(dst))
chunk = encode_chunk(A(), src, soff, symbols_per_data_element(dst))
# An `Int` result is the failure signal from `encode_chunk`; pass it up.
if chunk isa Int
return chunk
else
dst.data[i] = chunk
end
soff += symbols_per_data_element(dst)
end
# Trailing partial chunk, if `N` is not a multiple of the symbols per element.
@inbounds if !iszero(rest)
dst.data[startindex+chunks] = encode_chunk(A(), src, soff, rest)
chunk = encode_chunk(A(), src, soff, rest)
if chunk isa Int
return chunk
else
dst.data[startindex+chunks] = chunk
end
end
return dst
end

# Throwing wrapper around `try_encode_chunks!`: same arguments, but an `Int`
# failure result is turned into an encoding error via `throw_encode_error`
# instead of being handed back to the caller.
function encode_chunks!(dst::SeqOrView{A},
    startindex::Integer,
    src::AbstractVector{UInt8},
    soff::Integer,
    N::Integer
)::SeqOrView where {A <: Alphabet}
    result = try_encode_chunks!(dst, startindex, src, soff, N)
    if result isa Int
        # `result` is the source offset reported by the failing chunk.
        throw_encode_error(A(), src, result)
    else
        result
    end
end

#########

# Two-argument method
Expand Down

0 comments on commit c24e4aa

Please sign in to comment.