Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pure-haskell "memchr" #555

Merged
merged 5 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ import Data.Binary (Binary(get, put))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text.Internal.ArrayUtils (memchr)
import Data.Text.Internal.IsAscii (isAscii)
import Data.Text.Internal.Reverse (reverse)
import Data.Text.Internal.Measure (measure_off)
Expand All @@ -254,7 +255,7 @@ import qualified Data.Text.Lazy as L
#endif
import Data.Word (Word8)
import Foreign.C.Types
import GHC.Base (eqInt, neInt, gtInt, geInt, ltInt, leInt, ByteArray#)
import GHC.Base (eqInt, neInt, gtInt, geInt, ltInt, leInt)
import qualified GHC.Exts as Exts
import GHC.Int (Int8)
import GHC.Stack (HasCallStack)
Expand Down Expand Up @@ -1864,13 +1865,9 @@ lines (Text arr@(A.ByteArray arr#) off len) = go off
| delta < 0 = [Text arr n (len + off - n)]
| otherwise = Text arr n delta : go (n + delta + 1)
where
delta = cSsizeToInt $
memchr arr# (intToCSize n) (intToCSize (len + off - n)) 0x0A
delta = memchr arr# n (len + off - n) 0x0A
{-# INLINE lines #-}

foreign import ccall unsafe "_hs_text_memchr" memchr
:: ByteArray# -> CSize -> CSize -> Word8 -> CSsize

-- | /O(n)/ Joins lines, after appending a terminating newline to
-- each.
unlines :: [Text] -> Text
Expand Down
33 changes: 33 additions & 0 deletions src/Data/Text/Internal/ArrayUtils.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE CPP #-}

module Data.Text.Internal.ArrayUtils (memchr) where

#if defined(PURE_HASKELL)
import qualified Data.Text.Array as A
import Data.List (elemIndex)
#else
import Foreign.C.Types
import System.Posix.Types (CSsize(..))
#endif
import GHC.Exts (ByteArray#)
import Data.Word (Word8)

-- | Find the first occurrence of a byte within a slice of a byte array.
--
-- Arguments, in order: the underlying array, the offset at which the
-- slice starts, the length of the slice, and the byte to look for.
--
-- The result is the position of the first match counted from the start
-- of the slice (not from the start of the array), or @-1@ when the byte
-- does not occur in the slice.
--
-- When @PURE_HASKELL@ is defined the search is done in Haskell over the
-- bytes of the slice; otherwise it is delegated to the C routine
-- @_hs_text_memchr@, which is expected to follow the same convention.
memchr :: ByteArray# -> Int -> Int -> Word8 -> Int
#if defined(PURE_HASKELL)
memchr arr# off len w =
  let tempBa = A.ByteArray arr#
  in case elemIndex w (A.toList tempBa off len) of
       -- Callers test for a negative result to detect "not found".
       Nothing -> -1
       Just i  -> i
#else
memchr arr# off len w =
  fromIntegral $ c_memchr arr# (intToCSize off) (intToCSize len) w

-- | Convert an 'Int' to the 'CSize' expected by the C routine.
intToCSize :: Int -> CSize
intToCSize = fromIntegral

-- The continuation line must be indented: at column 0 the layout rule
-- would treat the type signature as a new top-level declaration.
foreign import ccall unsafe "_hs_text_memchr" c_memchr
  :: ByteArray# -> CSize -> CSize -> Word8 -> CSsize
#endif
20 changes: 4 additions & 16 deletions src/Data/Text/Internal/Lazy/Search.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,15 @@ module Data.Text.Internal.Lazy.Search
indices
) where

import Data.Bits (unsafeShiftL)
import Data.Bits (unsafeShiftL, (.|.), (.&.))
import qualified Data.Text.Array as A
import Data.Int (Int64)
import Data.Word (Word8, Word64)
import qualified Data.Text.Internal as T
import qualified Data.Text as T (concat, isPrefixOf)
import Data.Text.Internal.ArrayUtils (memchr)
import Data.Text.Internal.Fusion.Types (PairS(..))
import Data.Text.Internal.Lazy (Text(..), foldrChunks)
import Data.Bits ((.|.), (.&.))
import Foreign.C.Types
import GHC.Exts (ByteArray#)
import System.Posix.Types (CSsize(..))

-- | /O(n+m)/ Find the offsets of all non-overlapping indices of
-- @needle@ within @haystack@.
Expand Down Expand Up @@ -66,9 +63,9 @@ indices needle
delta | nextInPattern = nlen + 1
| c == z = skip + 1
| l >= i + nlen = case
memchr xarr# (intToCSize (xoff + i + nlen)) (intToCSize (l - i - nlen)) z of
memchr xarr# (xoff + i + nlen) (l - i - nlen) z of
-1 -> max 1 (l - i - nlen)
s -> cSsizeToInt s + 1
s -> s + 1
| otherwise = 1
nextInPattern = mask .&. swizzle (index xxs (i + nlen)) == 0

Expand Down Expand Up @@ -133,12 +130,3 @@ intToInt64 = fromIntegral

word8ToInt :: Word8 -> Int
word8ToInt = fromIntegral

intToCSize :: Int -> CSize
intToCSize = fromIntegral

cSsizeToInt :: CSsize -> Int
cSsizeToInt = fromIntegral

foreign import ccall unsafe "_hs_text_memchr" memchr
:: ByteArray# -> CSize -> CSize -> Word8 -> CSsize
17 changes: 3 additions & 14 deletions src/Data/Text/Internal/Search.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ import qualified Data.Text.Array as A
import Data.Word (Word64, Word8)
import Data.Text.Internal (Text(..))
import Data.Bits ((.|.), (.&.), unsafeShiftL)
import Foreign.C.Types
import GHC.Exts (ByteArray#)
import System.Posix.Types (CSsize(..))
import Data.Text.Internal.ArrayUtils (memchr)

data T = {-# UNPACK #-} !Word64 :* {-# UNPACK #-} !Int

Expand Down Expand Up @@ -87,9 +85,9 @@ indices' (Text narr noff nlen) (Text harr@(A.ByteArray harr#) hoff hlen) = loop
| mask .&. swizzle (A.unsafeIndex harr i) == 0
= loop (i + nlen + 1)
| otherwise
= case memchr harr# (intToCSize i) (intToCSize (hlen + hoff - i)) z of
= case memchr harr# i (hlen + hoff - i) z of
-1 -> []
x -> loop (i + cSsizeToInt x + 1)
x -> loop (i + x + 1)
{-# INLINE indices' #-}

scanOne :: Word8 -> Text -> [Int]
Expand All @@ -103,12 +101,3 @@ scanOne c (Text harr hoff hlen) = loop 0

word8ToInt :: Word8 -> Int
word8ToInt = fromIntegral

intToCSize :: Int -> CSize
intToCSize = fromIntegral

cSsizeToInt :: CSsize -> Int
cSsizeToInt = fromIntegral

foreign import ccall unsafe "_hs_text_memchr" memchr
:: ByteArray# -> CSize -> CSize -> Word8 -> CSsize
2 changes: 1 addition & 1 deletion text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ flag pure-haskell
library
if arch(javascript) || flag(pure-haskell)
cpp-options: -DPURE_HASKELL
c-sources: cbits/utils.c
else
c-sources: cbits/is_ascii.c
cbits/measure_off.c
Expand Down Expand Up @@ -162,6 +161,7 @@ library
Data.Text.IO
Data.Text.IO.Utf8
Data.Text.Internal
Data.Text.Internal.ArrayUtils
Data.Text.Internal.Builder
Data.Text.Internal.Builder.Functions
Data.Text.Internal.Builder.Int.Digits
Expand Down
Loading