diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index fa33ff60..4f039dc8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -75,7 +75,7 @@ jobs: - name: Install run: | apt-get update -y - apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev + apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev git curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh - uses: actions/checkout@v1 - name: Test diff --git a/Generate.hs b/Generate.hs index 1cfde169..9e81be01 100755 --- a/Generate.hs +++ b/Generate.hs @@ -22,6 +22,7 @@ main = do , "{-# LANGUAGE ViewPatterns #-}" #endif , "{-# LANGUAGE CPP #-}" + , "{-# LANGUAGE PackageImports #-}" , "{-# OPTIONS_GHC -Wno-name-shadowing #-}" , "{-# OPTIONS_GHC -Wno-orphans #-}" ,"module TestGen(tests) where" @@ -34,11 +35,11 @@ main = do ,"import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )" ,"import GHC.IO.Encoding.UTF16 ( mkUTF16le )" ,"import GHC.IO.Encoding.UTF8 ( mkUTF8 )" - ,"import System.OsString.Internal.Types" - ,"import System.OsPath.Encoding.Internal" + ,"import \"os-string\" System.OsString.Internal.Types" + ,"import \"os-string\" System.OsString.Encoding.Internal" ,"import qualified Data.Char as C" - ,"import qualified System.OsPath.Data.ByteString.Short as SBS" - ,"import qualified System.OsPath.Data.ByteString.Short.Word16 as SBS16" + ,"import qualified \"os-string\" System.OsString.Data.ByteString.Short as SBS" + ,"import qualified \"os-string\" System.OsString.Data.ByteString.Short.Word16 as SBS16" ,"import qualified System.FilePath.Windows as W" ,"import qualified System.FilePath.Posix as P" #ifdef GHC_MAKE diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index df454328..eb367e94 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -138,8 +138,8 @@ import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import qualified GHC.Foreign as GHC import Data.Word ( Word16 ) -import System.OsPath.Data.ByteString.Short.Word16 -import System.OsPath.Data.ByteString.Short ( packCStringLen ) +import "os-string" System.OsString.Data.ByteString.Short.Word16 +import "os-string" System.OsString.Data.ByteString.Short ( packCStringLen ) #define CHAR Word16 #define STRING ShortByteString #define FILEPATH ShortByteString @@ -148,7 +148,7 @@ import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import qualified GHC.Foreign as GHC import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import Data.Word ( Word8 ) -import System.OsPath.Data.ByteString.Short +import "os-string" System.OsString.Data.ByteString.Short #define CHAR Word8 #define STRING ShortByteString #define FILEPATH ShortByteString diff --git a/System/OsPath/Common.hs b/System/OsPath/Common.hs index 0af0ed60..e3718563 100644 --- a/System/OsPath/Common.hs +++ b/System/OsPath/Common.hs @@ -1,4 +1,5 @@ {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE PackageImports #-} -- This template expects CPP definitions for: -- -- WINDOWS defined? = no | yes | no @@ -109,7 +110,7 @@ where #ifdef WINDOWS import System.OsPath.Types -import System.OsString.Windows as PS +import "os-string" System.OsString.Windows as PS ( unsafeFromChar , toChar , decodeUtf @@ -141,7 +142,7 @@ import Language.Haskell.TH.Syntax import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import System.OsPath.Types -import System.OsString.Posix as PS +import "os-string" System.OsString.Posix as PS ( unsafeFromChar , toChar , decodeUtf @@ -171,7 +172,7 @@ import System.OsPath.Internal as PS ) import System.OsPath.Types ( OsPath ) -import System.OsString ( unsafeFromChar, toChar ) +import "os-string" System.OsString ( unsafeFromChar, toChar ) #if defined(mingw32_HOST_OS) || defined(__MINGW32__) import qualified System.OsPath.Windows as C @@ -182,7 +183,7 @@ import qualified System.OsPath.Posix as C import Data.Bifunctor ( bimap ) #endif -import System.OsString.Internal.Types +import "os-string" System.OsString.Internal.Types ------------------------ diff --git a/System/OsPath/Data/ByteString/Short.hs b/System/OsPath/Data/ByteString/Short.hs index f3a666ff..3e833e0c 100644 --- a/System/OsPath/Data/ByteString/Short.hs +++ b/System/OsPath/Data/ByteString/Short.hs @@ -23,7 +23,7 @@ -- -- > import qualified Data.ByteString.Short as B.Short -- -module System.OsPath.Data.ByteString.Short ( +module System.OsPath.Data.ByteString.Short {-# DEPRECATED "Use System.OsString.Data.ByteString.Short from os-string package instead" #-} ( -- * The @ShortByteString@ type @@ -174,18 +174,5 @@ module System.OsPath.Data.ByteString.Short ( useAsCStringLen, ) where -import Data.ByteString.Short.Internal -import System.OsPath.Data.ByteString.Short.Internal - -import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise) -import Data.Word (Word8) - -uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString) -uncons2 = \sbs -> - let l = length sbs - nl = l - 2 - in if | l <= 1 -> Nothing - | otherwise -> let h = indexWord8Array (asBA sbs) 0 - h' = indexWord8Array (asBA sbs) 1 - t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl - in Just (h, h', t) +import System.OsString.Data.ByteString.Short + diff --git a/System/OsPath/Data/ByteString/Short/Internal.hs b/System/OsPath/Data/ByteString/Short/Internal.hs index 5003f998..f24fc8e0 100644 --- a/System/OsPath/Data/ByteString/Short/Internal.hs +++ b/System/OsPath/Data/ByteString/Short/Internal.hs @@ -1,481 +1,9 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE UnboxedTuples #-} -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE PackageImports #-} --- | --- Module : System.OsPath.Data.ByteString.Short.Internal --- Copyright : © 2022 Julian Ospald --- License : MIT --- --- Maintainer : Julian Ospald --- Stability : experimental --- Portability : portable --- --- Internal low-level utilities mostly for 'System.OsPath.Data.ByteString.Short.Word16', --- such as byte-array operations and other stuff not meant to be exported from Word16 module. -module System.OsPath.Data.ByteString.Short.Internal where - -import Control.Monad.ST -import Control.Exception (assert, throwIO) -import Data.Bits (Bits(..)) -import Data.ByteString.Short.Internal (ShortByteString(..), length) -#if !MIN_VERSION_base(4,11,0) -import Data.Semigroup - ( Semigroup((<>)) ) -import Foreign.C.Types - ( CSize(..) - , CInt(..) - ) -import Data.ByteString.Internal - ( accursedUnutterablePerformIO +module System.OsPath.Data.ByteString.Short.Internal {-# DEPRECATED "Use System.OsString.Data.ByteString.Short.Internal from os-string package instead" #-} ( + module W ) -#endif -#if !MIN_VERSION_bytestring(0,10,9) -import Foreign.Marshal.Alloc (allocaBytes) -import Foreign.C.String ( CString, CStringLen ) -import Foreign.C.Types ( CSize(..) ) -import Foreign.Storable (pokeByteOff) -#endif -import Foreign.Marshal.Array (withArray0, peekArray0, newArray0, withArrayLen, peekArray) -import GHC.Exts -import GHC.Word -import GHC.ST - ( ST (ST) ) -import GHC.Stack ( HasCallStack ) -import Prelude hiding - ( length ) - -import qualified Data.ByteString.Short.Internal as BS -import qualified Data.Char as C -import qualified Data.List as List - - -_nul :: Word16 -_nul = 0x00 - -isSpace :: Word16 -> Bool -isSpace = C.isSpace . word16ToChar - --- | Total conversion to char. -word16ToChar :: Word16 -> Char -word16ToChar = C.chr . fromIntegral - -create :: Int -> (forall s. MBA s -> ST s ()) -> ShortByteString -create len fill = - runST $ do - mba <- newByteArray len - fill mba - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#) -{-# INLINE create #-} - - -asBA :: ShortByteString -> BA -asBA (SBS ba#) = BA# ba# - - - -data BA = BA# ByteArray# -data MBA s = MBA# (MutableByteArray# s) - - -newPinnedByteArray :: Int -> ST s (MBA s) -newPinnedByteArray (I# len#) = - ST $ \s -> case newPinnedByteArray# len# s of - (# s', mba# #) -> (# s', MBA# mba# #) - -newByteArray :: Int -> ST s (MBA s) -newByteArray (I# len#) = - ST $ \s -> case newByteArray# len# s of - (# s', mba# #) -> (# s', MBA# mba# #) - -copyByteArray :: BA -> Int -> MBA s -> Int -> Int -> ST s () -copyByteArray (BA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = - ST $ \s -> case copyByteArray# src# src_off# dst# dst_off# len# s of - s' -> (# s', () #) - -unsafeFreezeByteArray :: MBA s -> ST s BA -unsafeFreezeByteArray (MBA# mba#) = - ST $ \s -> case unsafeFreezeByteArray# mba# s of - (# s', ba# #) -> (# s', BA# ba# #) - -copyAddrToByteArray :: Ptr a -> MBA RealWorld -> Int -> Int -> ST RealWorld () -copyAddrToByteArray (Ptr src#) (MBA# dst#) (I# dst_off#) (I# len#) = - ST $ \s -> case copyAddrToByteArray# src# dst# dst_off# len# s of - s' -> (# s', () #) - - --- this is a copy-paste from bytestring -#if !MIN_VERSION_bytestring(0,10,9) ------------------------------------------------------------------------- --- Primop replacements - --- --------------------------------------------------------------------- --- --- Standard C functions --- - -foreign import ccall unsafe "string.h strlen" c_strlen - :: CString -> IO CSize - - --- --------------------------------------------------------------------- --- --- Uses our C code --- - --- | /O(n)./ Construct a new @ShortByteString@ from a @CString@. The --- resulting @ShortByteString@ is an immutable copy of the original --- @CString@, and is managed on the Haskell heap. The original --- @CString@ must be null terminated. --- --- @since 0.10.10.0 -packCString :: CString -> IO ShortByteString -packCString cstr = do - len <- c_strlen cstr - packCStringLen (cstr, fromIntegral len) - --- | /O(n)./ Construct a new @ShortByteString@ from a @CStringLen@. The --- resulting @ShortByteString@ is an immutable copy of the original @CStringLen@. --- The @ShortByteString@ is a normal Haskell value and will be managed on the --- Haskell heap. --- --- @since 0.10.10.0 -packCStringLen :: CStringLen -> IO ShortByteString -packCStringLen (cstr, len) | len >= 0 = BS.createFromPtr cstr len -packCStringLen (_, len) = - moduleErrorIO "packCStringLen" ("negative length: " ++ show len) - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a --- null-terminated @CString@. The @CString@ is a copy and will be freed --- automatically; it must not be stored or used after the --- subcomputation finishes. --- --- @since 0.10.10.0 -useAsCString :: ShortByteString -> (CString -> IO a) -> IO a -useAsCString bs action = - allocaBytes (l+1) $ \buf -> do - BS.copyToPtr bs 0 buf (fromIntegral l) - pokeByteOff buf l (0::Word8) - action buf - where l = length bs - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a 'CStringLen'. --- As for 'useAsCString' this function makes a copy of the original @ShortByteString@. --- It must not be stored or used after the subcomputation finishes. --- --- Beware that this function does not add a terminating @\NUL@ byte at the end of 'CStringLen'. --- If you need to construct a pointer to a null-terminated sequence, use 'useAsCString' --- (and measure length independently if desired). --- --- @since 0.10.10.0 -useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a -useAsCStringLen bs action = - allocaBytes l $ \buf -> do - BS.copyToPtr bs 0 buf (fromIntegral l) - action (buf, l) - where l = length bs - - -#endif - - --- | /O(n)./ Construct a new @ShortByteString@ from a @CWString@. The --- resulting @ShortByteString@ is an immutable copy of the original --- @CWString@, and is managed on the Haskell heap. The original --- @CWString@ must be null terminated. --- --- @since 0.10.10.0 -packCWString :: Ptr Word16 -> IO ShortByteString -packCWString cwstr = do - cs <- peekArray0 _nul cwstr - return (packWord16 cs) - --- | /O(n)./ Construct a new @ShortByteString@ from a @CWStringLen@. The --- resulting @ShortByteString@ is an immutable copy of the original @CWStringLen@. --- The @ShortByteString@ is a normal Haskell value and will be managed on the --- Haskell heap. --- --- @since 0.10.10.0 -packCWStringLen :: (Ptr Word16, Int) -> IO ShortByteString -packCWStringLen (cp, len) = do - cs <- peekArray len cp - return (packWord16 cs) - - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a --- null-terminated @CWString@. The @CWString@ is a copy and will be freed --- automatically; it must not be stored or used after the --- subcomputation finishes. --- --- @since 0.10.10.0 -useAsCWString :: ShortByteString -> (Ptr Word16 -> IO a) -> IO a -useAsCWString = withArray0 _nul . unpackWord16 - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. --- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. --- It must not be stored or used after the subcomputation finishes. --- --- @since 0.10.10.0 -useAsCWStringLen :: ShortByteString -> ((Ptr Word16, Int) -> IO a) -> IO a -useAsCWStringLen bs action = withArrayLen (unpackWord16 bs) $ \ len ptr -> action (ptr, len) - --- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CWStringLen@. --- As for @useAsCWString@ this function makes a copy of the original @ShortByteString@. --- It must not be stored or used after the subcomputation finishes. --- --- @since 0.10.10.0 -newCWString :: ShortByteString -> IO (Ptr Word16) -newCWString = newArray0 _nul . unpackWord16 - - - - - -- --------------------------------------------------------------------- --- Internal utilities - -moduleErrorIO :: String -> String -> IO a -moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg -{-# NOINLINE moduleErrorIO #-} - -moduleErrorMsg :: String -> String -> String -moduleErrorMsg fun msg = "System.OsPath.Data.ByteString.Short." ++ fun ++ ':':' ':msg - -packWord16 :: [Word16] -> ShortByteString -packWord16 cs = packLenWord16 (List.length cs) cs - -packLenWord16 :: Int -> [Word16] -> ShortByteString -packLenWord16 len ws0 = - create (len * 2) (\mba -> go mba 0 ws0) - where - go :: MBA s -> Int -> [Word16] -> ST s () - go !_ !_ [] = return () - go !mba !i (w:ws) = do - writeWord16Array mba i w - go mba (i+2) ws - - -unpackWord16 :: ShortByteString -> [Word16] -unpackWord16 sbs = go len [] - where - len = length sbs - go !i !acc - | i < 1 = acc - | otherwise = let !w = indexWord16Array (asBA sbs) (i - 2) - in go (i - 2) (w:acc) - -packWord16Rev :: [Word16] -> ShortByteString -packWord16Rev cs = packLenWord16Rev (List.length cs * 2) cs - -packLenWord16Rev :: Int -> [Word16] -> ShortByteString -packLenWord16Rev len ws0 = - create len (\mba -> go mba len ws0) - where - go :: MBA s -> Int -> [Word16] -> ST s () - go !_ !_ [] = return () - go !mba !i (w:ws) = do - writeWord16Array mba (i - 2) w - go mba (i - 2) ws - - --- | This isn't strictly Word16 array write. Instead it's two consecutive Word8 array --- writes to avoid endianness issues due to primops doing automatic alignment based --- on host platform. We want to always write LE to the byte array. -writeWord16Array :: MBA s - -> Int -- ^ Word8 index (not Word16) - -> Word16 - -> ST s () -writeWord16Array (MBA# mba#) (I# i#) (W16# w#) = - case encodeWord16LE# w# of - (# lsb#, msb# #) -> - ST (\s -> case writeWord8Array# mba# i# lsb# s of - s' -> (# s', () #)) >> - ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of - s' -> (# s', () #)) - -indexWord8Array :: BA - -> Int -- ^ Word8 index - -> Word8 -indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#) - --- | This isn't strictly Word16 array read. Instead it's two Word8 array reads --- to avoid endianness issues due to primops doing automatic alignment based --- on host platform. We expect the byte array to be LE always. -indexWord16Array :: BA - -> Int -- ^ Word8 index (not Word16) - -> Word16 -indexWord16Array ba i = fromIntegral lsb .|. (fromIntegral msb `shiftL` 8) - where - lsb = indexWord8Array ba i - msb = indexWord8Array ba (i + 1) - -#if !MIN_VERSION_base(4,16,0) - -encodeWord16LE# :: Word# -- ^ Word16 - -> (# Word#, Word# #) -- ^ Word8 (LSB, MSB) -encodeWord16LE# x# = (# x# `and#` int2Word# 0xff# - , x# `and#` int2Word# 0xff00# `shiftRL#` 8# #) - -decodeWord16LE# :: (# Word#, Word# #) -- ^ Word8 (LSB, MSB) - -> Word# -- ^ Word16 -decodeWord16LE# (# lsb#, msb# #) = msb# `shiftL#` 8# `or#` lsb# - -#else - -encodeWord16LE# :: Word16# -- ^ Word16 - -> (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) -encodeWord16LE# x# = (# word16ToWord8# x# - , word16ToWord8# (x# `uncheckedShiftRLWord16#` 8#) #) - where - word16ToWord8# y = wordToWord8# (word16ToWord# y) - -decodeWord16LE# :: (# Word8#, Word8# #) -- ^ Word8 (LSB, MSB) - -> Word16# -- ^ Word16 -decodeWord16LE# (# lsb#, msb# #) = ((word8ToWord16# msb# `uncheckedShiftLWord16#` 8#) `orWord16#` word8ToWord16# lsb#) - where - word8ToWord16# y = wordToWord16# (word8ToWord# y) - -#endif - -setByteArray :: MBA s -> Int -> Int -> Int -> ST s () -setByteArray (MBA# dst#) (I# off#) (I# len#) (I# c#) = - ST $ \s -> case setByteArray# dst# off# len# c# s of - s' -> (# s', () #) - -copyMutableByteArray :: MBA s -> Int -> MBA s -> Int -> Int -> ST s () -copyMutableByteArray (MBA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) = - ST $ \s -> case copyMutableByteArray# src# src_off# dst# dst_off# len# s of - s' -> (# s', () #) - --- | Given the maximum size needed and a function to make the contents --- of a ShortByteString, createAndTrim makes the 'ShortByteString'. --- The generating function is required to return the actual final size --- (<= the maximum size) and the result value. The resulting byte array --- is realloced to this size. -createAndTrim :: Int -> (forall s. MBA s -> ST s (Int, a)) -> (ShortByteString, a) -createAndTrim l fill = - runST $ do - mba <- newByteArray l - (l', res) <- fill mba - if assert (l' <= l) $ l' >= l - then do - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#, res) - else do - mba2 <- newByteArray l' - copyMutableByteArray mba 0 mba2 0 l' - BA# ba# <- unsafeFreezeByteArray mba2 - return (SBS ba#, res) -{-# INLINE createAndTrim #-} - -createAndTrim' :: Int -> (forall s. MBA s -> ST s Int) -> ShortByteString -createAndTrim' l fill = - runST $ do - mba <- newByteArray l - l' <- fill mba - if assert (l' <= l) $ l' >= l - then do - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#) - else do - mba2 <- newByteArray l' - copyMutableByteArray mba 0 mba2 0 l' - BA# ba# <- unsafeFreezeByteArray mba2 - return (SBS ba#) -{-# INLINE createAndTrim' #-} - -createAndTrim'' :: Int -> (forall s. MBA s -> MBA s -> ST s (Int, Int)) -> (ShortByteString, ShortByteString) -createAndTrim'' l fill = - runST $ do - mba1 <- newByteArray l - mba2 <- newByteArray l - (l1, l2) <- fill mba1 mba2 - sbs1 <- freeze' l1 mba1 - sbs2 <- freeze' l2 mba2 - pure (sbs1, sbs2) - where - freeze' :: Int -> MBA s -> ST s ShortByteString - freeze' l' mba = - if assert (l' <= l) $ l' >= l - then do - BA# ba# <- unsafeFreezeByteArray mba - return (SBS ba#) - else do - mba2 <- newByteArray l' - copyMutableByteArray mba 0 mba2 0 l' - BA# ba# <- unsafeFreezeByteArray mba2 - return (SBS ba#) -{-# INLINE createAndTrim'' #-} - --- Returns the index of the first match or the length of the whole --- bytestring if nothing matched. -findIndexOrLength :: (Word16 -> Bool) -> ShortByteString -> Int -findIndexOrLength k (assertEven -> sbs) = go 0 - where - l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = l `div` 2 - | k (w n) = n `div` 2 - | otherwise = go (n + 2) -{-# INLINE findIndexOrLength #-} - - --- | Returns the length of the substring matching, not the index. --- If no match, returns 0. -findFromEndUntil :: (Word16 -> Bool) -> ShortByteString -> Int -findFromEndUntil k sbs = go (BS.length sbs - 2) - where - ba = asBA sbs - w = indexWord16Array ba - go !n | n < 0 = 0 - | k (w n) = (n `div` 2) + 1 - | otherwise = go (n - 2) -{-# INLINE findFromEndUntil #-} - - -assertEven :: ShortByteString -> ShortByteString -assertEven sbs@(SBS barr#) - | even (I# (sizeofByteArray# barr#)) = sbs - | otherwise = error ("Uneven number of bytes: " <> show (BS.length sbs) <> ". This is not a Word16 bytestream.") - - --- Common up near identical calls to `error' to reduce the number --- constant strings created when compiled: -errorEmptySBS :: HasCallStack => String -> a -errorEmptySBS fun = moduleError fun "empty ShortByteString" -{-# NOINLINE errorEmptySBS #-} - -moduleError :: HasCallStack => String -> String -> a -moduleError fun msg = error (moduleErrorMsg fun msg) -{-# NOINLINE moduleError #-} - -compareByteArraysOff :: BA -- ^ array 1 - -> Int -- ^ offset for array 1 - -> BA -- ^ array 2 - -> Int -- ^ offset for array 2 - -> Int -- ^ length to compare - -> Int -- ^ like memcmp -#if MIN_VERSION_base(4,11,0) -compareByteArraysOff (BA# ba1#) (I# ba1off#) (BA# ba2#) (I# ba2off#) (I# len#) = - I# (compareByteArrays# ba1# ba1off# ba2# ba2off# len#) -#else -compareByteArraysOff (BA# ba1#) ba1off (BA# ba2#) ba2off len = - assert (ba1off + len <= (I# (sizeofByteArray# ba1#))) - $ assert (ba2off + len <= (I# (sizeofByteArray# ba2#))) - $ fromIntegral $ accursedUnutterablePerformIO $ - c_memcmp_ByteArray ba1# - ba1off - ba2# - ba2off - (fromIntegral len) - +where -foreign import ccall unsafe "static sbs_memcmp_off" - c_memcmp_ByteArray :: ByteArray# -> Int -> ByteArray# -> Int -> CSize -> IO CInt -#endif +import "os-string" System.OsString.Data.ByteString.Short.Word16 as W diff --git a/System/OsPath/Data/ByteString/Short/Word16.hs b/System/OsPath/Data/ByteString/Short/Word16.hs index 6ad8134b..1aa1386c 100644 --- a/System/OsPath/Data/ByteString/Short/Word16.hs +++ b/System/OsPath/Data/ByteString/Short/Word16.hs @@ -29,7 +29,7 @@ -- -- All functions will error out if the input string is not a valid UTF16 stream (uneven number of bytes). -- So use this module with caution. -module System.OsPath.Data.ByteString.Short.Word16 ( +module System.OsPath.Data.ByteString.Short.Word16 {-# DEPRECATED "Use System.OsString.Data.ByteString.Short.Word16 from os-string package instead" #-} ( -- * The @ShortByteString@ type and representation ShortByteString(..), @@ -139,757 +139,13 @@ module System.OsPath.Data.ByteString.Short.Word16 ( packCWString, packCWStringLen, newCWString, - + -- ** Using ShortByteStrings as 'CString's useAsCWString, useAsCWStringLen ) where -import System.OsPath.Data.ByteString.Short ( append, intercalate, concat, stripSuffix, stripPrefix, isPrefixOf, isSuffixOf, length, empty, null, ShortByteString(..), fromShort, toShort ) -import System.OsPath.Data.ByteString.Short.Internal -import Data.Bits - ( shiftR - ) -import Data.Word -import Prelude hiding - ( Foldable(..) - , all - , any - , reverse - , break - , concat - , drop - , dropWhile - , filter - , head - , init - , last - , map - , replicate - , span - , splitAt - , tail - , take - , takeWhile - ) -import qualified Data.Foldable as Foldable -import GHC.ST ( ST ) -import GHC.Stack ( HasCallStack ) -import GHC.Exts ( inline ) - -import qualified Data.ByteString.Short.Internal as BS -import qualified Data.List as List - - --- ----------------------------------------------------------------------------- --- Introducing and eliminating 'ShortByteString's - --- | /O(1)/ Convert a 'Word16' into a 'ShortByteString' -singleton :: Word16 -> ShortByteString -singleton = \w -> create 2 (\mba -> writeWord16Array mba 0 w) - - --- | /O(n)/. Convert a list into a 'ShortByteString' -pack :: [Word16] -> ShortByteString -pack = packWord16 - - --- | /O(n)/. Convert a 'ShortByteString' into a list. -unpack :: ShortByteString -> [Word16] -unpack = unpackWord16 . assertEven - - --- --------------------------------------------------------------------- --- Basic interface - --- | This is like 'length', but the number of 'Word16', not 'Word8'. -numWord16 :: ShortByteString -> Int -numWord16 = (`shiftR` 1) . BS.length . assertEven - -infixr 5 `cons` --same as list (:) -infixl 5 `snoc` - --- | /O(n)/ Append a Word16 to the end of a 'ShortByteString' --- --- Note: copies the entire byte array -snoc :: ShortByteString -> Word16 -> ShortByteString -snoc = \(assertEven -> sbs) c -> let l = BS.length sbs - nl = l + 2 - in create nl $ \mba -> do - copyByteArray (asBA sbs) 0 mba 0 l - writeWord16Array mba l c - --- | /O(n)/ 'cons' is analogous to (:) for lists. --- --- Note: copies the entire byte array -cons :: Word16 -> ShortByteString -> ShortByteString -cons c = \(assertEven -> sbs) -> let l = BS.length sbs - nl = l + 2 - in create nl $ \mba -> do - writeWord16Array mba 0 c - copyByteArray (asBA sbs) 0 mba 2 l - --- | /O(1)/ Extract the last element of a ShortByteString, which must be finite and at least one Word16. --- An exception will be thrown in the case of an empty ShortByteString. -last :: HasCallStack => ShortByteString -> Word16 -last = \(assertEven -> sbs) -> case null sbs of - True -> errorEmptySBS "last" - False -> indexWord16Array (asBA sbs) (BS.length sbs - 2) - --- | /O(n)/ Extract the elements after the head of a ShortByteString, which must at least one Word16. --- An exception will be thrown in the case of an empty ShortByteString. --- --- Note: copies the entire byte array -tail :: HasCallStack => ShortByteString -> ShortByteString -tail = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if - | l <= 0 -> errorEmptySBS "tail" - | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl - --- | /O(n)/ Extract the head and tail of a ByteString, returning Nothing --- if it is empty. -uncons :: ShortByteString -> Maybe (Word16, ShortByteString) -uncons = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if | l <= 0 -> Nothing - | otherwise -> let h = indexWord16Array (asBA sbs) 0 - t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl - in Just (h, t) - --- | /O(n)/ Extract first two elements and the rest of a ByteString, --- returning Nothing if it is shorter than two elements. -uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString) -uncons2 = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 4 - in if | l <= 2 -> Nothing - | otherwise -> let h = indexWord16Array (asBA sbs) 0 - h' = indexWord16Array (asBA sbs) 2 - t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl - in Just (h, h', t) - --- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16. --- An exception will be thrown in the case of an empty ShortByteString. -head :: HasCallStack => ShortByteString -> Word16 -head = \(assertEven -> sbs) -> case null sbs of - True -> errorEmptySBS "last" - False -> indexWord16Array (asBA sbs) 0 - --- | /O(n)/ Return all the elements of a 'ShortByteString' except the last one. --- An exception will be thrown in the case of an empty ShortByteString. --- --- Note: copies the entire byte array -init :: HasCallStack => ShortByteString -> ShortByteString -init = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if - | l <= 0 -> errorEmptySBS "tail" - | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl - --- | /O(n)/ Extract the 'init' and 'last' of a ByteString, returning Nothing --- if it is empty. -unsnoc :: ShortByteString -> Maybe (ShortByteString, Word16) -unsnoc = \(assertEven -> sbs) -> - let l = BS.length sbs - nl = l - 2 - in if | l <= 0 -> Nothing - | otherwise -> let l' = indexWord16Array (asBA sbs) (l - 2) - i = create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl - in Just (i, l') - - --- --------------------------------------------------------------------- --- Transformations - --- | /O(n)/ 'map' @f xs@ is the ShortByteString obtained by applying @f@ to each --- element of @xs@. -map :: (Word16 -> Word16) -> ShortByteString -> ShortByteString -map f = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - in create l (\mba -> go ba mba 0 l) - where - go :: BA -> MBA s -> Int -> Int -> ST s () - go !ba !mba !i !l - | i >= l = return () - | otherwise = do - let w = indexWord16Array ba i - writeWord16Array mba i (f w) - go ba mba (i+2) l - --- TODO: implement more efficiently --- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. -reverse :: ShortByteString -> ShortByteString -reverse = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - in create l (\mba -> go ba mba 0 l) - where - go :: BA -> MBA s -> Int -> Int -> ST s () - go !ba !mba !i !l - | i >= l = return () - | otherwise = do - let w = indexWord16Array ba i - writeWord16Array mba (l - 2 - i) w - go ba mba (i+2) l - - --- --------------------------------------------------------------------- --- Special folds - --- | /O(n)/ Applied to a predicate and a 'ShortByteString', 'all' determines --- if all elements of the 'ShortByteString' satisfy the predicate. -all :: (Word16 -> Bool) -> ShortByteString -> Bool -all k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = True - | otherwise = k (w n) && go (n + 2) - in go 0 - - --- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if --- any element of the 'ByteString' satisfies the predicate. -any :: (Word16 -> Bool) -> ShortByteString -> Bool -any k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = False - | otherwise = k (w n) || go (n + 2) - in go 0 - - --- --------------------------------------------------------------------- --- Unfolds and replicates - - --- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ --- the value of every element. The following holds: --- --- > replicate w c = unfoldr w (\u -> Just (u,u)) c -replicate :: Int -> Word16 -> ShortByteString -replicate w c - | w <= 0 = empty - -- can't use setByteArray here, because we write UTF-16LE - | otherwise = create (w * 2) (`go` 0) - where - go mba ix - | ix < 0 || ix >= w * 2 = pure () - | otherwise = writeWord16Array mba ix c >> go mba (ix + 2) - --- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' --- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a --- ShortByteString from a seed value. The function takes the element and --- returns 'Nothing' if it is done producing the ShortByteString or returns --- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, --- and @b@ is the seed value for further production. --- --- This function is not efficient/safe. It will build a list of @[Word16]@ --- and run the generator until it returns `Nothing`, otherwise recurse infinitely, --- then finally create a 'ShortByteString'. --- --- Examples: --- --- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 --- > == pack [0, 1, 2, 3, 4, 5] --- -unfoldr :: (a -> Maybe (Word16, a)) -> a -> ShortByteString -unfoldr f x0 = packWord16Rev $ go x0 mempty - where - go x words' = case f x of - Nothing -> words' - Just (w, x') -> go x' (w:words') - --- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ShortByteString from a seed --- value. However, the length of the result is limited by the first --- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' --- when the maximum length of the result is known. --- --- The following equation relates 'unfoldrN' and 'unfoldr': --- --- > fst (unfoldrN n f s) == take n (unfoldr f s) --- -unfoldrN :: forall a. - Int -- ^ number of 'Word16' - -> (a -> Maybe (Word16, a)) - -> a - -> (ShortByteString, Maybe a) -unfoldrN i f = \x0 -> - if | i < 0 -> (empty, Just x0) - | otherwise -> createAndTrim (i * 2) $ \mba -> go mba x0 0 - - where - go :: forall s. MBA s -> a -> Int -> ST s (Int, Maybe a) - go !mba !x !n = go' x n - where - go' :: a -> Int -> ST s (Int, Maybe a) - go' !x' !n' - | n' == i * 2 = return (n', Just x') - | otherwise = case f x' of - Nothing -> return (n', Nothing) - Just (w, x'') -> do - writeWord16Array mba n' w - go' x'' (n'+2) - - --- -------------------------------------------------------------------- --- Predicates - - - --- --------------------------------------------------------------------- --- Substrings - --- | /O(n)/ 'take' @n@, applied to a ShortByteString @xs@, returns the prefix --- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. --- --- Note: copies the entire byte array -take :: Int -- ^ number of Word16 - -> ShortByteString - -> ShortByteString -take = \n (assertEven -> sbs) -> - let sl = numWord16 sbs - len8 = n * 2 - in if | n >= sl -> sbs - | n <= 0 -> empty - | otherwise -> - create len8 $ \mba -> copyByteArray (asBA sbs) 0 mba 0 len8 - - --- | /O(1)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. --- Takes @n@ elements from end of bytestring. --- --- >>> takeEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "e\NULf\NULg\NUL" --- >>> takeEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "" --- >>> takeEnd 4 "a\NULb\NULc\NUL" --- "a\NULb\NULc\NUL" -takeEnd :: Int -- ^ number of 'Word16' - -> ShortByteString - -> ShortByteString -takeEnd n = \(assertEven -> sbs) -> - let sl = BS.length sbs - n2 = n * 2 - in if | n2 >= sl -> sbs - | n2 <= 0 -> empty - | otherwise -> create n2 $ \mba -> copyByteArray (asBA sbs) (max 0 (sl - n2)) mba 0 n2 - --- | Similar to 'P.takeWhile', --- returns the longest (possibly empty) prefix of elements --- satisfying the predicate. -takeWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -takeWhile f ps = take (findIndexOrLength (not . f) ps) ps - --- | Returns the longest (possibly empty) suffix of elements --- satisfying the predicate. --- --- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. -takeWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -takeWhileEnd f ps = drop (findFromEndUntil (not . f) ps) ps - - --- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or @[]@ if @n > 'length' xs@. --- --- Note: copies the entire byte array -drop :: Int -- ^ number of 'Word16' - -> ShortByteString - -> ShortByteString -drop = \n' (assertEven -> sbs) -> - let len = BS.length sbs - n = n' * 2 - in if | n <= 0 -> sbs - | n >= len -> empty - | otherwise -> - let newLen = len - n - in create newLen $ \mba -> copyByteArray (asBA sbs) n mba 0 newLen - --- | /O(1)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. --- Drops @n@ elements from end of bytestring. --- --- >>> dropEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "a\NULb\NULc\NULd\NUL" --- >>> dropEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" --- >>> dropEnd 4 "a\NULb\NULc\NUL" --- "" -dropEnd :: Int -- ^ number of 'Word16' - -> ShortByteString - -> ShortByteString -dropEnd n' = \(assertEven -> sbs) -> - let sl = BS.length sbs - nl = sl - n - n = n' * 2 - in if | n >= sl -> empty - | n <= 0 -> sbs - | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl - --- | Similar to 'P.dropWhile', --- drops the longest (possibly empty) prefix of elements --- satisfying the predicate and returns the remainder. --- --- Note: copies the entire byte array -dropWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -dropWhile f = \(assertEven -> ps) -> drop (findIndexOrLength (not . f) ps) ps - --- | Similar to 'P.dropWhileEnd', --- drops the longest (possibly empty) suffix of elements --- satisfying the predicate and returns the remainder. --- --- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. --- --- @since 0.10.12.0 -dropWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -dropWhileEnd f = \(assertEven -> ps) -> take (findFromEndUntil (not . f) ps) ps - --- | Returns the longest (possibly empty) suffix of elements which __do not__ --- satisfy the predicate and the remainder of the string. --- --- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. -breakEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -breakEnd p = \(assertEven -> sbs) -> splitAt (findFromEndUntil p sbs) sbs - --- | Similar to 'P.break', --- returns the longest (possibly empty) prefix of elements which __do not__ --- satisfy the predicate and the remainder of the string. --- --- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. -break :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -break = \p (assertEven -> ps) -> case findIndexOrLength p ps of n -> splitAt n ps - --- | Similar to 'P.span', --- returns the longest (possibly empty) prefix of elements --- satisfying the predicate and the remainder of the string. --- --- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. --- -span :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -{- HLINT ignore "Use span" -} -span p = break (not . p) . assertEven - --- | Returns the longest (possibly empty) suffix of elements --- satisfying the predicate and the remainder of the string. --- --- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. --- --- We have --- --- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") --- --- and --- --- > spanEnd (not . isSpace) ps --- > == --- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x) --- -spanEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -spanEnd p = \(assertEven -> ps) -> splitAt (findFromEndUntil (not.p) ps) ps - --- | /O(n)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. --- --- Note: copies the substrings -splitAt :: Int -- ^ number of Word16 - -> ShortByteString - -> (ShortByteString, ShortByteString) -splitAt n' = \(assertEven -> sbs) -> if - | n <= 0 -> (empty, sbs) - | otherwise -> - let slen = BS.length sbs - in if | n >= BS.length sbs -> (sbs, empty) - | otherwise -> - let llen = min slen (max 0 n) - rlen = max 0 (slen - max 0 n) - lsbs = create llen $ \mba -> copyByteArray (asBA sbs) 0 mba 0 llen - rsbs = create rlen $ \mba -> copyByteArray (asBA sbs) n mba 0 rlen - in (lsbs, rsbs) - where - n = n' * 2 - --- | /O(n)/ Break a 'ShortByteString' into pieces separated by the byte --- argument, consuming the delimiter. I.e. --- --- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 --- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 --- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 --- > split undefined "" == [] -- and not [""] --- --- and --- --- > intercalate [c] . split c == id --- > split == splitWith . (==) --- --- Note: copies the substrings -split :: Word16 -> ShortByteString -> [ShortByteString] -split w = splitWith (== w) . assertEven - - --- | /O(n)/ Splits a 'ShortByteString' into components delimited by --- separators, where the predicate returns True for a separator element. --- The resulting components do not contain the separators. Two adjacent --- separators result in an empty component in the output. eg. --- --- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 --- > splitWith undefined "" == [] -- and not [""] --- -splitWith :: (Word16 -> Bool) -> ShortByteString -> [ShortByteString] -splitWith p = \(assertEven -> sbs) -> if - | BS.null sbs -> [] - | otherwise -> go sbs - where - go sbs' - | BS.null sbs' = [mempty] - | otherwise = - case break p sbs' of - (a, b) - | BS.null b -> [a] - | otherwise -> a : go (tail b) - - --- | Check whether one string is a substring of another. -isInfixOf :: ShortByteString -> ShortByteString -> Bool -isInfixOf sbs = \s -> null sbs || not (null $ snd $ GHC.Exts.inline breakSubstring sbs s) - - --- algorithm: https://github.com/haskell/filepath/issues/195#issuecomment-1605633713 -breakSubstring :: ShortByteString -- ^ String to search for - -> ShortByteString -- ^ String to search in - -> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring -breakSubstring bPat@(asBA -> pat) bInp@(asBA -> inp) = go 0 - where - lpat = BS.length bPat - linp = BS.length bInp - go ix - | let ix' = ix * 2 - , linp >= ix' + lpat = - if | compareByteArraysOff pat 0 inp ix' lpat == 0 -> splitAt ix bInp - | otherwise -> go (ix + 1) - | otherwise - = (bInp, mempty) - - --- --------------------------------------------------------------------- --- Reducing 'ByteString's - --- | 'foldl', applied to a binary operator, a starting value (typically --- the left-identity of the operator), and a ShortByteString, reduces the --- ShortByteString using the binary operator, from left to right. --- -foldl :: (a -> Word16 -> a) -> a -> ShortByteString -> a -foldl f v = List.foldl f v . unpack . assertEven - --- | 'foldl'' is like 'foldl', but strict in the accumulator. --- -foldl' :: (a -> Word16 -> a) -> a -> ShortByteString -> a -foldl' f v = List.foldl' f v . unpack . assertEven - --- | 'foldr', applied to a binary operator, a starting value --- (typically the right-identity of the operator), and a ShortByteString, --- reduces the ShortByteString using the binary operator, from right to left. -foldr :: (Word16 -> a -> a) -> a -> ShortByteString -> a -foldr f v = List.foldr f v . unpack . assertEven - --- | 'foldr'' is like 'foldr', but strict in the accumulator. -foldr' :: (Word16 -> a -> a) -> a -> ShortByteString -> a -foldr' k v = Foldable.foldr' k v . unpack . assertEven - --- | 'foldl1' is a variant of 'foldl' that has no starting value --- argument, and thus must be applied to non-empty 'ShortByteString's. --- An exception will be thrown in the case of an empty ShortByteString. -foldl1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldl1 k = List.foldl1 k . unpack . assertEven - --- | 'foldl1'' is like 'foldl1', but strict in the accumulator. --- An exception will be thrown in the case of an empty ShortByteString. -foldl1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldl1' k = List.foldl1' k . unpack . assertEven - --- | 'foldr1' is a variant of 'foldr' that has no starting value argument, --- and thus must be applied to non-empty 'ShortByteString's --- An exception will be thrown in the case of an empty ShortByteString. -foldr1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldr1 k = List.foldr1 k . unpack . assertEven - --- | 'foldr1'' is a variant of 'foldr1', but is strict in the --- accumulator. -foldr1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 -foldr1' k = \(assertEven -> sbs) -> if null sbs then errorEmptySBS "foldr1'" else foldr' k (last sbs) (init sbs) - - --- -------------------------------------------------------------------- --- Searching ShortByteString - --- | /O(1)/ 'ShortByteString' index (subscript) operator, starting from 0. -index :: HasCallStack - => ShortByteString - -> Int -- ^ number of 'Word16' - -> Word16 -index = \(assertEven -> sbs) i -> if - | i >= 0 && i < numWord16 sbs -> unsafeIndex sbs i - | otherwise -> indexError sbs i - --- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: --- --- > 0 <= n < length bs --- --- @since 0.11.0.0 -indexMaybe :: ShortByteString - -> Int -- ^ number of 'Word16' - -> Maybe Word16 -indexMaybe = \(assertEven -> sbs) i -> if - | i >= 0 && i < numWord16 sbs -> Just $! unsafeIndex sbs i - | otherwise -> Nothing -{-# INLINE indexMaybe #-} - -unsafeIndex :: ShortByteString - -> Int -- ^ number of 'Word16' - -> Word16 -unsafeIndex sbs i = indexWord16Array (asBA sbs) (i * 2) - -indexError :: HasCallStack => ShortByteString -> Int -> a -indexError sbs i = - moduleError "index" $ "error in array index: " ++ show i - ++ " not in range [0.." ++ show (numWord16 sbs) ++ "]" - --- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: --- --- > 0 <= n < length bs --- --- @since 0.11.0.0 -(!?) :: ShortByteString - -> Int -- ^ number of 'Word16' - -> Maybe Word16 -(!?) = indexMaybe -{-# INLINE (!?) #-} - --- | /O(n)/ 'elem' is the 'ShortByteString' membership predicate. -elem :: Word16 -> ShortByteString -> Bool -elem c = \(assertEven -> sbs) -> case elemIndex c sbs of Nothing -> False ; _ -> True - --- | /O(n)/ 'filter', applied to a predicate and a ByteString, --- returns a ByteString containing those characters that satisfy the --- predicate. -filter :: (Word16 -> Bool) -> ShortByteString -> ShortByteString -filter k = \(assertEven -> sbs) -> - let l = BS.length sbs - in if | l <= 0 -> sbs - | otherwise -> createAndTrim' l $ \mba -> go mba (asBA sbs) l - where - go :: forall s. MBA s -- mutable output bytestring - -> BA -- input bytestring - -> Int -- length of input bytestring - -> ST s Int - go !mba ba !l = go' 0 0 - where - go' :: Int -- bytes read - -> Int -- bytes written - -> ST s Int - go' !br !bw - | br >= l = return bw - | otherwise = do - let w = indexWord16Array ba br - if k w - then do - writeWord16Array mba bw w - go' (br+2) (bw+2) - else - go' (br+2) bw - --- | /O(n)/ The 'find' function takes a predicate and a ByteString, --- and returns the first element in matching the predicate, or 'Nothing' --- if there is no such element. --- --- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing --- -find :: (Word16 -> Bool) -> ShortByteString -> Maybe Word16 -find f = \(assertEven -> sbs) -> case findIndex f sbs of - Just n -> Just (sbs `index` n) - _ -> Nothing - --- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns --- the pair of ByteStrings with elements which do and do not satisfy the --- predicate, respectively; i.e., --- --- > partition p bs == (filter p xs, filter (not . p) xs) --- -partition :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) -partition k = \(assertEven -> sbs) -> - let l = BS.length sbs - in if | l <= 0 -> (sbs, sbs) - | otherwise -> createAndTrim'' l $ \mba1 mba2 -> go mba1 mba2 (asBA sbs) l - where - go :: forall s. - MBA s -- mutable output bytestring1 - -> MBA s -- mutable output bytestring2 - -> BA -- input bytestring - -> Int -- length of input bytestring - -> ST s (Int, Int) -- (length mba1, length mba2) - go !mba1 !mba2 ba !l = go' 0 0 - where - go' :: Int -- bytes read - -> Int -- bytes written to bytestring 1 - -> ST s (Int, Int) -- (length mba1, length mba2) - go' !br !bw1 - | br >= l = return (bw1, br - bw1) - | otherwise = do - let w = indexWord16Array ba br - if k w - then do - writeWord16Array mba1 bw1 w - go' (br+2) (bw1+2) - else do - writeWord16Array mba2 (br - bw1) w - go' (br+2) bw1 - --- -------------------------------------------------------------------- --- Indexing ShortByteString - --- | /O(n)/ The 'elemIndex' function returns the index of the first --- element in the given 'ShortByteString' which is equal to the query --- element, or 'Nothing' if there is no such element. -elemIndex :: Word16 - -> ShortByteString - -> Maybe Int -- ^ number of 'Word16' -{- HLINT ignore "Use elemIndex" -} -elemIndex k = findIndex (==k) . assertEven - --- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning --- the indices of all elements equal to the query element, in ascending order. -elemIndices :: Word16 -> ShortByteString -> [Int] -{- HLINT ignore "Use elemIndices" -} -elemIndices k = findIndices (==k) . assertEven - --- | count returns the number of times its argument appears in the ShortByteString -count :: Word16 -> ShortByteString -> Int -count w = List.length . elemIndices w . assertEven - --- | /O(n)/ The 'findIndex' function takes a predicate and a 'ShortByteString' and --- returns the index of the first element in the ByteString --- satisfying the predicate. -findIndex :: (Word16 -> Bool) -> ShortByteString -> Maybe Int -findIndex k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = Nothing - | k (w n) = Just (n `shiftR` 1) - | otherwise = go (n + 2) - in go 0 --- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the --- indices of all elements satisfying the predicate, in ascending order. -findIndices :: (Word16 -> Bool) -> ShortByteString -> [Int] -findIndices k = \(assertEven -> sbs) -> - let l = BS.length sbs - ba = asBA sbs - w = indexWord16Array ba - go !n | n >= l = [] - | k (w n) = (n `shiftR` 1) : go (n + 2) - | otherwise = go (n + 2) - in go 0 +import "os-string" System.OsString.Data.ByteString.Short.Word16 +import Prelude () diff --git a/System/OsPath/Encoding.hs b/System/OsPath/Encoding.hs index fe81497f..b6a34d07 100644 --- a/System/OsPath/Encoding.hs +++ b/System/OsPath/Encoding.hs @@ -1,3 +1,16 @@ +-- | +-- Module : System.OsPath.Encoding +-- Copyright : © 2023 Julian Ospald +-- License : MIT +-- +-- Maintainer : Julian Ospald +-- Stability : experimental +-- Portability : portable +-- +-- Encoding helpers for 'OsPath'. It's advised to import this module qualified to +-- avoid name clashes with @System.OsString.Encoding@: +-- +-- > import qualified System.OsPath.Encoding as OSE module System.OsPath.Encoding ( -- * Types @@ -25,7 +38,44 @@ module System.OsPath.Encoding , decodeWithBasePosix , encodeWithBaseWindows , decodeWithBaseWindows + + -- * low-level functions + , withFilePathWin + , peekFilePathWin + , withFilePathPosix + , peekFilePathPosix ) where -import System.OsPath.Encoding.Internal +import Foreign.C (CStringLen) +import Foreign + +import System.OsString.Encoding.Internal + +-- | Suitable for executing ffi calls on a windows filepath. The callback +-- takes the length of the filepath and a 'Word16' pointer as arguments. +-- +-- @since 1.4.200.0 +withFilePathWin :: FilePath -> (Int -> Ptr Word16 -> IO a) -> IO a +withFilePathWin = withWindowsString + +-- | Read a windows filepath from a given memory location, assuming +-- the given length. +-- +-- @since 1.4.200.0 +peekFilePathWin :: (Ptr Word16, Int) -> IO FilePath +peekFilePathWin = peekWindowsString + +-- | Suitable for executing ffi calls on a unix filepath. +-- +-- @since 1.4.200.0 +withFilePathPosix :: FilePath -> (CStringLen -> IO a) -> IO a +withFilePathPosix = withPosixString + +-- | Read a windows filepath from a given memory location, assuming +-- the given length. +-- +-- @since 1.4.200.0 +peekFilePathPosix :: CStringLen -> IO FilePath +peekFilePathPosix = peekPosixString + diff --git a/System/OsPath/Encoding/Internal.hs b/System/OsPath/Encoding/Internal.hs index 1ae1c85a..a5ef35cd 100644 --- a/System/OsPath/Encoding/Internal.hs +++ b/System/OsPath/Encoding/Internal.hs @@ -1,349 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude - , BangPatterns - , TypeApplications - , MultiWayIf - #-} -{-# OPTIONS_GHC -funbox-strict-fields #-} +module System.OsPath.Encoding.Internal {-# DEPRECATED "Use System.OsString.Encoding.Internal from os-string package instead. This module will be removed in filepath >= 1.5." #-} + ( + module System.OsString.Encoding.Internal + ) where +import "os-string" System.OsString.Encoding.Internal -module System.OsPath.Encoding.Internal where - -import qualified System.OsPath.Data.ByteString.Short as BS8 -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 - -import GHC.Base -import GHC.Real -import GHC.Num --- import GHC.IO -import GHC.IO.Buffer -import GHC.IO.Encoding.Failure -import GHC.IO.Encoding.Types -import Data.Bits -import Control.Exception (SomeException, try, Exception (displayException), evaluate) -import qualified GHC.Foreign as GHC -import Data.Either (Either) -import GHC.IO (unsafePerformIO) -import Control.DeepSeq (force, NFData (rnf)) -import Data.Bifunctor (first) -import Data.Data (Typeable) -import GHC.Show (Show (show)) -import Numeric (showHex) -import Foreign.C (CStringLen) -import Data.Char (chr) -import Foreign -import Prelude (FilePath) -import GHC.IO.Encoding (getFileSystemEncoding) - --- ----------------------------------------------------------------------------- --- UCS-2 LE --- - -ucs2le :: TextEncoding -ucs2le = mkUcs2le ErrorOnCodingFailure - -mkUcs2le :: CodingFailureMode -> TextEncoding -mkUcs2le cfm = TextEncoding { textEncodingName = "UCS-2LE", - mkTextDecoder = ucs2le_DF cfm, - mkTextEncoder = ucs2le_EF cfm } - -ucs2le_DF :: CodingFailureMode -> IO (TextDecoder ()) -ucs2le_DF cfm = - return (BufferCodec { - encode = ucs2le_decode, - recover = recoverDecode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - -ucs2le_EF :: CodingFailureMode -> IO (TextEncoder ()) -ucs2le_EF cfm = - return (BufferCodec { - encode = ucs2le_encode, - recover = recoverEncode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - - -ucs2le_decode :: DecodeBuffer -ucs2le_decode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - loop !ir !ow - | ow >= os = done OutputUnderflow ir ow - | ir >= iw = done InputUnderflow ir ow - | ir + 1 == iw = done InputUnderflow ir ow - | otherwise = do - c0 <- readWord8Buf iraw ir - c1 <- readWord8Buf iraw (ir+1) - let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 - ow' <- writeCharBuf oraw ow (unsafeChr x1) - loop (ir+2) ow' - - -- lambda-lifted, to avoid thunks being built in the inner-loop: - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - in - loop ir0 ow0 - - -ucs2le_encode :: EncodeBuffer -ucs2le_encode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - loop !ir !ow - | ir >= iw = done InputUnderflow ir ow - | os - ow < 2 = done OutputUnderflow ir ow - | otherwise = do - (c,ir') <- readCharBuf iraw ir - case ord c of - x | x < 0x10000 -> do - writeWord8Buf oraw ow (fromIntegral x) - writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) - loop ir' (ow+2) - | otherwise -> done InvalidSequence ir ow - in - loop ir0 ow0 - --- ----------------------------------------------------------------------------- --- UTF-16b --- - --- | Mimics the base encoding for filesystem operations. This should be total on all inputs (word16 byte arrays). --- --- Note that this has a subtle difference to 'encodeWithBaseWindows'/'decodeWithBaseWindows': it doesn't care for --- the @0x0000@ end marker and will as such produce different results. Use @takeWhile (/= '\NUL')@ on the input --- to recover this behavior. -utf16le_b :: TextEncoding -utf16le_b = mkUTF16le_b ErrorOnCodingFailure - -mkUTF16le_b :: CodingFailureMode -> TextEncoding -mkUTF16le_b cfm = TextEncoding { textEncodingName = "UTF-16LE_b", - mkTextDecoder = utf16le_b_DF cfm, - mkTextEncoder = utf16le_b_EF cfm } - -utf16le_b_DF :: CodingFailureMode -> IO (TextDecoder ()) -utf16le_b_DF cfm = - return (BufferCodec { - encode = utf16le_b_decode, - recover = recoverDecode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - -utf16le_b_EF :: CodingFailureMode -> IO (TextEncoder ()) -utf16le_b_EF cfm = - return (BufferCodec { - encode = utf16le_b_encode, - recover = recoverEncode cfm, - close = return (), - getState = return (), - setState = const $ return () - }) - - -utf16le_b_decode :: DecodeBuffer -utf16le_b_decode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - loop !ir !ow - | ow >= os = done OutputUnderflow ir ow - | ir >= iw = done InputUnderflow ir ow - | ir + 1 == iw = done InputUnderflow ir ow - | otherwise = do - c0 <- readWord8Buf iraw ir - c1 <- readWord8Buf iraw (ir+1) - let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0 - if | iw - ir >= 4 -> do - c2 <- readWord8Buf iraw (ir+2) - c3 <- readWord8Buf iraw (ir+3) - let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2 - if | 0xd800 <= x1 && x1 <= 0xdbff - , 0xdc00 <= x2 && x2 <= 0xdfff -> do - ow' <- writeCharBuf oraw ow (unsafeChr ((x1 - 0xd800)*0x400 + (x2 - 0xdc00) + 0x10000)) - loop (ir+4) ow' - | otherwise -> do - ow' <- writeCharBuf oraw ow (unsafeChr x1) - loop (ir+2) ow' - | iw - ir >= 2 -> do - ow' <- writeCharBuf oraw ow (unsafeChr x1) - loop (ir+2) ow' - | otherwise -> done InputUnderflow ir ow - - -- lambda-lifted, to avoid thunks being built in the inner-loop: - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - in - loop ir0 ow0 - - -utf16le_b_encode :: EncodeBuffer -utf16le_b_encode - input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } - output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } - = let - done why !ir !ow = return (why, - if ir == iw then input{ bufL=0, bufR=0 } - else input{ bufL=ir }, - output{ bufR=ow }) - loop !ir !ow - | ir >= iw = done InputUnderflow ir ow - | os - ow < 2 = done OutputUnderflow ir ow - | otherwise = do - (c,ir') <- readCharBuf iraw ir - case ord c of - x | x < 0x10000 -> do - writeWord8Buf oraw ow (fromIntegral x) - writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8)) - loop ir' (ow+2) - | otherwise -> - if os - ow < 4 then done OutputUnderflow ir ow else do - let x' = x - 0x10000 - w1 = x' `div` 0x400 + 0xd800 - w2 = x' `mod` 0x400 + 0xdc00 - writeWord8Buf oraw ow (fromIntegral w1) - writeWord8Buf oraw (ow+1) (fromIntegral (w1 `shiftR` 8)) - writeWord8Buf oraw (ow+2) (fromIntegral w2) - writeWord8Buf oraw (ow+3) (fromIntegral (w2 `shiftR` 8)) - loop ir' (ow+4) - in - loop ir0 ow0 - --- ----------------------------------------------------------------------------- --- Windows encoding (ripped off from base) --- - -cWcharsToChars_UCS2 :: [Word16] -> [Char] -cWcharsToChars_UCS2 = map (chr . fromIntegral) - - --- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding. - --- coding errors generate Chars in the surrogate range -cWcharsToChars :: [Word16] -> [Char] -cWcharsToChars = map chr . fromUTF16 . map fromIntegral - where - fromUTF16 :: [Int] -> [Int] - fromUTF16 (c1:c2:wcs) - | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff = - ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs - fromUTF16 (c:wcs) = c : fromUTF16 wcs - fromUTF16 [] = [] - -charsToCWchars :: [Char] -> [Word16] -charsToCWchars = foldr (utf16Char . ord) [] - where - utf16Char :: Int -> [Word16] -> [Word16] - utf16Char c wcs - | c < 0x10000 = fromIntegral c : wcs - | otherwise = let c' = c - 0x10000 in - fromIntegral (c' `div` 0x400 + 0xd800) : - fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs - --- ----------------------------------------------------------------------------- - --- ----------------------------------------------------------------------------- --- FFI --- - -withFilePathWin :: FilePath -> (Int -> Ptr Word16 -> IO a) -> IO a -withFilePathWin = withArrayLen . charsToCWchars - -peekFilePathWin :: (Ptr Word16, Int) -> IO FilePath -peekFilePathWin (cp, l) = do - cs <- peekArray l cp - return (cWcharsToChars cs) - -withFilePathPosix :: FilePath -> (CStringLen -> IO a) -> IO a -withFilePathPosix fp f = getFileSystemEncoding >>= \enc -> GHC.withCStringLen enc fp f - -peekFilePathPosix :: CStringLen -> IO FilePath -peekFilePathPosix fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp - --- | Decode with the given 'TextEncoding'. -decodeWithTE :: TextEncoding -> BS8.ShortByteString -> Either EncodingException String -decodeWithTE enc ba = unsafePerformIO $ do - r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen enc fp - evaluate $ force $ first (flip EncodingError Nothing . displayException) r - --- | Encode with the given 'TextEncoding'. -encodeWithTE :: TextEncoding -> String -> Either EncodingException BS8.ShortByteString -encodeWithTE enc str = unsafePerformIO $ do - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> BS8.packCStringLen cstr - evaluate $ force $ first (flip EncodingError Nothing . displayException) r - --- ----------------------------------------------------------------------------- --- Encoders / decoders --- - --- | This mimics the filepath decoder base uses on unix, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -decodeWithBasePosix :: BS8.ShortByteString -> IO String -decodeWithBasePosix ba = BS8.useAsCStringLen ba $ \fp -> peekFilePathPosix fp - --- | This mimics the filepath dencoder base uses on unix, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -encodeWithBasePosix :: String -> IO BS8.ShortByteString -encodeWithBasePosix str = withFilePathPosix str $ \cstr -> BS8.packCStringLen cstr - --- | This mimics the filepath decoder base uses on windows, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -decodeWithBaseWindows :: BS16.ShortByteString -> IO String -decodeWithBaseWindows ba = BS16.useAsCWStringLen ba $ \fp -> peekFilePathWin fp - --- | This mimics the filepath dencoder base uses on windows, --- with the small distinction that we're not truncating at NUL bytes (because we're not at --- the outer FFI layer). -encodeWithBaseWindows :: String -> IO BS16.ShortByteString -encodeWithBaseWindows str = withFilePathWin str $ \l cstr -> BS16.packCWStringLen (cstr, l) - - --- ----------------------------------------------------------------------------- --- Types --- - -data EncodingException = - EncodingError String (Maybe Word8) - -- ^ Could not decode a byte sequence because it was invalid under - -- the given encoding, or ran out of input in mid-decode. - deriving (Eq, Typeable) - - -showEncodingException :: EncodingException -> String -showEncodingException (EncodingError desc (Just w)) - = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc) -showEncodingException (EncodingError desc Nothing) - = "Cannot decode input: " ++ desc - -instance Show EncodingException where - show = showEncodingException - -instance Exception EncodingException - -instance NFData EncodingException where - rnf (EncodingError desc w) = rnf desc `seq` rnf w - - --- ----------------------------------------------------------------------------- --- Words --- - -wNUL :: Word16 -wNUL = 0x00 diff --git a/System/OsPath/Internal.hs b/System/OsPath/Internal.hs index 3bdf5318..5ba76331 100644 --- a/System/OsPath/Internal.hs +++ b/System/OsPath/Internal.hs @@ -7,7 +7,7 @@ module System.OsPath.Internal where import {-# SOURCE #-} System.OsPath ( isValid ) import System.OsPath.Types -import qualified System.OsString.Internal as OS +import qualified "os-string" System.OsString.Internal as OS import Control.Monad.Catch ( MonadThrow ) @@ -19,7 +19,7 @@ import Language.Haskell.TH.Syntax ( Lift (..), lift ) import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) -import System.OsString.Internal.Types +import "os-string" System.OsString.Internal.Types import System.OsPath.Encoding import Control.Monad (when) import System.IO diff --git a/System/OsPath/Types.hs b/System/OsPath/Types.hs index 6bf1b774..8d4790ff 100644 --- a/System/OsPath/Types.hs +++ b/System/OsPath/Types.hs @@ -18,7 +18,7 @@ module System.OsPath.Types ) where -import System.OsString.Internal.Types +import "os-string" System.OsString.Internal.Types -- | Filepaths are @wchar_t*@ data on windows as passed to syscalls. diff --git a/System/OsString.hs b/System/OsString.hs index c11a4bdf..d4ab8aa0 100644 --- a/System/OsString.hs +++ b/System/OsString.hs @@ -14,7 +14,7 @@ -- -- It captures the notion of syscall specific encoding (or the lack thereof) to avoid roundtrip issues -- and memory fragmentation by using unpinned byte arrays. Bytes are not touched or interpreted. -module System.OsString +module System.OsString {-# DEPRECATED "Use System.OsString from os-string package instead. This module will be removed in filepath >= 1.5." #-} ( -- * String types OsString @@ -24,6 +24,8 @@ module System.OsString , encodeWith , encodeFS , osstr + , empty + , singleton , pack -- * OsString deconstruction @@ -40,21 +42,89 @@ module System.OsString -- * Word deconstruction , toChar + + -- * Basic interface + , snoc + , cons + , last + , tail + , uncons + , head + , init + , unsnoc + , null + , length + + -- * Transforming OsString + , map + , reverse + , intercalate + + -- * Reducing OsStrings (folds) + , foldl + , foldl' + , foldl1 + , foldl1' + , foldr + , foldr' + , foldr1 + , foldr1' + + -- * Special folds + , all + , any + , concat + + -- * Generating and unfolding OsStrings + , replicate + , unfoldr + , unfoldrN + + -- * Substrings + -- ** Breaking strings + , take + , takeEnd + , takeWhileEnd + , takeWhile + , drop + , dropEnd + , dropWhileEnd + , dropWhile + , break + , breakEnd + , span + , spanEnd + , splitAt + , split + , splitWith + , stripSuffix + , stripPrefix + + -- * Predicates + , isInfixOf + , isPrefixOf + , isSuffixOf + -- ** Search for arbitrary susbstrings + , breakSubstring + + -- * Searching OsStrings + -- ** Searching by equality + , elem + , find + , filter + , partition + + -- * Indexing OsStrings + , index + , indexMaybe + , (!?) + , elemIndex + , elemIndices + , count + , findIndex + , findIndices ) where -import System.OsString.Internal - ( unsafeFromChar - , toChar - , encodeUtf - , encodeWith - , encodeFS - , osstr - , pack - , decodeUtf - , decodeWith - , decodeFS - , unpack - ) -import System.OsString.Internal.Types - ( OsString, OsChar ) +import "os-string" System.OsString +import Prelude () diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs deleted file mode 100644 index 80eb69b5..00000000 --- a/System/OsString/Common.hs +++ /dev/null @@ -1,315 +0,0 @@ -{- HLINT ignore "Unused LANGUAGE pragma" -} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE PatternSynonyms #-} --- This template expects CPP definitions for: --- MODULE_NAME = Posix | Windows --- IS_WINDOWS = False | True --- -#if defined(WINDOWS) -#define WINDOWS_DOC -#else -#define POSIX_DOC -#endif - -module System.OsString.MODULE_NAME - ( - -- * Types -#ifdef WINDOWS - WindowsString - , WindowsChar -#else - PosixString - , PosixChar -#endif - - -- * String construction - , encodeUtf - , encodeWith - , encodeFS - , fromBytes - , pstr - , pack - - -- * String deconstruction - , decodeUtf - , decodeWith - , decodeFS - , unpack - - -- * Word construction - , unsafeFromChar - - -- * Word deconstruction - , toChar - ) -where - - - -import System.OsString.Internal.Types ( -#ifdef WINDOWS - WindowsString(..), WindowsChar(..) -#else - PosixString(..), PosixChar(..) -#endif - ) - -import Data.Char -import Control.Monad.Catch - ( MonadThrow, throwM ) -import Data.ByteString.Internal - ( ByteString ) -import Control.Exception - ( SomeException, try, displayException ) -import Control.DeepSeq ( force ) -import Data.Bifunctor ( first ) -import GHC.IO - ( evaluate, unsafePerformIO ) -import qualified GHC.Foreign as GHC -import Language.Haskell.TH.Quote - ( QuasiQuoter (..) ) -import Language.Haskell.TH.Syntax - ( Lift (..), lift ) - - -import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) -#ifdef WINDOWS -import System.OsPath.Encoding -import System.IO - ( TextEncoding, utf16le ) -import GHC.IO.Encoding.UTF16 ( mkUTF16le ) -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 -import qualified System.OsPath.Data.ByteString.Short as BS8 -#else -import System.OsPath.Encoding -import System.IO - ( TextEncoding, utf8 ) -import GHC.IO.Encoding.UTF8 ( mkUTF8 ) -import qualified System.OsPath.Data.ByteString.Short as BS -#endif - - - -#ifdef WINDOWS_DOC --- | Partial unicode friendly encoding. --- --- This encodes as UTF16-LE (strictly), which is a pretty good guess. --- --- Throws an 'EncodingException' if encoding fails. -#else --- | Partial unicode friendly encoding. --- --- This encodes as UTF8 (strictly), which is a good guess. --- --- Throws an 'EncodingException' if encoding fails. -#endif -encodeUtf :: MonadThrow m => String -> m PLATFORM_STRING -#ifdef WINDOWS -encodeUtf = either throwM pure . encodeWith utf16le -#else -encodeUtf = either throwM pure . encodeWith utf8 -#endif - --- | Encode a 'String' with the specified encoding. -encodeWith :: TextEncoding - -> String - -> Either EncodingException PLATFORM_STRING -encodeWith enc str = unsafePerformIO $ do -#ifdef WINDOWS - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> WindowsString <$> BS8.packCStringLen cstr - evaluate $ force $ first (flip EncodingError Nothing . displayException) r -#else - r <- try @SomeException $ GHC.withCStringLen enc str $ \cstr -> PosixString <$> BS.packCStringLen cstr - evaluate $ force $ first (flip EncodingError Nothing . displayException) r -#endif - -#ifdef WINDOWS_DOC --- | This mimics the behavior of the base library when doing filesystem --- operations, which does permissive UTF-16 encoding, where coding errors generate --- Chars in the surrogate range. --- --- The reason this is in IO is because it unifies with the Posix counterpart, --- which does require IO. This is safe to 'unsafePerformIO'/'unsafeDupablePerformIO'. -#else --- | This mimics the behavior of the base library when doing filesystem --- operations, which uses shady PEP 383 style encoding (based on the current locale, --- but PEP 383 only works properly on UTF-8 encodings, so good luck). --- --- Looking up the locale requires IO. If you're not worried about calls --- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure --- to deeply evaluate the result to catch exceptions). -#endif -encodeFS :: String -> IO PLATFORM_STRING -#ifdef WINDOWS -encodeFS = fmap WindowsString . encodeWithBaseWindows -#else -encodeFS = fmap PosixString . encodeWithBasePosix -#endif - - -#ifdef WINDOWS_DOC --- | Partial unicode friendly decoding. --- --- This decodes as UTF16-LE (strictly), which is a pretty good. --- --- Throws a 'EncodingException' if decoding fails. -#else --- | Partial unicode friendly decoding. --- --- This decodes as UTF8 (strictly), which is a good guess. Note that --- filenames on unix are encoding agnostic char arrays. --- --- Throws a 'EncodingException' if decoding fails. -#endif -decodeUtf :: MonadThrow m => PLATFORM_STRING -> m String -#ifdef WINDOWS -decodeUtf = either throwM pure . decodeWith utf16le -#else -decodeUtf = either throwM pure . decodeWith utf8 -#endif - -#ifdef WINDOWS --- | Decode a 'WindowsString' with the specified encoding. --- --- The String is forced into memory to catch all exceptions. -decodeWith :: TextEncoding - -> PLATFORM_STRING - -> Either EncodingException String -decodeWith winEnc (WindowsString ba) = unsafePerformIO $ do - r <- try @SomeException $ BS8.useAsCStringLen ba $ \fp -> GHC.peekCStringLen winEnc fp - evaluate $ force $ first (flip EncodingError Nothing . displayException) r -#else --- | Decode a 'PosixString' with the specified encoding. --- --- The String is forced into memory to catch all exceptions. -decodeWith :: TextEncoding - -> PLATFORM_STRING - -> Either EncodingException String -decodeWith unixEnc (PosixString ba) = unsafePerformIO $ do - r <- try @SomeException $ BS.useAsCStringLen ba $ \fp -> GHC.peekCStringLen unixEnc fp - evaluate $ force $ first (flip EncodingError Nothing . displayException) r -#endif - - -#ifdef WINDOWS_DOC --- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem --- operations, which does permissive UTF-16 encoding, where coding errors generate --- Chars in the surrogate range. --- --- The reason this is in IO is because it unifies with the Posix counterpart, --- which does require IO. 'unsafePerformIO'/'unsafeDupablePerformIO' are safe, however. -#else --- | This mimics the behavior of the base library when doing filesystem --- operations, which uses shady PEP 383 style encoding (based on the current locale, --- but PEP 383 only works properly on UTF-8 encodings, so good luck). --- --- Looking up the locale requires IO. If you're not worried about calls --- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure --- to deeply evaluate the result to catch exceptions). -#endif -decodeFS :: PLATFORM_STRING -> IO String -#ifdef WINDOWS -decodeFS (WindowsString ba) = decodeWithBaseWindows ba -#else -decodeFS (PosixString ba) = decodeWithBasePosix ba -#endif - - -#ifdef WINDOWS_DOC --- | Constructs a platform string from a ByteString. --- --- This ensures valid UCS-2LE. --- Note that this doesn't expand Word8 to Word16 on windows, so you may get invalid UTF-16. --- --- Throws 'EncodingException' on invalid UCS-2LE (although unlikely). -#else --- | Constructs a platform string from a ByteString. --- --- This is a no-op. -#endif -fromBytes :: MonadThrow m - => ByteString - -> m PLATFORM_STRING -#ifdef WINDOWS -fromBytes bs = - let ws = WindowsString . BS16.toShort $ bs - in either throwM (const . pure $ ws) $ decodeWith ucs2le ws -#else -fromBytes = pure . PosixString . BS.toShort -#endif - - -#ifdef WINDOWS_DOC --- | QuasiQuote a 'WindowsString'. This accepts Unicode characters --- and encodes as UTF-16LE on windows. -#else --- | QuasiQuote a 'PosixString'. This accepts Unicode characters --- and encodes as UTF-8 on unix. -#endif -pstr :: QuasiQuoter -pstr = - QuasiQuoter -#ifdef WINDOWS - { quoteExp = \s -> do - ps <- either (fail . show) pure $ encodeWith (mkUTF16le ErrorOnCodingFailure) s - lift ps - , quotePat = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" - , quoteType = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a type)" - , quoteDec = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" - } -#else - { quoteExp = \s -> do - ps <- either (fail . show) pure $ encodeWith (mkUTF8 ErrorOnCodingFailure) s - lift ps - , quotePat = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" - , quoteType = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a type)" - , quoteDec = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" - } -#endif - - --- | Unpack a platform string to a list of platform words. -unpack :: PLATFORM_STRING -> [PLATFORM_WORD] -#ifdef WINDOWS -unpack (WindowsString ba) = WindowsChar <$> BS16.unpack ba -#else -unpack (PosixString ba) = PosixChar <$> BS.unpack ba -#endif - - --- | Pack a list of platform words to a platform string. --- --- Note that using this in conjunction with 'unsafeFromChar' to --- convert from @[Char]@ to platform string is probably not what --- you want, because it will truncate unicode code points. -pack :: [PLATFORM_WORD] -> PLATFORM_STRING -#ifdef WINDOWS -pack = WindowsString . BS16.pack . fmap (\(WindowsChar w) -> w) -#else -pack = PosixString . BS.pack . fmap (\(PosixChar w) -> w) -#endif - - -#ifdef WINDOWS --- | Truncates to 2 octets. -unsafeFromChar :: Char -> PLATFORM_WORD -unsafeFromChar = WindowsChar . fromIntegral . fromEnum -#else --- | Truncates to 1 octet. -unsafeFromChar :: Char -> PLATFORM_WORD -unsafeFromChar = PosixChar . fromIntegral . fromEnum -#endif - --- | Converts back to a unicode codepoint (total). -toChar :: PLATFORM_WORD -> Char -#ifdef WINDOWS -toChar (WindowsChar w) = chr $ fromIntegral w -#else -toChar (PosixChar w) = chr $ fromIntegral w -#endif diff --git a/System/OsString/Internal.hs b/System/OsString/Internal.hs index f72fdcb7..e5d70b8f 100644 --- a/System/OsString/Internal.hs +++ b/System/OsString/Internal.hs @@ -1,174 +1,9 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE ScopedTypeVariables #-} -module System.OsString.Internal where +module System.OsString.Internal {-# DEPRECATED "Use System.OsString.Internal from os-string package instead. This module will be removed in filepath >= 1.5." #-} (module System.OsString.Internal) where -import System.OsString.Internal.Types - -import Control.Monad.Catch - ( MonadThrow ) -import Data.ByteString - ( ByteString ) -import Data.Char -import Language.Haskell.TH.Quote - ( QuasiQuoter (..) ) -import Language.Haskell.TH.Syntax - ( Lift (..), lift ) -import System.IO - ( TextEncoding ) - -import System.OsPath.Encoding ( EncodingException(..) ) -import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -import GHC.IO.Encoding.UTF16 ( mkUTF16le ) -import qualified System.OsString.Windows as PF -#else -import GHC.IO.Encoding.UTF8 ( mkUTF8 ) -import qualified System.OsString.Posix as PF -#endif - - - - --- | Partial unicode friendly encoding. --- --- On windows this encodes as UTF16-LE (strictly), which is a pretty good guess. --- On unix this encodes as UTF8 (strictly), which is a good guess. --- --- Throws a 'EncodingException' if encoding fails. -encodeUtf :: MonadThrow m => String -> m OsString -encodeUtf = fmap OsString . PF.encodeUtf - --- | Encode an 'OsString' given the platform specific encodings. -encodeWith :: TextEncoding -- ^ unix text encoding - -> TextEncoding -- ^ windows text encoding - -> String - -> Either EncodingException OsString -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -encodeWith _ winEnc str = OsString <$> PF.encodeWith winEnc str -#else -encodeWith unixEnc _ str = OsString <$> PF.encodeWith unixEnc str -#endif - --- | Like 'encodeUtf', except this mimics the behavior of the base library when doing filesystem --- operations, which is: --- --- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, --- but PEP 383 only works properly on UTF-8 encodings, so good luck) --- 2. on windows does permissive UTF-16 encoding, where coding errors generate --- Chars in the surrogate range --- --- Looking up the locale requires IO. If you're not worried about calls --- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure --- to deeply evaluate the result to catch exceptions). -encodeFS :: String -> IO OsString -encodeFS = fmap OsString . PF.encodeFS - - --- | Partial unicode friendly decoding. --- --- On windows this decodes as UTF16-LE (strictly), which is a pretty good guess. --- On unix this decodes as UTF8 (strictly), which is a good guess. Note that --- filenames on unix are encoding agnostic char arrays. --- --- Throws a 'EncodingException' if decoding fails. -decodeUtf :: MonadThrow m => OsString -> m String -decodeUtf (OsString x) = PF.decodeUtf x - --- | Decode an 'OsString' with the specified encoding. --- --- The String is forced into memory to catch all exceptions. -decodeWith :: TextEncoding -- ^ unix text encoding - -> TextEncoding -- ^ windows text encoding - -> OsString - -> Either EncodingException String -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -decodeWith _ winEnc (OsString x) = PF.decodeWith winEnc x -#else -decodeWith unixEnc _ (OsString x) = PF.decodeWith unixEnc x -#endif - - --- | Like 'decodeUtf', except this mimics the behavior of the base library when doing filesystem --- operations, which is: --- --- 1. on unix, uses shady PEP 383 style encoding (based on the current locale, --- but PEP 383 only works properly on UTF-8 encodings, so good luck) --- 2. on windows does permissive UTF-16 encoding, where coding errors generate --- Chars in the surrogate range --- --- Looking up the locale requires IO. If you're not worried about calls --- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure --- to deeply evaluate the result to catch exceptions). -decodeFS :: OsString -> IO String -decodeFS (OsString x) = PF.decodeFS x - - --- | Constructs an @OsString@ from a ByteString. --- --- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked. --- --- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely). -fromBytes :: MonadThrow m - => ByteString - -> m OsString -fromBytes = fmap OsString . PF.fromBytes - - --- | QuasiQuote an 'OsString'. This accepts Unicode characters --- and encodes as UTF-8 on unix and UTF-16 on windows. -osstr :: QuasiQuoter -osstr = - QuasiQuoter -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - { quoteExp = \s -> do - osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF16le ErrorOnCodingFailure) $ s - lift osp - , quotePat = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" - , quoteType = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a type)" - , quoteDec = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" - } -#else - { quoteExp = \s -> do - osp <- either (fail . show) (pure . OsString) . PF.encodeWith (mkUTF8 ErrorOnCodingFailure) $ s - lift osp - , quotePat = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" - , quoteType = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a type)" - , quoteDec = \_ -> - fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" - } -#endif - - --- | Unpack an 'OsString' to a list of 'OsChar'. -unpack :: OsString -> [OsChar] -unpack (OsString x) = OsChar <$> PF.unpack x - - --- | Pack a list of 'OsChar' to an 'OsString' --- --- Note that using this in conjunction with 'unsafeFromChar' to --- convert from @[Char]@ to 'OsString' is probably not what --- you want, because it will truncate unicode code points. -pack :: [OsChar] -> OsString -pack = OsString . PF.pack . fmap (\(OsChar x) -> x) - - --- | Truncates on unix to 1 and on Windows to 2 octets. -unsafeFromChar :: Char -> OsChar -unsafeFromChar = OsChar . PF.unsafeFromChar - --- | Converts back to a unicode codepoint (total). -toChar :: OsChar -> Char -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -toChar (OsChar (WindowsChar w)) = chr $ fromIntegral w -#else -toChar (OsChar (PosixChar w)) = chr $ fromIntegral w -#endif +import "os-string" System.OsString.Internal diff --git a/System/OsString/Internal/Types.hs b/System/OsString/Internal/Types.hs index 33f960ff..7e7c83cd 100644 --- a/System/OsString/Internal/Types.hs +++ b/System/OsString/Internal/Types.hs @@ -8,7 +8,7 @@ {-# LANGUAGE TypeApplications #-} {-# LANGUAGE PatternSynonyms #-} -module System.OsString.Internal.Types +module System.OsString.Internal.Types {-# DEPRECATED "Use System.OsString.Internal.Types from os-string package instead. This module will be removed in filepath >= 1.5." #-} ( WindowsString(..) , pattern WS @@ -29,218 +29,4 @@ module System.OsString.Internal.Types ) where - -import Control.DeepSeq -import Data.Data -import Data.Word -import Language.Haskell.TH.Syntax - ( Lift (..), lift ) -#if !MIN_VERSION_base(4,11,0) -import Data.Semigroup -#endif -import GHC.Generics (Generic) - -import System.OsPath.Encoding.Internal -import qualified System.OsPath.Data.ByteString.Short as BS -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 -#if MIN_VERSION_template_haskell(2,16,0) -import qualified Language.Haskell.TH.Syntax as TH -#endif - --- Using unpinned bytearrays to avoid Heap fragmentation and --- which are reasonably cheap to pass to FFI calls --- wrapped with typeclass-friendly types allowing to avoid CPP --- --- Note that, while unpinned bytearrays incur a memcpy on each --- FFI call, this overhead is generally much preferable to --- the memory fragmentation of pinned bytearrays - --- | Commonly used windows string as wide character bytes. -newtype WindowsString = WindowsString { getWindowsString :: BS.ShortByteString } - deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) - --- | Decodes as UCS-2. -instance Show WindowsString where - -- cWcharsToChars_UCS2 is total - show = show . cWcharsToChars_UCS2 . BS16.unpack . getWindowsString - --- | Just a short bidirectional synonym for 'WindowsString' constructor. -pattern WS :: BS.ShortByteString -> WindowsString -pattern WS { unWS } <- WindowsString unWS where - WS a = WindowsString a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE WS #-} -#endif - - -instance Lift WindowsString where - lift (WindowsString bs) - = [| WindowsString (BS.pack $(lift $ BS.unpack bs)) :: WindowsString |] -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - --- | Commonly used Posix string as uninterpreted @char[]@ --- array. -newtype PosixString = PosixString { getPosixString :: BS.ShortByteString } - deriving (Eq, Ord, Semigroup, Monoid, Typeable, Generic, NFData) - --- | Prints the raw bytes without decoding. -instance Show PosixString where - show (PosixString ps) = show ps - --- | Just a short bidirectional synonym for 'PosixString' constructor. -pattern PS :: BS.ShortByteString -> PosixString -pattern PS { unPS } <- PosixString unPS where - PS a = PosixString a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE PS #-} -#endif - -instance Lift PosixString where - lift (PosixString bs) - = [| PosixString (BS.pack $(lift $ BS.unpack bs)) :: PosixString |] -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - - -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -type PlatformString = WindowsString -#else -type PlatformString = PosixString -#endif - -newtype WindowsChar = WindowsChar { getWindowsChar :: Word16 } - deriving (Eq, Ord, Typeable, Generic, NFData) - -instance Show WindowsChar where - show (WindowsChar wc) = show wc - -newtype PosixChar = PosixChar { getPosixChar :: Word8 } - deriving (Eq, Ord, Typeable, Generic, NFData) - -instance Show PosixChar where - show (PosixChar pc) = show pc - --- | Just a short bidirectional synonym for 'WindowsChar' constructor. -pattern WW :: Word16 -> WindowsChar -pattern WW { unWW } <- WindowsChar unWW where - WW a = WindowsChar a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE WW #-} -#endif - --- | Just a short bidirectional synonym for 'PosixChar' constructor. -pattern PW :: Word8 -> PosixChar -pattern PW { unPW } <- PosixChar unPW where - PW a = PosixChar a -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE PW #-} -#endif - -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -type PlatformChar = WindowsChar -#else -type PlatformChar = PosixChar -#endif - - --- | Newtype representing short operating system specific strings. --- --- Internally this is either 'WindowsString' or 'PosixString', --- depending on the platform. Both use unpinned --- 'ShortByteString' for efficiency. --- --- The constructor is only exported via "System.OsString.Internal.Types", since --- dealing with the internals isn't generally recommended, but supported --- in case you need to write platform specific code. -newtype OsString = OsString { getOsString :: PlatformString } - deriving (Typeable, Generic, NFData) - --- | On windows, decodes as UCS-2. On unix prints the raw bytes without decoding. -instance Show OsString where - show (OsString os) = show os - --- | Byte equality of the internal representation. -instance Eq OsString where - (OsString a) == (OsString b) = a == b - --- | Byte ordering of the internal representation. -instance Ord OsString where - compare (OsString a) (OsString b) = compare a b - - --- | \"String-Concatenation\" for 'OsString'. This is __not__ the same --- as '()'. -instance Monoid OsString where -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - mempty = OsString (WindowsString BS.empty) -#if MIN_VERSION_base(4,16,0) - mappend = (<>) -#else - mappend (OsString (WindowsString a)) (OsString (WindowsString b)) - = OsString (WindowsString (mappend a b)) -#endif -#else - mempty = OsString (PosixString BS.empty) -#if MIN_VERSION_base(4,16,0) - mappend = (<>) -#else - mappend (OsString (PosixString a)) (OsString (PosixString b)) - = OsString (PosixString (mappend a b)) -#endif -#endif -#if MIN_VERSION_base(4,11,0) -instance Semigroup OsString where -#if MIN_VERSION_base(4,16,0) -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - (<>) (OsString (WindowsString a)) (OsString (WindowsString b)) - = OsString (WindowsString (mappend a b)) -#else - (<>) (OsString (PosixString a)) (OsString (PosixString b)) - = OsString (PosixString (mappend a b)) -#endif -#else - (<>) = mappend -#endif -#endif - - -instance Lift OsString where -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) - lift (OsString (WindowsString bs)) - = [| OsString (WindowsString (BS.pack $(lift $ BS.unpack bs))) :: OsString |] -#else - lift (OsString (PosixString bs)) - = [| OsString (PosixString (BS.pack $(lift $ BS.unpack bs))) :: OsString |] -#endif -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - - --- | Newtype representing a code unit. --- --- On Windows, this is restricted to two-octet codepoints 'Word16', --- on POSIX one-octet ('Word8'). -newtype OsChar = OsChar { getOsChar :: PlatformChar } - deriving (Typeable, Generic, NFData) - -instance Show OsChar where - show (OsChar pc) = show pc - --- | Byte equality of the internal representation. -instance Eq OsChar where - (OsChar a) == (OsChar b) = a == b - --- | Byte ordering of the internal representation. -instance Ord OsChar where - compare (OsChar a) (OsChar b) = compare a b - +import "os-string" System.OsString.Internal.Types diff --git a/System/OsString/Posix.hs b/System/OsString/Posix.hs index 33b4d843..a75740bb 100644 --- a/System/OsString/Posix.hs +++ b/System/OsString/Posix.hs @@ -1,7 +1,4 @@ -{-# LANGUAGE CPP #-} -#undef WINDOWS -#define MODULE_NAME Posix -#define PLATFORM_STRING PosixString -#define PLATFORM_WORD PosixChar -#define IS_WINDOWS False -#include "Common.hs" +module System.OsString.Posix {-# DEPRECATED "Use System.OsString.Posix from os-string package instead. This module will be removed in filepath >= 1.5." #-} (module P) where + +import "os-string" System.OsString.Posix as P + diff --git a/System/OsString/Types.hs b/System/OsString/Types.hs deleted file mode 100644 index 896c3b21..00000000 --- a/System/OsString/Types.hs +++ /dev/null @@ -1,14 +0,0 @@ -module System.OsString.Types - ( - WindowsString - , PosixString - , PlatformString - , WindowsChar - , PosixChar - , PlatformChar - , OsString - , OsChar - ) -where - -import System.OsString.Internal.Types diff --git a/System/OsString/Windows.hs b/System/OsString/Windows.hs index 1f15653b..d8902962 100644 --- a/System/OsString/Windows.hs +++ b/System/OsString/Windows.hs @@ -1,13 +1,4 @@ -{-# LANGUAGE CPP #-} -#undef POSIX -#define MODULE_NAME Windows -#define PLATFORM_STRING WindowsString -#define PLATFORM_WORD WindowsChar -#define IS_WINDOWS True -#define WINDOWS -#include "Common.hs" -#undef MODULE_NAME -#undef FILEPATH_NAME -#undef OSSTRING_NAME -#undef IS_WINDOWS -#undef WINDOWS +module System.OsString.Windows {-# DEPRECATED "Use System.OsString.Windows from os-string package instead. This module will be removed in filepath >= 1.5." #-} (module W) where + +import "os-string" System.OsString.Windows as W + diff --git a/bench/BenchFilePath.hs b/bench/BenchFilePath.hs index 5319f1c0..c6b395ba 100644 --- a/bench/BenchFilePath.hs +++ b/bench/BenchFilePath.hs @@ -1,20 +1,21 @@ {-# LANGUAGE TypeApplications #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE PackageImports #-} module Main where import System.OsPath.Types -import System.OsPath.Encoding ( ucs2le ) -import qualified System.OsString.Internal.Types as OST +import "os-string" System.OsString.Encoding ( ucs2le ) +import qualified "os-string" System.OsString.Internal.Types as OST import qualified Data.ByteString.Short as SBS import Test.Tasty.Bench import qualified System.FilePath.Posix as PF import qualified System.FilePath.Posix as WF -import qualified System.OsString.Posix as OSP -import qualified System.OsString.Windows as WSP +import qualified "os-string" System.OsString.Posix as OSP +import qualified "os-string" System.OsString.Windows as WSP import qualified System.OsPath.Posix as APF import qualified System.OsPath.Windows as AWF diff --git a/cabal.project b/cabal.project index 6f920794..4da33bc7 100644 --- a/cabal.project +++ b/cabal.project @@ -1 +1,8 @@ packages: ./ + +-- TODO: remove +source-repository-package + type: git + location: https://github.com/haskell/os-string.git + tag: d06d98f71e0e93154c856a4cd6e66a635dfeacfd + diff --git a/changelog.md b/changelog.md index a5bca439..80a09dd9 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,11 @@ _Note: below all `FilePath` values are unquoted, so `\\` really means two backslashes._ +## 1.4.200.0 *??? 2023* + +* Introduce bytestring-like functions (substrings, predicates, searching, etc.) to `System.OsString`, `System.OsString.Windows` and `System.OsString.Posix` +* split `OsString` functionality out into [`os-string` package](http://hackage.haskell.org/package/os-string) and deprecate relevant modules + ## 1.4.100.4 *Jul 2023* * Fix isInfixOf and breakSubString in Word16, wrt [#195](https://github.com/haskell/filepath/issues/195) diff --git a/filepath.cabal b/filepath.cabal index c3b76370..6e8a50a7 100644 --- a/filepath.cabal +++ b/filepath.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: filepath -version: 1.4.100.4 +version: 1.4.200.0 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause @@ -54,8 +54,6 @@ extra-source-files: Makefile System/FilePath/Internal.hs System/OsPath/Common.hs - System/OsString/Common.hs - tests/bytestring-tests/Properties/Common.hs extra-doc-files: changelog.md @@ -94,6 +92,9 @@ library System.OsString.Posix System.OsString.Windows + default-extensions: + PackageImports + other-extensions: CPP PatternGuards @@ -108,6 +109,7 @@ library , deepseq , exceptions , template-haskell + , os-string >=1.0.0 ghc-options: -Wall @@ -127,6 +129,7 @@ test-suite filepath-tests , base , bytestring >=0.11.3.0 , filepath + , os-string >=1.0.0 , QuickCheck >=2.7 && <2.15 default-language: Haskell2010 @@ -148,23 +151,7 @@ test-suite filepath-equivalent-tests , base , bytestring >=0.11.3.0 , filepath - , QuickCheck >=2.7 && <2.15 - -test-suite bytestring-tests - default-language: Haskell2010 - ghc-options: -Wall - type: exitcode-stdio-1.0 - main-is: Main.hs - hs-source-dirs: tests tests/bytestring-tests - other-modules: - Properties.ShortByteString - Properties.ShortByteString.Word16 - TestUtil - - build-depends: - , base - , bytestring >=0.11.3.0 - , filepath + , os-string >=1.0.0 , QuickCheck >=2.7 && <2.15 test-suite abstract-filepath @@ -184,6 +171,7 @@ test-suite abstract-filepath , bytestring >=0.11.3.0 , deepseq , filepath + , os-string >=1.0.0 , QuickCheck >=2.7 && <2.15 , quickcheck-classes-base ^>=0.6.2 @@ -198,6 +186,7 @@ benchmark bench-filepath , bytestring >=0.11.3.0 , deepseq , filepath + , os-string >=1.0.0 , tasty-bench ghc-options: -with-rtsopts=-A32m diff --git a/tests/TestUtil.hs b/tests/TestUtil.hs index 1f926a12..b6fb57d0 100644 --- a/tests/TestUtil.hs +++ b/tests/TestUtil.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE PackageImports #-} {-# OPTIONS_GHC -Wno-orphans #-} module TestUtil( @@ -24,8 +25,8 @@ import qualified System.OsPath.Windows as AFP_W import qualified System.OsPath.Posix as AFP_P import System.OsPath.Types #endif -import System.OsString.Internal.Types -import System.OsPath.Encoding.Internal +import "os-string" System.OsString.Internal.Types +import "os-string" System.OsString.Encoding.Internal import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import GHC.IO.Encoding.Failure diff --git a/tests/abstract-filepath/Arbitrary.hs b/tests/abstract-filepath/Arbitrary.hs index 7918eb16..bc06bcd9 100644 --- a/tests/abstract-filepath/Arbitrary.hs +++ b/tests/abstract-filepath/Arbitrary.hs @@ -1,13 +1,14 @@ +{-# LANGUAGE PackageImports #-} {-# OPTIONS_GHC -Wno-orphans #-} module Arbitrary where import Data.Char import Data.Maybe -import System.OsString -import System.OsString.Internal.Types -import qualified System.OsString.Posix as Posix -import qualified System.OsString.Windows as Windows +import "os-string" System.OsString +import "os-string" System.OsString.Internal.Types +import qualified "os-string" System.OsString.Posix as Posix +import qualified "os-string" System.OsString.Windows as Windows import Data.ByteString ( ByteString ) import qualified Data.ByteString as ByteString import Test.QuickCheck diff --git a/tests/abstract-filepath/EncodingSpec.hs b/tests/abstract-filepath/EncodingSpec.hs index 1a0c3ac3..b9ae810c 100644 --- a/tests/abstract-filepath/EncodingSpec.hs +++ b/tests/abstract-filepath/EncodingSpec.hs @@ -2,6 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE PackageImports #-} module EncodingSpec where @@ -12,8 +13,8 @@ import Arbitrary import Test.QuickCheck import Data.Either ( isRight ) -import qualified System.OsPath.Data.ByteString.Short as BS8 -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 +import qualified "os-string" System.OsString.Data.ByteString.Short as BS8 +import qualified "os-string" System.OsString.Data.ByteString.Short.Word16 as BS16 import System.OsPath.Encoding.Internal import GHC.IO (unsafePerformIO) import GHC.IO.Encoding ( setFileSystemEncoding ) diff --git a/tests/abstract-filepath/OsPathSpec.hs b/tests/abstract-filepath/OsPathSpec.hs index bee6fb57..a1ebd4b1 100644 --- a/tests/abstract-filepath/OsPathSpec.hs +++ b/tests/abstract-filepath/OsPathSpec.hs @@ -2,20 +2,21 @@ {-# LANGUAGE TypeApplications #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE PackageImports #-} module OsPathSpec where import Data.Maybe import System.OsPath as OSP -import System.OsString.Internal.Types +import "os-string" System.OsString.Internal.Types import System.OsPath.Posix as Posix import System.OsPath.Windows as Windows import System.OsPath.Encoding -import qualified System.OsString.Internal.Types as OS -import System.OsPath.Data.ByteString.Short ( toShort ) -import System.OsString.Posix as PosixS -import System.OsString.Windows as WindowsS +import qualified "os-string" System.OsString.Internal.Types as OS +import "os-string" System.OsString.Data.ByteString.Short ( toShort ) +import "os-string" System.OsString.Posix as PosixS hiding (map) +import "os-string" System.OsString.Windows as WindowsS hiding (map) import Control.Exception import Data.ByteString ( ByteString ) @@ -29,8 +30,8 @@ import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import Control.DeepSeq import Data.Bifunctor ( first ) import qualified Data.ByteString.Char8 as C -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 -import qualified System.OsPath.Data.ByteString.Short as SBS +import qualified "os-string" System.OsString.Data.ByteString.Short.Word16 as BS16 +import qualified "os-string" System.OsString.Data.ByteString.Short as SBS import Data.Char ( ord ) import Data.Proxy ( Proxy(..) ) diff --git a/tests/bytestring-tests/Main.hs b/tests/bytestring-tests/Main.hs deleted file mode 100644 index a37e79a9..00000000 --- a/tests/bytestring-tests/Main.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE TypeApplications #-} - -module Main (main) where - -import qualified Properties.ShortByteString as PropSBS -import qualified Properties.ShortByteString.Word16 as PropSBSW16 -import TestUtil - -main :: IO () -main = runTests (PropSBS.tests ++ PropSBSW16.tests) diff --git a/tests/bytestring-tests/Properties/Common.hs b/tests/bytestring-tests/Properties/Common.hs deleted file mode 100644 index c5ef566a..00000000 --- a/tests/bytestring-tests/Properties/Common.hs +++ /dev/null @@ -1,441 +0,0 @@ --- | --- Module : Properties.ShortByteString --- Copyright : (c) Andrew Lelechenko 2021 --- License : BSD-style - -{-# LANGUAGE CPP #-} -{-# LANGUAGE ViewPatterns #-} -{-# OPTIONS_GHC -Wno-orphans #-} - --- We are happy to sacrifice optimizations in exchange for faster compilation, --- but need to test rewrite rules. As one can check using -ddump-rule-firings, --- rewrite rules do not fire in -O0 mode, so we use -O1, but disable almost all --- optimizations. It roughly halves compilation time. -{-# OPTIONS_GHC -O1 -fenable-rewrite-rules - -fmax-simplifier-iterations=1 -fsimplifier-phases=0 - -fno-call-arity -fno-case-merge -fno-cmm-elim-common-blocks -fno-cmm-sink - -fno-cpr-anal -fno-cse -fno-do-eta-reduction -fno-float-in -fno-full-laziness - -fno-loopification -fno-specialise -fno-strictness #-} - -#ifdef WORD16 -module Properties.ShortByteString.Word16 (tests) where -import System.OsPath.Data.ByteString.Short.Internal (_nul, isSpace) -import qualified System.OsPath.Data.ByteString.Short.Word16 as B -import qualified System.OsPath.Data.ByteString.Short as BS -#else -module Properties.ShortByteString (tests) where -import qualified System.OsPath.Data.ByteString.Short as B -import qualified Data.Char as C -#endif -import Data.ByteString.Short (ShortByteString) - -import Data.Word - -import Control.Arrow -import Data.Foldable -import Data.List as L -import Data.Semigroup -import Data.Tuple -import Test.QuickCheck -import Test.QuickCheck.Monadic ( monadicIO, run ) -import Text.Show.Functions () - -#ifdef WORD16 -numWord :: ShortByteString -> Int -numWord = B.numWord16 - -toElem :: Word16 -> Word16 -toElem = id - -swapW :: Word16 -> Word16 -swapW = byteSwap16 - -sizedByteString :: Int -> Gen ShortByteString -sizedByteString n = do m <- choose(0, n) - fmap B.pack $ vectorOf m arbitrary - -instance Arbitrary ShortByteString where - arbitrary = do - bs <- sized sizedByteString - n <- choose (0, 2) - return (B.drop n bs) -- to give us some with non-0 offset - -instance CoArbitrary ShortByteString where - coarbitrary s = coarbitrary (B.unpack s) - -#else -_nul :: Word8 -_nul = 0x00 - -isSpace :: Word8 -> Bool -isSpace = C.isSpace . word8ToChar - --- | Total conversion to char. -word8ToChar :: Word8 -> Char -word8ToChar = C.chr . fromIntegral - -numWord :: ShortByteString -> Int -numWord = B.length - -toElem :: Word8 -> Word8 -toElem = id - -swapW :: Word8 -> Word8 -swapW = id - - -sizedByteString :: Int -> Gen ShortByteString -sizedByteString n = do m <- choose(0, n) - fmap B.pack $ vectorOf m arbitrary - -instance Arbitrary ShortByteString where - arbitrary = do - bs <- sized sizedByteString - n <- choose (0, 2) - return (B.drop n bs) -- to give us some with non-0 offset - shrink = map B.pack . shrink . B.unpack - -instance CoArbitrary ShortByteString where - coarbitrary s = coarbitrary (B.unpack s) - -#endif - - -tests :: [(String, Property)] -tests = - [ ("pack . unpack", - property $ \x -> x === B.pack (B.unpack x)) - , ("unpack . pack" , - property $ \(map toElem -> xs) -> xs === B.unpack (B.pack xs)) - , ("read . show" , - property $ \x -> (x :: ShortByteString) === read (show x)) - - , ("==" , - property $ \x y -> (x == y) === (B.unpack x == B.unpack y)) - , ("== refl" , - property $ \x -> (x :: ShortByteString) == x) - , ("== symm", - property $ \x y -> ((x :: ShortByteString) == y) === (y == x)) - , ("== pack unpack", - property $ \x -> x == B.pack (B.unpack x)) - - , ("compare", - property $ \x y -> compare x y === compare (swapW <$> B.unpack x) (swapW <$> B.unpack y)) - , ("compare EQ", - property $ \x -> compare (x :: ShortByteString) x == EQ) - , ("compare GT", - property $ \x (toElem -> c) -> compare (B.snoc x c) x == GT) - , ("compare LT", - property $ \x (toElem -> c) -> compare x (B.snoc x c) == LT) - , ("compare GT empty", - property $ \x -> not (B.null x) ==> compare x B.empty == GT) - , ("compare LT empty", - property $ \x -> not (B.null x) ==> compare B.empty x == LT) - , ("compare GT concat", - property $ \x y -> not (B.null y) ==> compare (x <> y) x == GT) - , ("compare char" , - property $ \(toElem -> c) (toElem -> d) -> compare (swapW c) (swapW d) == compare (B.singleton c) (B.singleton d)) - , ("compare unsigned", - once $ compare (B.singleton 255) (B.singleton 127) == GT) - - , ("null" , - property $ \x -> B.null x === null (B.unpack x)) - , ("empty 0" , - once $ numWord B.empty === 0) - , ("empty []", - once $ B.unpack B.empty === []) - , ("mempty 0", - once $ numWord mempty === 0) - , ("mempty []", - once $ B.unpack mempty === []) - -#ifdef WORD16 - , ("isInfixOf works correctly under UTF16", - once $ - let foo = BS.pack [0xbb, 0x03] - foo' = BS.pack [0xd2, 0xbb] - bar = BS.pack [0xd2, 0xbb, 0x03, 0xad] - bar' = BS.pack [0xd2, 0xbb, 0x03, 0xad, 0xd2, 0xbb, 0x03, 0xad, 0xbb, 0x03, 0x00, 0x00] - in [B.isInfixOf foo bar, B.isInfixOf foo' bar, B.isInfixOf foo bar'] === [False, True, True] - ) -#endif - , ("break breakSubstring", - property $ \(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x - ) - , ("breakSubstring", - property $ \x y -> not (B.null x) ==> B.null (snd (B.breakSubstring x y)) === not (B.isInfixOf x y) - ) - , ("breakSubstring empty", - property $ \x -> B.breakSubstring B.empty x === (B.empty, x) - ) - , ("isInfixOf", - property $ \x y -> B.isInfixOf x y === L.isInfixOf (B.unpack x) (B.unpack y)) - - , ("mconcat" , - property $ \xs -> B.unpack (mconcat xs) === mconcat (map B.unpack xs)) - , ("mconcat [x,x]" , - property $ \x -> B.unpack (mconcat [x, x]) === mconcat [B.unpack x, B.unpack x]) - , ("mconcat [x,[]]" , - property $ \x -> B.unpack (mconcat [x, B.empty]) === mconcat [B.unpack x, []]) - - , ("null" , - property $ \x -> B.null x === null (B.unpack x)) - , ("reverse" , - property $ \x -> B.unpack (B.reverse x) === reverse (B.unpack x)) - , ("all" , - property $ \f x -> B.all f x === all f (B.unpack x)) - , ("all ==" , - property $ \(toElem -> c) x -> B.all (== c) x === all (== c) (B.unpack x)) - , ("any" , - property $ \f x -> B.any f x === any f (B.unpack x)) - , ("any ==" , - property $ \(toElem -> c) x -> B.any (== c) x === any (== c) (B.unpack x)) - , ("mappend" , - property $ \x y -> B.unpack (mappend x y) === B.unpack x `mappend` B.unpack y) - , ("<>" , - property $ \x y -> B.unpack (x <> y) === B.unpack x <> B.unpack y) - , ("stimes" , - property $ \(Positive n) x -> stimes (n :: Int) (x :: ShortByteString) === mtimesDefault n x) - - , ("break" , - property $ \f x -> (B.unpack *** B.unpack) (B.break f x) === break f (B.unpack x)) - , ("break ==" , - property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.break (== c) x) === break (== c) (B.unpack x)) - , ("break /=" , - property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.break (/= c) x) === break (/= c) (B.unpack x)) - , ("break span" , - property $ \f x -> B.break f x === B.span (not . f) x) - , ("breakEnd" , - property $ \f x -> B.breakEnd f x === swap ((B.reverse *** B.reverse) (B.break f (B.reverse x)))) - , ("breakEnd" , - property $ \f x -> B.breakEnd f x === B.spanEnd (not . f) x) - , ("break isSpace" , - property $ \x -> (B.unpack *** B.unpack) (B.break isSpace x) === break isSpace (B.unpack x)) - - , ("singleton" , - property $ \(toElem -> c) -> B.unpack (B.singleton c) === [c]) - , ("cons" , - property $ \(toElem -> c) x -> B.unpack (B.cons c x) === c : B.unpack x) - , ("cons []" , - property $ \(toElem -> c) -> B.unpack (B.cons c B.empty) === [c]) - , ("uncons" , - property $ \x -> fmap (second B.unpack) (B.uncons x) === L.uncons (B.unpack x)) - , ("snoc" , - property $ \(toElem -> c) x -> B.unpack (B.snoc x c) === B.unpack x ++ [c]) - , ("snoc []" , - property $ \(toElem -> c) -> B.unpack (B.snoc B.empty c) === [c]) - , ("unsnoc" , - property $ \x -> fmap (first B.unpack) (B.unsnoc x) === unsnoc (B.unpack x)) - - , ("drop" , - property $ \n x -> B.unpack (B.drop n x) === drop (fromIntegral n) (B.unpack x)) - , ("drop 10" , - property $ \x -> B.unpack (B.drop 10 x) === drop 10 (B.unpack x)) - , ("dropWhile" , - property $ \f x -> B.unpack (B.dropWhile f x) === dropWhile f (B.unpack x)) - , ("dropWhile ==" , - property $ \(toElem -> c) x -> B.unpack (B.dropWhile (== c) x) === dropWhile (== c) (B.unpack x)) - , ("dropWhile /=" , - property $ \(toElem -> c) x -> B.unpack (B.dropWhile (/= c) x) === dropWhile (/= c) (B.unpack x)) - , ("dropWhile isSpace" , - property $ \x -> B.unpack (B.dropWhile isSpace x) === dropWhile isSpace (B.unpack x)) - - , ("take" , - property $ \n x -> B.unpack (B.take n x) === take (fromIntegral n) (B.unpack x)) - , ("take 10" , - property $ \x -> B.unpack (B.take 10 x) === take 10 (B.unpack x)) - , ("takeWhile" , - property $ \f x -> B.unpack (B.takeWhile f x) === takeWhile f (B.unpack x)) - , ("takeWhile ==" , - property $ \(toElem -> c) x -> B.unpack (B.takeWhile (== c) x) === takeWhile (== c) (B.unpack x)) - , ("takeWhile /=" , - property $ \(toElem -> c) x -> B.unpack (B.takeWhile (/= c) x) === takeWhile (/= c) (B.unpack x)) - - , ("takeWhile isSpace" , - property $ \x -> B.unpack (B.takeWhile isSpace x) === takeWhile isSpace (B.unpack x)) - - , ("dropEnd" , - property $ \n x -> B.dropEnd n x === B.take (numWord x - n) x) - , ("dropWhileEnd" , - property $ \f x -> B.dropWhileEnd f x === B.reverse (B.dropWhile f (B.reverse x))) - , ("takeEnd" , - property $ \n x -> B.takeEnd n x === B.drop (numWord x - n) x) - , ("takeWhileEnd" , - property $ \f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))) - - , ("length" , - property $ \x -> numWord x === fromIntegral (length (B.unpack x))) - , ("count" , - property $ \(toElem -> c) x -> B.count c x === fromIntegral (length (elemIndices c (B.unpack x)))) - , ("filter" , - property $ \f x -> B.unpack (B.filter f x) === filter f (B.unpack x)) - , ("filter compose" , - property $ \f g x -> B.filter f (B.filter g x) === B.filter (\c -> f c && g c) x) - , ("filter ==" , - property $ \(toElem -> c) x -> B.unpack (B.filter (== c) x) === filter (== c) (B.unpack x)) - , ("filter /=" , - property $ \(toElem -> c) x -> B.unpack (B.filter (/= c) x) === filter (/= c) (B.unpack x)) - , ("partition" , - property $ \f x -> (B.unpack *** B.unpack) (B.partition f x) === partition f (B.unpack x)) - - , ("find" , - property $ \f x -> B.find f x === find f (B.unpack x)) - , ("findIndex" , - property $ \f x -> B.findIndex f x === fmap fromIntegral (findIndex f (B.unpack x))) - , ("findIndices" , - property $ \f x -> B.findIndices f x === fmap fromIntegral (findIndices f (B.unpack x))) - , ("findIndices ==" , - property $ \(toElem -> c) x -> B.findIndices (== c) x === fmap fromIntegral (findIndices (== c) (B.unpack x))) - - , ("elem" , - property $ \(toElem -> c) x -> B.elem c x === elem c (B.unpack x)) - , ("not elem" , - property $ \(toElem -> c) x -> not (B.elem c x) === notElem c (B.unpack x)) - , ("elemIndex" , - property $ \(toElem -> c) x -> B.elemIndex c x === fmap fromIntegral (elemIndex c (B.unpack x))) - , ("elemIndices" , - property $ \(toElem -> c) x -> B.elemIndices c x === fmap fromIntegral (elemIndices c (B.unpack x))) - - - , ("map" , - property $ \f x -> B.unpack (B.map (toElem . f) x) === map (toElem . f) (B.unpack x)) - , ("map compose" , - property $ \f g x -> B.map (toElem . f) (B.map (toElem . g) x) === B.map (toElem . f . toElem . g) x) - , ("replicate" , - property $ \n (toElem -> c) -> B.unpack (B.replicate (fromIntegral n) c) === replicate n c) - , ("replicate 0" , - property $ \(toElem -> c) -> B.unpack (B.replicate 0 c) === replicate 0 c) - - , ("span" , - property $ \f x -> (B.unpack *** B.unpack) (B.span f x) === span f (B.unpack x)) - , ("span ==" , - property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (== c) x) === span (== c) (B.unpack x)) - , ("span /=" , - property $ \(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (/= c) x) === span (/= c) (B.unpack x)) - , ("spanEnd" , - property $ \f x -> B.spanEnd f x === swap ((B.reverse *** B.reverse) (B.span f (B.reverse x)))) - , ("split" , - property $ \(toElem -> c) x -> map B.unpack (B.split c x) === split c (B.unpack x)) - , ("split empty" , - property $ \(toElem -> c) -> B.split c B.empty === []) - , ("splitWith" , - property $ \f x -> map B.unpack (B.splitWith f x) === splitWith f (B.unpack x)) - , ("splitWith split" , - property $ \(toElem -> c) x -> B.splitWith (== c) x === B.split c x) - , ("splitWith empty" , - property $ \f -> B.splitWith f B.empty === []) - , ("splitWith length" , - property $ \f x -> let splits = B.splitWith f x; l1 = fromIntegral (length splits); l2 = numWord (B.filter f x) in - (l1 == l2 || l1 == l2 + 1) && sum (map numWord splits) + l2 == numWord x) - , ("splitAt" , - property $ \n x -> (B.unpack *** B.unpack) (B.splitAt n x) === splitAt (fromIntegral n) (B.unpack x)) - - , ("head" , - property $ \x -> not (B.null x) ==> B.head x == head (B.unpack x)) - , ("last" , - property $ \x -> not (B.null x) ==> B.last x == last (B.unpack x)) - , ("tail" , - property $ \x -> not (B.null x) ==> B.unpack (B.tail x) == tail (B.unpack x)) - , ("tail length" , - property $ \x -> not (B.null x) ==> numWord x == 1 + numWord (B.tail x)) - , ("init" , - property $ \x -> not (B.null x) ==> B.unpack (B.init x) == init (B.unpack x)) - , ("init length" , - property $ \x -> not (B.null x) ==> numWord x == 1 + numWord (B.init x)) - - , ("foldl" , - property $ \f (toElem -> c) x -> B.foldl ((toElem .) . f) c x === foldl ((toElem .) . f) c (B.unpack x)) - , ("foldl'" , - property $ \f (toElem -> c) x -> B.foldl' ((toElem .) . f) c x === foldl' ((toElem .) . f) c (B.unpack x)) - , ("foldr" , - property $ \f (toElem -> c) x -> B.foldr ((toElem .) . f) c x === foldr ((toElem .) . f) c (B.unpack x)) - , ("foldr'" , - property $ \f (toElem -> c) x -> B.foldr' ((toElem .) . f) c x === foldr' ((toElem .) . f) c (B.unpack x)) - - , ("foldl cons" , - property $ \x -> B.foldl (flip B.cons) B.empty x === B.reverse x) - , ("foldr cons" , - property $ \x -> B.foldr B.cons B.empty x === x) - , ("foldl special" , - property $ \x (toElem -> c) -> B.unpack (B.foldl (\acc t -> if t == c then acc else B.cons t acc) B.empty x) === - foldl (\acc t -> if t == c then acc else t : acc) [] (B.unpack x)) - , ("foldr special" , - property $ \x (toElem -> c) -> B.unpack (B.foldr (\t acc -> if t == c then acc else B.cons t acc) B.empty x) === - foldr (\t acc -> if t == c then acc else t : acc) [] (B.unpack x)) - - , ("foldl1" , - property $ \f x -> not (B.null x) ==> B.foldl1 ((toElem .) . f) x == foldl1 ((toElem .) . f) (B.unpack x)) - , ("foldl1'" , - property $ \f x -> not (B.null x) ==> B.foldl1' ((toElem .) . f) x == foldl1' ((toElem .) . f) (B.unpack x)) - , ("foldr1" , - property $ \f x -> not (B.null x) ==> B.foldr1 ((toElem .) . f) x == foldr1 ((toElem .) . f) (B.unpack x)) - , ("foldr1'", -- there is not Data.List.foldr1' - property $ \f x -> not (B.null x) ==> B.foldr1' ((toElem .) . f) x == foldr1 ((toElem .) . f) (B.unpack x)) - - , ("foldl1 const" , - property $ \x -> not (B.null x) ==> B.foldl1 const x == B.head x) - , ("foldl1 flip const" , - property $ \x -> not (B.null x) ==> B.foldl1 (flip const) x == B.last x) - , ("foldr1 const" , - property $ \x -> not (B.null x) ==> B.foldr1 const x == B.head x) - , ("foldr1 flip const" , - property $ \x -> not (B.null x) ==> B.foldr1 (flip const) x == B.last x) - , ("foldl1 max" , - property $ \x -> not (B.null x) ==> B.foldl1 max x == B.foldl max minBound x) - , ("foldr1 max" , - property $ \x -> not (B.null x) ==> B.foldr1 max x == B.foldr max minBound x) - - , ("index" , - property $ \(NonNegative n) x -> fromIntegral n < numWord x ==> B.index x (fromIntegral n) == B.unpack x !! n) - , ("indexMaybe" , - property $ \(NonNegative n) x -> fromIntegral n < numWord x ==> B.indexMaybe x (fromIntegral n) == Just (B.unpack x !! n)) - , ("indexMaybe Nothing" , - property $ \n x -> (n :: Int) < 0 || fromIntegral n >= numWord x ==> B.indexMaybe x (fromIntegral n) == Nothing) - , ("!?" , - property $ \n x -> B.indexMaybe x (fromIntegral (n :: Int)) === x B.!? (fromIntegral n)) - - , ("unfoldrN" , - property $ \n f (toElem -> c) -> B.unpack (fst (B.unfoldrN n (fmap (first toElem) . f) c)) === - take (fromIntegral n) (unfoldr (fmap (first toElem) . f) c)) - , ("unfoldrN replicate" , - property $ \n (toElem -> c) -> fst (B.unfoldrN n (\t -> Just (t, t)) c) === B.replicate n c) - , ("unfoldr" , - property $ \n a (toElem -> c) -> B.unpack (B.unfoldr (\x -> if x <= 100 * n then Just (c, x + 1 :: Int) else Nothing) a) === - unfoldr (\x -> if x <= 100 * n then Just (c, x + 1) else Nothing) a) - - --, ("unfoldr" , - -- property $ \n f (toElem -> a) -> B.unpack (B.take (fromIntegral n) (B.unfoldr (fmap (first toElem) . f) a)) === - -- take n (unfoldr (fmap (first toElem) . f) a)) - -- -#ifdef WORD16 - , ("useAsCWString str packCWString == str" , - property $ \x -> not (B.any (== _nul) x) - ==> monadicIO $ run (B.useAsCWString x B.packCWString >>= \x' -> pure (x == x'))) - , ("useAsCWStringLen str packCWStringLen == str" , - property $ \x -> not (B.any (== _nul) x) - ==> monadicIO $ run (B.useAsCWStringLen x B.packCWStringLen >>= \x' -> pure (x == x'))) -#else - , ("useAsCString str packCString == str" , - property $ \x -> not (B.any (== _nul) x) - ==> monadicIO $ run (B.useAsCString x B.packCString >>= \x' -> pure (x == x'))) - , ("useAsCStringLen str packCStringLen == str" , - property $ \x -> not (B.any (== _nul) x) - ==> monadicIO $ run (B.useAsCStringLen x B.packCStringLen >>= \x' -> pure (x == x'))) -#endif - ] - -split :: Eq a => a -> [a] -> [[a]] -split c = splitWith (== c) - -splitWith :: (a -> Bool) -> [a] -> [[a]] -splitWith _ [] = [] -splitWith f ys = go [] ys - where - go acc [] = [reverse acc] - go acc (x : xs) - | f x = reverse acc : go [] xs - | otherwise = go (x : acc) xs - -unsnoc :: [a] -> Maybe ([a], a) -unsnoc [] = Nothing -unsnoc xs = Just (init xs, last xs) diff --git a/tests/bytestring-tests/Properties/ShortByteString.hs b/tests/bytestring-tests/Properties/ShortByteString.hs deleted file mode 100644 index 3040dfb8..00000000 --- a/tests/bytestring-tests/Properties/ShortByteString.hs +++ /dev/null @@ -1,3 +0,0 @@ -{-# LANGUAGE CPP #-} -#undef WORD16 -#include "Common.hs" diff --git a/tests/bytestring-tests/Properties/ShortByteString/Word16.hs b/tests/bytestring-tests/Properties/ShortByteString/Word16.hs deleted file mode 100644 index aa426397..00000000 --- a/tests/bytestring-tests/Properties/ShortByteString/Word16.hs +++ /dev/null @@ -1,3 +0,0 @@ -{-# LANGUAGE CPP #-} -#define WORD16 -#include "../Common.hs" diff --git a/tests/filepath-tests/TestGen.hs b/tests/filepath-tests/TestGen.hs index 2075e7f0..eef92e51 100755 --- a/tests/filepath-tests/TestGen.hs +++ b/tests/filepath-tests/TestGen.hs @@ -2,6 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE CPP #-} +{-# LANGUAGE PackageImports #-} {-# OPTIONS_GHC -Wno-name-shadowing #-} {-# OPTIONS_GHC -Wno-orphans #-} module TestGen(tests) where @@ -14,11 +15,11 @@ import Data.String import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) -import System.OsString.Internal.Types -import System.OsPath.Encoding.Internal +import "os-string" System.OsString.Internal.Types +import "os-string" System.OsString.Encoding.Internal import qualified Data.Char as C -import qualified System.OsPath.Data.ByteString.Short as SBS -import qualified System.OsPath.Data.ByteString.Short.Word16 as SBS16 +import qualified "os-string" System.OsString.Data.ByteString.Short as SBS +import qualified "os-string" System.OsString.Data.ByteString.Short.Word16 as SBS16 import qualified System.FilePath.Windows as W import qualified System.FilePath.Posix as P import qualified System.OsPath.Windows as AFP_W