diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index a4569bc3..3d5a4446 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -45,24 +45,18 @@ module Data.Text.IO import Data.Text (Text) import Prelude hiding (appendFile, getContents, getLine, interact, putStr, putStrLn, readFile, writeFile) -import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout, +import System.IO (Handle, IOMode(..), openFile, stdin, stdout, withFile) import qualified Control.Exception as E import Control.Monad (liftM2, when) -import Data.IORef (readIORef, writeIORef) +import Data.IORef (readIORef) import qualified Data.Text as T -import Data.Text.Internal.Fusion (stream) -import Data.Text.Internal.Fusion.Types (Step(..), Stream(..)) -import Data.Text.Internal.IO (hGetLineWith, readChunk) -import GHC.IO.Buffer (Buffer(..), BufferState(..), RawCharBuffer, CharBuffer, - emptyBuffer, isEmptyBuffer, newCharBuffer) -import qualified GHC.IO.Buffer +import Data.Text.Internal.Fusion (stream, streamLn) +import Data.Text.Internal.IO (hGetLineWith, readChunk, hPutStream) +import GHC.IO.Buffer (CharBuffer, isEmptyBuffer) import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType)) -import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle, - wantWritableHandle) -import GHC.IO.Handle.Text (commitBuffer') -import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), - HandleType(..), Newline(..)) +import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle) +import GHC.IO.Handle.Types (BufferMode(..), Handle__(..), HandleType(..)) import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell) import System.IO.Error (isEOFError) @@ -174,109 +168,11 @@ hGetLine = hGetLineWith T.concat -- | Write a string to a handle. hPutStr :: Handle -> Text -> IO () --- This function is lifted almost verbatim from GHC.IO.Handle.Text. -hPutStr h t = do - (buffer_mode, nl) <- - wantWritableHandle "hPutStr" h $ \h_ -> do - bmode <- getSpareBuffer h_ - return (bmode, haOutputNL h_) - let str = stream t - case buffer_mode of - (NoBuffering, _) -> hPutChars h str - (LineBuffering, buf) -> writeLines h nl buf str - (BlockBuffering _, buf) -> writeBlocks (nl == CRLF) h buf str - -hPutChars :: Handle -> Stream Char -> IO () -hPutChars h (Stream next0 s0 _len) = loop s0 - where - loop !s = case next0 s of - Done -> return () - Skip s' -> loop s' - Yield x s' -> hPutChar h x >> loop s' - --- The following functions are largely lifted from GHC.IO.Handle.Text, --- but adapted to a coinductive stream of data instead of an inductive --- list. --- --- We have several variations of more or less the same code for --- performance reasons. Splitting the original buffered write --- function into line- and block-oriented versions gave us a 2.1x --- performance improvement. Lifting out the raw/cooked newline --- handling gave a few more percent on top. - -writeLines :: Handle -> Newline -> CharBuffer -> Stream Char -> IO () -writeLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0 - where - outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) - where - inner !s !n = - case next0 s of - Done -> commit n False{-no flush-} True{-release-} >> return () - Skip s' -> inner s' n - Yield x s' - | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s - | x == '\n' -> do - n' <- if nl == CRLF - then do n1 <- writeCharBuf raw len n '\r' - writeCharBuf raw len n1 '\n' - else writeCharBuf raw len n x - commit n' True{-needs flush-} False >>= outer s' - | otherwise -> writeCharBuf raw len n x >>= inner s' - commit = commitBuffer h raw len - -writeBlocks :: Bool -> Handle -> CharBuffer -> Stream Char -> IO () -writeBlocks isCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0 - where - outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) - where - inner !s !n = - case next0 s of - Done -> commit n False{-no flush-} True{-release-} >> return () - Skip s' -> inner s' n - Yield x s' - | isCRLF && x == '\n' && n + 1 < len -> do - n1 <- writeCharBuf raw len n '\r' - writeCharBuf raw len n1 '\n' >>= inner s' - | n < len -> writeCharBuf raw len n x >>= inner s' - | otherwise -> commit n True{-needs flush-} False >>= outer s - commit = commitBuffer h raw len - --- | Only modifies the raw buffer and not the buffer attributes -writeCharBuf :: RawCharBuffer -> Int -> Int -> Char -> IO Int -writeCharBuf bufRaw bufSize n c = E.assert (n >= 0 && n < bufSize) $ - GHC.IO.Buffer.writeCharBuf bufRaw n c - --- This function is completely lifted from GHC.IO.Handle.Text. -getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer) -getSpareBuffer Handle__{haCharBuffer=ref, - haBuffers=spare_ref, - haBufferMode=mode} - = do - case mode of - NoBuffering -> return (mode, error "no buffer!") - _ -> do - bufs <- readIORef spare_ref - buf <- readIORef ref - case bufs of - BufferListCons b rest -> do - writeIORef spare_ref rest - return ( mode, emptyBuffer b (bufSize buf) WriteBuffer) - BufferListNil -> do - new_buf <- newCharBuffer (bufSize buf) WriteBuffer - return (mode, new_buf) - - --- This function is modified from GHC.Internal.IO.Handle.Text. -commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool - -> IO CharBuffer -commitBuffer hdl !raw !sz !count flush release = - wantWritableHandle "commitAndReleaseBuffer" hdl $ - commitBuffer' raw sz count flush release -{-# INLINE commitBuffer #-} +hPutStr h = hPutStream h . stream -- | Write a string to a handle, followed by a newline. hPutStrLn :: Handle -> Text -> IO () -hPutStrLn h t = hPutStr h t >> hPutChar h '\n' +hPutStrLn h = hPutStream h . streamLn -- | The 'interact' function takes a function of type @Text -> Text@ -- as its argument. The entire input from the standard input device is diff --git a/src/Data/Text/Internal/Fusion.hs b/src/Data/Text/Internal/Fusion.hs index d003b60c..24f40921 100644 --- a/src/Data/Text/Internal/Fusion.hs +++ b/src/Data/Text/Internal/Fusion.hs @@ -25,6 +25,7 @@ module Data.Text.Internal.Fusion -- * Creation and elimination , stream + , streamLn , unstream , reverseStream @@ -49,7 +50,7 @@ module Data.Text.Internal.Fusion , countChar ) where -import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int, +import Prelude (Bool(..), Char, Eq(..), Maybe(..), Monad(..), Int, Num(..), Ord(..), ($), otherwise) import Data.Bits (shiftL, shiftR) @@ -98,6 +99,33 @@ stream (Text arr off len) = Stream next off (betweenSize (len `shiftR` 2) len) _ -> U8.chr4 n0 n1 n2 n3 {-# INLINE [0] stream #-} +-- | /O(n)/ @'streamLn' t = 'stream' (t <> \'\\n\')@ +streamLn :: +#if defined(ASSERTS) + HasCallStack => +#endif + Text -> Stream Char +streamLn (Text arr off len) = Stream next off (betweenSize (len `shiftR` 2) (len + 1)) + where + !end = off+len + next !i + | i > end = Done + | i == end = Yield '\n' (i + 1) + | otherwise = Yield chr (i + l) + where + n0 = A.unsafeIndex arr i + n1 = A.unsafeIndex arr (i + 1) + n2 = A.unsafeIndex arr (i + 2) + n3 = A.unsafeIndex arr (i + 3) + + l = U8.utf8LengthByLeader n0 + chr = case l of + 1 -> unsafeChr8 n0 + 2 -> U8.chr2 n0 n1 + 3 -> U8.chr3 n0 n1 n2 + _ -> U8.chr4 n0 n1 n2 n3 +{-# INLINE [0] streamLn #-} + -- | /O(n)/ Converts 'Text' into a 'Stream' 'Char', but iterates -- backwards through the text. -- diff --git a/src/Data/Text/Internal/IO.hs b/src/Data/Text/Internal/IO.hs index 8a26f87b..5c9fea8b 100644 --- a/src/Data/Text/Internal/IO.hs +++ b/src/Data/Text/Internal/IO.hs @@ -18,6 +18,7 @@ module Data.Text.Internal.IO ( hGetLineWith , readChunk + , hPutStream ) where import qualified Control.Exception as E @@ -28,12 +29,15 @@ import Data.Text.Internal.Fusion.Types (Step(..), Stream(..)) import Data.Text.Internal.Fusion.Size (exactSize, maxSize) import Data.Text.Unsafe (inlinePerformIO) import Foreign.Storable (peekElemOff) -import GHC.IO.Buffer (Buffer(..), CharBuffer, RawCharBuffer, bufferAdjustL, - bufferElems, charSize, isEmptyBuffer, readCharBuf, - withRawBuffer, writeCharBuf) -import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_) -import GHC.IO.Handle.Types (Handle__(..), Newline(..)) -import System.IO (Handle) +import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBuffer, RawCharBuffer, + bufferAdjustL, bufferElems, charSize, emptyBuffer, + isEmptyBuffer, newCharBuffer, readCharBuf, withRawBuffer, + writeCharBuf) +import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_, + wantWritableHandle) +import GHC.IO.Handle.Text (commitBuffer') +import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), Newline(..)) +import System.IO (Handle, hPutChar) import System.IO.Error (isEOFError) import qualified Data.Text as T @@ -162,5 +166,106 @@ readChunk hh@Handle__{..} buf = do writeIORef haCharBuffer (bufferAdjustL r buf') return t +-- | Print a @Stream Char@. +hPutStream :: Handle -> Stream Char -> IO () +-- This function is lifted almost verbatim from GHC.IO.Handle.Text. +hPutStream h str = do + (buffer_mode, nl) <- + wantWritableHandle "hPutStr" h $ \h_ -> do + bmode <- getSpareBuffer h_ + return (bmode, haOutputNL h_) + case buffer_mode of + (NoBuffering, _) -> hPutChars h str + (LineBuffering, buf) -> writeLines h nl buf str + (BlockBuffering _, buf) -> writeBlocks (nl == CRLF) h buf str + +hPutChars :: Handle -> Stream Char -> IO () +hPutChars h (Stream next0 s0 _len) = loop s0 + where + loop !s = case next0 s of + Done -> return () + Skip s' -> loop s' + Yield x s' -> hPutChar h x >> loop s' + +-- The following functions are largely lifted from GHC.IO.Handle.Text, +-- but adapted to a coinductive stream of data instead of an inductive +-- list. +-- +-- We have several variations of more or less the same code for +-- performance reasons. Splitting the original buffered write +-- function into line- and block-oriented versions gave us a 2.1x +-- performance improvement. Lifting out the raw/cooked newline +-- handling gave a few more percent on top. + +writeLines :: Handle -> Newline -> CharBuffer -> Stream Char -> IO () +writeLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0 + where + outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) + where + inner !s !n = + case next0 s of + Done -> commit n False{-no flush-} True{-release-} >> return () + Skip s' -> inner s' n + Yield x s' + | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s + | x == '\n' -> do + n' <- if nl == CRLF + then do n1 <- writeCharBuf' raw len n '\r' + writeCharBuf' raw len n1 '\n' + else writeCharBuf' raw len n x + commit n' True{-needs flush-} False >>= outer s' + | otherwise -> writeCharBuf' raw len n x >>= inner s' + commit = commitBuffer h raw len + +writeBlocks :: Bool -> Handle -> CharBuffer -> Stream Char -> IO () +writeBlocks isCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0 + where + outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int) + where + inner !s !n = + case next0 s of + Done -> commit n False{-no flush-} True{-release-} >> return () + Skip s' -> inner s' n + Yield x s' + | isCRLF && x == '\n' && n + 1 < len -> do + n1 <- writeCharBuf' raw len n '\r' + writeCharBuf' raw len n1 '\n' >>= inner s' + | n < len -> writeCharBuf' raw len n x >>= inner s' + | otherwise -> commit n True{-needs flush-} False >>= outer s + commit = commitBuffer h raw len + +-- | Only modifies the raw buffer and not the buffer attributes +writeCharBuf' :: RawCharBuffer -> Int -> Int -> Char -> IO Int +writeCharBuf' bufRaw bufSize n c = E.assert (n >= 0 && n < bufSize) $ + writeCharBuf bufRaw n c + +-- This function is completely lifted from GHC.IO.Handle.Text. +getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer) +getSpareBuffer Handle__{haCharBuffer=ref, + haBuffers=spare_ref, + haBufferMode=mode} + = do + case mode of + NoBuffering -> return (mode, error "no buffer!") + _ -> do + bufs <- readIORef spare_ref + buf <- readIORef ref + case bufs of + BufferListCons b rest -> do + writeIORef spare_ref rest + return ( mode, emptyBuffer b (bufSize buf) WriteBuffer) + BufferListNil -> do + new_buf <- newCharBuffer (bufSize buf) WriteBuffer + return (mode, new_buf) + + +-- This function is modified from GHC.Internal.IO.Handle.Text. +commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool + -> IO CharBuffer +commitBuffer hdl !raw !sz !count flush release = + wantWritableHandle "commitAndReleaseBuffer" hdl $ + commitBuffer' raw sz count flush release +{-# INLINE commitBuffer #-} + sizeError :: String -> a sizeError loc = error $ "Data.Text.IO." ++ loc ++ ": bad internal buffer size" diff --git a/src/Data/Text/Internal/Lazy/Fusion.hs b/src/Data/Text/Internal/Lazy/Fusion.hs index 3297c00f..00985679 100644 --- a/src/Data/Text/Internal/Lazy/Fusion.hs +++ b/src/Data/Text/Internal/Lazy/Fusion.hs @@ -17,6 +17,7 @@ module Data.Text.Internal.Lazy.Fusion ( stream + , streamLn , unstream , unstreamChunks , length @@ -56,6 +57,22 @@ stream text = Stream next (text :*: 0) unknownSize where Iter c d = iter t i {-# INLINE [0] stream #-} +-- | /O(n)/ @'streamLn' t = 'stream' (t <> \'\\n\')@ +streamLn :: +#if defined(ASSERTS) + HasCallStack => +#endif + Text -> Stream Char +streamLn text = Stream next (text :*: 0) unknownSize + where + next (Empty :*: 0) = Yield '\n' (Empty :*: 1) + next (Empty :*: _) = Done + next (txt@(Chunk t@(I.Text _ _ len) ts) :*: i) + | i >= len = next (ts :*: 0) + | otherwise = Yield c (txt :*: i+d) + where Iter c d = iter t i +{-# INLINE [0] streamLn #-} + -- | /O(n)/ Convert a 'Stream Char' into a 'Text', using the given -- chunk size. unstreamChunks :: diff --git a/src/Data/Text/Lazy/IO.hs b/src/Data/Text/Lazy/IO.hs index 2ebd7eef..0ea09351 100644 --- a/src/Data/Text/Lazy/IO.hs +++ b/src/Data/Text/Lazy/IO.hs @@ -41,15 +41,14 @@ module Data.Text.Lazy.IO import Data.Text.Lazy (Text) import Prelude hiding (appendFile, getContents, getLine, interact, putStr, putStrLn, readFile, writeFile) -import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout, - withFile) -import qualified Data.Text.IO as T +import System.IO (Handle, IOMode(..), openFile, stdin, stdout, withFile) import qualified Data.Text.Lazy as L import qualified Control.Exception as E import Control.Monad (when) import Data.IORef (readIORef) -import Data.Text.Internal.IO (hGetLineWith, readChunk) +import Data.Text.Internal.IO (hGetLineWith, readChunk, hPutStream) import Data.Text.Internal.Lazy (chunk, empty) +import Data.Text.Internal.Lazy.Fusion (stream, streamLn) import GHC.IO.Buffer (isEmptyBuffer) import GHC.IO.Exception (IOException(..), IOErrorType(..), ioException) import GHC.IO.Handle.Internals (augmentIOError, hClose_help, @@ -129,11 +128,11 @@ hGetLine = hGetLineWith L.fromChunks -- | Write a string to a handle. hPutStr :: Handle -> Text -> IO () -hPutStr h = mapM_ (T.hPutStr h) . L.toChunks +hPutStr h = hPutStream h . stream -- | Write a string to a handle, followed by a newline. hPutStrLn :: Handle -> Text -> IO () -hPutStrLn h t = hPutStr h t >> hPutChar h '\n' +hPutStrLn h = hPutStream h . streamLn -- | The 'interact' function takes a function of type @Text -> Text@ -- as its argument. The entire input from the standard input device is