From 17bc1adcf340b8619c401226de72a36a8d789662 Mon Sep 17 00:00:00 2001 From: Luke Worth Date: Wed, 9 Nov 2022 04:06:22 +1100 Subject: [PATCH 1/2] Make missing-field behaviour configurable This is a breaking change as it removes the `Eq` and `Show` instances for `EncodeOptions`. --- cassava.cabal | 2 +- src/Data/Csv/Builder.hs | 2 +- src/Data/Csv/Encoding.hs | 27 ++++++++++++++++----------- src/Data/Csv/Incremental.hs | 14 +++++++------- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/cassava.cabal b/cassava.cabal index 650783c..326f319 100644 --- a/cassava.cabal +++ b/cassava.cabal @@ -1,6 +1,6 @@ cabal-version: 1.12 Name: cassava -Version: 0.5.3.0 +Version: 0.6.0.0 Synopsis: A CSV parsing and encoding library Description: { diff --git a/src/Data/Csv/Builder.hs b/src/Data/Csv/Builder.hs index 5652f2b..8ce947b 100644 --- a/src/Data/Csv/Builder.hs +++ b/src/Data/Csv/Builder.hs @@ -60,7 +60,7 @@ encodeRecordWith opts r = encodeNamedRecordWith :: ToNamedRecord a => EncodeOptions -> Header -> a -> Builder.Builder encodeNamedRecordWith opts hdr nr = - Encoding.encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) + Encoding.encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) (encMissing opts) (toNamedRecord nr) Mon.<> Encoding.recordSep (encUseCrLf opts) -- | Like 'encodeDefaultOrderedNamedRecord', but lets you customize diff --git a/src/Data/Csv/Encoding.hs b/src/Data/Csv/Encoding.hs index 758bdb8..aae4229 100644 --- a/src/Data/Csv/Encoding.hs +++ b/src/Data/Csv/Encoding.hs @@ -211,7 +211,11 @@ data EncodeOptions = EncodeOptions -- | What kind of quoting should be applied to text fields. , encQuoting :: !Quoting - } deriving (Eq, Show) + + -- | What to write into empty fields given their field name. For + -- backward-compatibility, this defaults to a call to `error`. + , encMissing :: Name -> Field + } -- | Encoding options for CSV files. defaultEncodeOptions :: EncodeOptions @@ -220,6 +224,9 @@ defaultEncodeOptions = EncodeOptions , encUseCrLf = True , encIncludeHeader = True , encQuoting = QuoteMinimal + , encMissing = \n -> moduleError "namedRecordToRecord" $ + "header contains name " ++ show (B8.unpack n) ++ + " which is not present in the named record" } -- | Like 'encode', but lets you customize how the CSV data is @@ -262,9 +269,9 @@ encodeRecord qtng delim = mconcat . intersperse (word8 delim) -- | Encode a single named record, without the trailing record -- separator (i.e. newline), using the given field order. -encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder -encodeNamedRecord hdr qtng delim = - encodeRecord qtng delim . namedRecordToRecord hdr +encodeNamedRecord :: Header -> Quoting -> Word8 -> (Name -> Field) -> NamedRecord -> Builder +encodeNamedRecord hdr qtng delim missing = + encodeRecord qtng delim . namedRecordToRecord missing hdr -- TODO: Optimize escape :: Quoting -> Word8 -> B.ByteString -> B.ByteString @@ -300,7 +307,7 @@ encodeByNameWith opts hdr v rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <> recordSep (encUseCrLf opts) <> records records = unlines (recordSep (encUseCrLf opts)) - . map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) + . map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) (encMissing opts) . toNamedRecord) $ v {-# INLINE encodeByNameWith #-} @@ -320,18 +327,16 @@ encodeDefaultOrderedByNameWith opts v rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <> recordSep (encUseCrLf opts) <> records records = unlines (recordSep (encUseCrLf opts)) - . map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) + . map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts) (encMissing opts) . toNamedRecord) $ v {-# INLINE encodeDefaultOrderedByNameWith #-} -namedRecordToRecord :: Header -> NamedRecord -> Record -namedRecordToRecord hdr nr = V.map find hdr +namedRecordToRecord :: (Name -> Field) -> Header -> NamedRecord -> Record +namedRecordToRecord missing hdr nr = V.map find hdr where find n = case HM.lookup n nr of - Nothing -> moduleError "namedRecordToRecord" $ - "header contains name " ++ show (B8.unpack n) ++ - " which is not present in the named record" + Nothing -> missing n Just v -> v moduleError :: String -> String -> a diff --git a/src/Data/Csv/Incremental.hs b/src/Data/Csv/Incremental.hs index a6fc94f..1463fe9 100644 --- a/src/Data/Csv/Incremental.hs +++ b/src/Data/Csv/Incremental.hs @@ -401,7 +401,7 @@ encodeByNameWith opts hdr b = Builder.toLazyByteString $ encHdr <> runNamedBuilder b hdr (encQuoting opts) (encDelimiter opts) - (encUseCrLf opts) + (encMissing opts) (encUseCrLf opts) where encHdr | encIncludeHeader opts = @@ -418,7 +418,7 @@ encodeDefaultOrderedByNameWith opts b = Builder.toLazyByteString $ encHdr <> runNamedBuilder b hdr (encQuoting opts) - (encDelimiter opts) (encUseCrLf opts) + (encDelimiter opts) (encMissing opts) (encUseCrLf opts) where hdr = Conversion.headerOrder (undefined :: a) @@ -430,8 +430,8 @@ encodeDefaultOrderedByNameWith opts b = -- | Encode a single named record. encodeNamedRecord :: ToNamedRecord a => a -> NamedBuilder a -encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim useCrLf -> - Encoding.encodeNamedRecord hdr qtng delim +encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim missing useCrLf -> + Encoding.encodeNamedRecord hdr qtng delim missing (Conversion.toNamedRecord nr) <> recordSep useCrLf -- | A builder for building the CSV data incrementally. Just like the @@ -440,14 +440,14 @@ encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim useCrLf -> -- a left-associative, `foldl'` style makes the building not be -- incremental. newtype NamedBuilder a = NamedBuilder { - runNamedBuilder :: Header -> Quoting -> Word8 -> Bool -> Builder.Builder + runNamedBuilder :: Header -> Quoting -> Word8 -> (Name -> Field) -> Bool -> Builder.Builder } -- | @since 0.5.0.0 instance Semigroup (NamedBuilder a) where NamedBuilder f <> NamedBuilder g = - NamedBuilder $ \ hdr qtng delim useCrlf -> - f hdr qtng delim useCrlf <> g hdr qtng delim useCrlf + NamedBuilder $ \ hdr qtng delim missing useCrlf -> + f hdr qtng delim missing useCrlf <> g hdr qtng delim missing useCrlf instance Monoid (NamedBuilder a) where mempty = NamedBuilder (\ _ _ _ _ -> mempty) From 785e42a8cdbcbd56621b9c065b7d776f0a4d6fe3 Mon Sep 17 00:00:00 2001 From: Luke Worth Date: Wed, 9 Nov 2022 04:27:19 +1100 Subject: [PATCH 2/2] Add unit test for missing fields --- tests/UnitTests.hs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/UnitTests.hs b/tests/UnitTests.hs index 4e625e0..c260a37 100644 --- a/tests/UnitTests.hs +++ b/tests/UnitTests.hs @@ -380,9 +380,24 @@ customDelim delim f1 f2 = delim `notElem` [cr, nl, dquote] ==> cr = 13 dquote = 34 +customMissing :: Assertion +customMissing = encodeByNameWith encOpts hdr nrs @?= ex + where + encOpts = defaultEncodeOptions { encMissing = id } + hdr = V.fromList ["abc", "def"] + nrs :: [NamedRecord] + nrs = + [ HM.fromList [("abc", "123")] + , HM.fromList [("def", "456")] + , HM.fromList [("abc", "234"), ("def", "567")] + , HM.fromList [] + ] + ex = "abc,def\r\n123,def\r\nabc,456\r\n234,567\r\nabc,def\r\n" + customOptionsTests :: [TF.Test] customOptionsTests = [ testProperty "customDelim" customDelim + , testCase "customMissing" customMissing ] ------------------------------------------------------------------------