Skip to content

Commit

Permalink
fix: parsing of (text encoded) addresses
Browse files Browse the repository at this point in the history
  • Loading branch information
romanofski committed Jan 6, 2025
1 parent ca2c329 commit 108d00e
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/Data/IMF.hs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ module Data.IMF
, Address(..)
, address
, addressList
, addressSpec
, AddrSpec(..)
, Domain(..)
, Mailbox(..)
Expand Down
1 change: 1 addition & 0 deletions src/Data/IMF/Syntax.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ module Data.IMF.Syntax
, crlf
, vchar
, word
, dquote
, quotedString
, dotAtomText
, dotAtom
Expand Down
13 changes: 11 additions & 2 deletions src/Data/IMF/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import Control.Applicative ((<|>), optional)
import Data.CaseInsensitive
import Data.Foldable (fold)
import Data.List (intersperse)
import Data.Char (isLetter)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as LT
Expand All @@ -49,7 +50,7 @@ import Data.List.NonEmpty (intersperse)

import Data.MIME.Charset (decodeLenient)
import Data.IMF (Mailbox(..), Address(..), AddrSpec(..), Domain(..))
import Data.IMF.Syntax
import Data.IMF.Syntax hiding (quotedString, word)


renderMailboxes :: [Mailbox] -> T.Text
Expand Down Expand Up @@ -99,14 +100,22 @@ mailbox = Mailbox <$> optional displayName <*> angleAddr
readMailbox :: String -> Either String Mailbox
readMailbox = parseOnly (mailbox <* endOfInput) . T.pack

quotedString :: Parser T.Text
quotedString = optionalFWS *> dquote *> phrase <* dquote

word :: Parser T.Text
word = do
foo <- optionalFWS *> A.many1 (A.satisfy (\c -> isLetter c || isAtext c))
pure $ T.pack foo

-- | Version of 'phrase' that does not process encoded-word
-- (we are parsing Text so will assume that the input does not
-- contain encoded words. TODO this is probably wrong :)
phrase :: Parser T.Text
phrase = foldMany1Sep (singleton ' ') word

displayName :: Parser T.Text
displayName = phrase
displayName = phrase <|> quotedString

mailboxList :: Parser [Mailbox]
mailboxList = mailbox `sepBy` char ','
Expand Down
30 changes: 30 additions & 0 deletions tests/Headers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import Data.List.NonEmpty (NonEmpty((:|)))
import Data.String (IsString)
import Data.Word (Word8)

import qualified Data.Text as T
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as L8
Expand All @@ -47,6 +48,7 @@ renderField = Builder.toLazyByteString . buildField
unittests :: TestTree
unittests = testGroup "Headers"
[ parsesMailboxesSuccessfully
, parsesMailboxesNonASCIISuccessfully
, parsesTextMailboxesSuccessfully
, parsesAddressesSuccessfully
, parsesTextAddressesSuccessfully
Expand Down Expand Up @@ -136,6 +138,26 @@ rendersAddressesToTextSuccessfully =
, "undisclosed-recipients:;")
]

nonASCIIDisplayNameFixtures :: IsString s => [(String, Either String Mailbox -> Assertion, s)]
nonASCIIDisplayNameFixtures =
[
( "Czech"
, (Right (Mailbox (Just "Lud\283k Tiberiu") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?)
, "Luděk Tiberiu")
, ( "Chinese"
, (Right (Mailbox (Just "佐藤 直樹") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?)
, "佐藤 直樹")
, ("Japanese"
,(Right (Mailbox (Just "鈴木 一郎") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?)
, "鈴木 一郎")
, ("Korean"
, (Right (Mailbox (Just "김철수") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?)
, "김철수")
, ("Apostrophy"
, (Right (Mailbox (Just "O'Neill McCarthy") (AddrSpec "foo" (DomainDotAtom $ "bar" :| ["test"]))) @=?)
, "O'Neill McCarthy")
]

-- | Note some examples are taken from https://tools.ietf.org/html/rfc3696#section-3
mailboxFixtures :: IsString s => [(String, Either String Mailbox -> Assertion, s)]
mailboxFixtures =
Expand Down Expand Up @@ -188,6 +210,14 @@ parsesMailboxesSuccessfully =
testCase desc $ f (AText.parseOnly AddressText.mailbox input)) <$>
mailboxFixtures

parsesMailboxesNonASCIISuccessfully :: TestTree
parsesMailboxesNonASCIISuccessfully =
testGroup "parsing mailboxes (nonASCII)" $
(\(desc, assertion, input) ->
testCase desc $ assertion (AText.parseOnly AddressText.mailbox (input <> " <[email protected]>"))) <$>
nonASCIIDisplayNameFixtures


parsesTextMailboxesSuccessfully :: TestTree
parsesTextMailboxesSuccessfully =
testGroup "parsing mailboxes (text)" $
Expand Down

0 comments on commit 108d00e

Please sign in to comment.