From 498343c96e46fb1147a0f281201006943e2d5e39 Mon Sep 17 00:00:00 2001 From: Philippe Pittoli Date: Wed, 31 Jan 2024 02:08:11 +0100 Subject: [PATCH] Do not change the original input while parsing. --- src/GenericParser/EmailAddress.purs | 75 ++++++++++++++++------------- test/Main.purs | 2 +- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index 6a22e62..eaa0711 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -2,9 +2,10 @@ -- | This module is experimental and doesn't follow every rule for an email address, yet. module GenericParser.EmailAddress where -import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit) +import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit) import Control.Alt ((<|>)) +import Control.Lazy (defer) import Data.Array as A import Data.Char as C import Data.Either (Either(..)) @@ -28,20 +29,26 @@ data EmailError -- | obs-FWS: obsolete folding white space. -- | -- | obs-FWS = 1*WSP *(CRLF 1*WSP) -obs_fws :: forall e. Parser e Unit -obs_fws = do _ <- A.many wsp - void $ A.many $ do _ <- crlf - void $ many1 wsp +obs_fws :: forall e. Parser e String +obs_fws = do x <- A.many wsp + xs <- A.many $ do v <- crlf + w <- many1 wsp + pure $ v <> CU.fromCharArray w + pure $ A.fold $ [CU.fromCharArray x] <> xs -- | FWS: folding white space. This can be described in plain english as: -- | 1. an OPTIONAL line with potential white spaces followed by at least one white space -- | 2. or, by the obs-FWS rule (meaning: many empty lines) -- | -- | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS -fws :: forall e. Parser e Unit -fws = do _ <- tryMaybe do _ <- A.many wsp - crlf - void $ many1 wsp +fws :: forall e. Parser e String +fws = do x <- tryMaybe do xs <- A.many wsp + v <- crlf + pure $ CU.fromCharArray xs <> v + w <- many1 wsp + let first_part = maybe "" id x + second_part = CU.fromCharArray w + pure $ first_part <> second_part <|> obs_fws -- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'. @@ -69,28 +76,26 @@ quoted_pair = do _ <- char '\\' -- | Comment content. -- | -- | ccontent = ctext / quoted-pair / comment -ccontent :: forall e. Parser e Unit -ccontent = a_ctext <|> a_quoted_pair <|> comment - where a_ctext :: Parser e Unit - a_ctext = void ctext - a_quoted_pair :: Parser e Unit - a_quoted_pair = void quoted_pair +ccontent :: forall e. Parser e String +ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment -- | Comment. Nothing to return since comments aren't to be processed. -- | -- | comment = "(" *([FWS] ccontent) [FWS] ")" -comment :: forall e. Parser e Unit +comment :: forall e. Parser e String comment = do _ <- char '(' - _ <- A.many (do _ <- A.many fws - void ccontent) - void $ char ')' + xs <- A.many do _ <- A.many fws + ccontent + _ <- char ')' + pure $ "(" <> A.fold xs <> ")" -- | CFWS: comment folding white space. -- | -- | CFWS = (1*([FWS] comment) [FWS]) / FWS -cfws :: forall e. Parser e Unit -cfws = do void $ many1 $ do _ <- tryMaybe fws - comment +cfws :: forall e. Parser e String +cfws = do xs <- many1 $ do _ <- tryMaybe fws + comment + pure $ A.fold xs <|> fws -- | `address`: email address. @@ -167,8 +172,7 @@ address_list = do a <- address -- | -- | group-list = mailbox-list / CFWS / obs-group-list group_list :: forall e. Parser e String -group_list = mailbox_list <|> do _ <- cfws <|> obs_group_list - pure "" +group_list = mailbox_list <|> cfws <|> obs_group_list -- | `atext`: atom accepted characters. @@ -319,8 +323,7 @@ obs_route = do l <- obs_domain_list -- | obs-domain-list = *(CFWS / ",") "@" domain -- | *("," [CFWS] ["@" domain]) obs_domain_list :: forall e. Parser e String -obs_domain_list = do _ <- A.many $ cfws <|> do _ <- char ',' - pure unit +obs_domain_list = do _ <- A.many $ cfws <|> CU.singleton <$> char ',' _ <- char '@' domain @@ -354,11 +357,12 @@ obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws -- | `obs_group_list` -- | -- | obs-group-list = 1*([CFWS] ",") [CFWS] -obs_group_list :: forall e. Parser e Unit -obs_group_list = do _ <- many1 $ do _ <- tryMaybe cfws - char ',' - _ <- tryMaybe cfws - pure unit +obs_group_list :: forall e. Parser e String +obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws + _ <- char ',' + pure $ maybe "" id x <> "," + c <- tryMaybe cfws + pure $ A.fold xs <> maybe "" id c -- | `obs_local_part` -- | @@ -408,6 +412,8 @@ obs_no_ws_ctl = sat cond obs_ctext :: forall e. Parser e Char obs_ctext = obs_no_ws_ctl +-- | `obs_qtext`: obsolete accepted quoted text. +-- | -- | obs-qtext = obs-NO-WS-CTL obs_qtext :: forall e. Parser e Char obs_qtext = obs_no_ws_ctl @@ -527,7 +533,7 @@ obs_phrase_list = do first_phrase <- phrase_or_cfws -- -- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext qtext :: forall e. Parser e Char -qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext +qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext -- | qcontent = qtext / quoted-pair -- | @@ -541,8 +547,9 @@ qcontent = CU.singleton <$> qtext <|> quoted_pair quoted_string :: forall e. Parser e String quoted_string = do _ <- tryMaybe cfws _ <- char '"' - s <- A.many $ do _ <- tryMaybe fws - qcontent + s <- A.many $ do xs <- tryMaybe fws + c <- qcontent + pure $ maybe "" id xs <> c _ <- char '"' _ <- tryMaybe cfws pure $ "\"" <> A.fold s <> "\"" diff --git a/test/Main.purs b/test/Main.purs index ef65d23..abf686d 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -23,7 +23,7 @@ logtest fname (Parser p) str r e = do Left { position, error } -> "failed at position " <> show position <> case error of Nothing -> " -> no error reported" Just err -> " -> error: " <> e err - Right { suffix, result } -> (r result) <> " '" <> suffix.string <> "'" + Right { suffix, result } -> ">[" <> (r result) <> "]< '" <> suffix.string <> "'" id :: forall a. a -> a id a = a