Do not change the original input while parsing.

This commit is contained in:
Philippe Pittoli 2024-01-31 02:08:11 +01:00
parent 45f867f3c5
commit 498343c96e
2 changed files with 42 additions and 35 deletions

View File

@ -2,9 +2,10 @@
-- | This module is experimental and doesn't follow every rule for an email address, yet.
module GenericParser.EmailAddress where
import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit)
import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit)
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array as A
import Data.Char as C
import Data.Either (Either(..))
@ -28,20 +29,26 @@ data EmailError
-- | obs-FWS: obsolete folding white space.
-- |
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
obs_fws :: forall e. Parser e Unit
obs_fws = do _ <- A.many wsp
void $ A.many $ do _ <- crlf
void $ many1 wsp
obs_fws :: forall e. Parser e String
obs_fws = do x <- A.many wsp
xs <- A.many $ do v <- crlf
w <- many1 wsp
pure $ v <> CU.fromCharArray w
pure $ A.fold $ [CU.fromCharArray x] <> xs
-- | FWS: folding white space. This can be described in plain english as:
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
-- |
-- | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
fws :: forall e. Parser e Unit
fws = do _ <- tryMaybe do _ <- A.many wsp
crlf
void $ many1 wsp
fws :: forall e. Parser e String
fws = do x <- tryMaybe do xs <- A.many wsp
v <- crlf
pure $ CU.fromCharArray xs <> v
w <- many1 wsp
let first_part = maybe "" id x
second_part = CU.fromCharArray w
pure $ first_part <> second_part
<|> obs_fws
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
@ -69,28 +76,26 @@ quoted_pair = do _ <- char '\\'
-- | Comment content.
-- |
-- | ccontent = ctext / quoted-pair / comment
ccontent :: forall e. Parser e Unit
ccontent = a_ctext <|> a_quoted_pair <|> comment
where a_ctext :: Parser e Unit
a_ctext = void ctext
a_quoted_pair :: Parser e Unit
a_quoted_pair = void quoted_pair
ccontent :: forall e. Parser e String
ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
-- | Comment. Nothing to return since comments aren't to be processed.
-- |
-- | comment = "(" *([FWS] ccontent) [FWS] ")"
comment :: forall e. Parser e Unit
comment :: forall e. Parser e String
comment = do _ <- char '('
_ <- A.many (do _ <- A.many fws
void ccontent)
void $ char ')'
xs <- A.many do _ <- A.many fws
ccontent
_ <- char ')'
pure $ "(" <> A.fold xs <> ")"
-- | CFWS: comment folding white space.
-- |
-- | CFWS = (1*([FWS] comment) [FWS]) / FWS
cfws :: forall e. Parser e Unit
cfws = do void $ many1 $ do _ <- tryMaybe fws
comment
cfws :: forall e. Parser e String
cfws = do xs <- many1 $ do _ <- tryMaybe fws
comment
pure $ A.fold xs
<|> fws
-- | `address`: email address.
@ -167,8 +172,7 @@ address_list = do a <- address
-- |
-- | group-list = mailbox-list / CFWS / obs-group-list
group_list :: forall e. Parser e String
group_list = mailbox_list <|> do _ <- cfws <|> obs_group_list
pure ""
group_list = mailbox_list <|> cfws <|> obs_group_list
-- | `atext`: atom accepted characters.
@ -319,8 +323,7 @@ obs_route = do l <- obs_domain_list
-- | obs-domain-list = *(CFWS / ",") "@" domain
-- | *("," [CFWS] ["@" domain])
obs_domain_list :: forall e. Parser e String
obs_domain_list = do _ <- A.many $ cfws <|> do _ <- char ','
pure unit
obs_domain_list = do _ <- A.many $ cfws <|> CU.singleton <$> char ','
_ <- char '@'
domain
@ -354,11 +357,12 @@ obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
-- | `obs_group_list`
-- |
-- | obs-group-list = 1*([CFWS] ",") [CFWS]
obs_group_list :: forall e. Parser e Unit
obs_group_list = do _ <- many1 $ do _ <- tryMaybe cfws
char ','
_ <- tryMaybe cfws
pure unit
obs_group_list :: forall e. Parser e String
obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
_ <- char ','
pure $ maybe "" id x <> ","
c <- tryMaybe cfws
pure $ A.fold xs <> maybe "" id c
-- | `obs_local_part`
-- |
@ -408,6 +412,8 @@ obs_no_ws_ctl = sat cond
obs_ctext :: forall e. Parser e Char
obs_ctext = obs_no_ws_ctl
-- | `obs_qtext`: obsolete accepted quoted text.
-- |
-- | obs-qtext = obs-NO-WS-CTL
obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl
@ -527,7 +533,7 @@ obs_phrase_list = do first_phrase <- phrase_or_cfws
--
-- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext
qtext :: forall e. Parser e Char
qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
-- | qcontent = qtext / quoted-pair
-- |
@ -541,8 +547,9 @@ qcontent = CU.singleton <$> qtext <|> quoted_pair
quoted_string :: forall e. Parser e String
quoted_string = do _ <- tryMaybe cfws
_ <- char '"'
s <- A.many $ do _ <- tryMaybe fws
qcontent
s <- A.many $ do xs <- tryMaybe fws
c <- qcontent
pure $ maybe "" id xs <> c
_ <- char '"'
_ <- tryMaybe cfws
pure $ "\"" <> A.fold s <> "\""

View File

@ -23,7 +23,7 @@ logtest fname (Parser p) str r e = do
Left { position, error } -> "failed at position " <> show position <> case error of
Nothing -> " -> no error reported"
Just err -> " -> error: " <> e err
Right { suffix, result } -> (r result) <> " '" <> suffix.string <> "'"
Right { suffix, result } -> ">[" <> (r result) <> "]< '" <> suffix.string <> "'"
id :: forall a. a -> a
id a = a