Do not change the original input while parsing.

This commit is contained in:
Philippe Pittoli 2024-01-31 02:08:11 +01:00
parent 45f867f3c5
commit 498343c96e
2 changed files with 42 additions and 35 deletions

View file

@ -2,9 +2,10 @@
-- | This module is experimental and doesn't follow every rule for an email address, yet. -- | This module is experimental and doesn't follow every rule for an email address, yet.
module GenericParser.EmailAddress where module GenericParser.EmailAddress where
import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit) import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit)
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array as A import Data.Array as A
import Data.Char as C import Data.Char as C
import Data.Either (Either(..)) import Data.Either (Either(..))
@ -28,20 +29,26 @@ data EmailError
-- | obs-FWS: obsolete folding white space. -- | obs-FWS: obsolete folding white space.
-- | -- |
-- | obs-FWS = 1*WSP *(CRLF 1*WSP) -- | obs-FWS = 1*WSP *(CRLF 1*WSP)
obs_fws :: forall e. Parser e Unit obs_fws :: forall e. Parser e String
obs_fws = do _ <- A.many wsp obs_fws = do x <- A.many wsp
void $ A.many $ do _ <- crlf xs <- A.many $ do v <- crlf
void $ many1 wsp w <- many1 wsp
pure $ v <> CU.fromCharArray w
pure $ A.fold $ [CU.fromCharArray x] <> xs
-- | FWS: folding white space. This can be described in plain english as: -- | FWS: folding white space. This can be described in plain english as:
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space -- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
-- | 2. or, by the obs-FWS rule (meaning: many empty lines) -- | 2. or, by the obs-FWS rule (meaning: many empty lines)
-- | -- |
-- | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS -- | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
fws :: forall e. Parser e Unit fws :: forall e. Parser e String
fws = do _ <- tryMaybe do _ <- A.many wsp fws = do x <- tryMaybe do xs <- A.many wsp
crlf v <- crlf
void $ many1 wsp pure $ CU.fromCharArray xs <> v
w <- many1 wsp
let first_part = maybe "" id x
second_part = CU.fromCharArray w
pure $ first_part <> second_part
<|> obs_fws <|> obs_fws
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'. -- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
@ -69,28 +76,26 @@ quoted_pair = do _ <- char '\\'
-- | Comment content. -- | Comment content.
-- | -- |
-- | ccontent = ctext / quoted-pair / comment -- | ccontent = ctext / quoted-pair / comment
ccontent :: forall e. Parser e Unit ccontent :: forall e. Parser e String
ccontent = a_ctext <|> a_quoted_pair <|> comment ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
where a_ctext :: Parser e Unit
a_ctext = void ctext
a_quoted_pair :: Parser e Unit
a_quoted_pair = void quoted_pair
-- | Comment. Nothing to return since comments aren't to be processed. -- | Comment. Nothing to return since comments aren't to be processed.
-- | -- |
-- | comment = "(" *([FWS] ccontent) [FWS] ")" -- | comment = "(" *([FWS] ccontent) [FWS] ")"
comment :: forall e. Parser e Unit comment :: forall e. Parser e String
comment = do _ <- char '(' comment = do _ <- char '('
_ <- A.many (do _ <- A.many fws xs <- A.many do _ <- A.many fws
void ccontent) ccontent
void $ char ')' _ <- char ')'
pure $ "(" <> A.fold xs <> ")"
-- | CFWS: comment folding white space. -- | CFWS: comment folding white space.
-- | -- |
-- | CFWS = (1*([FWS] comment) [FWS]) / FWS -- | CFWS = (1*([FWS] comment) [FWS]) / FWS
cfws :: forall e. Parser e Unit cfws :: forall e. Parser e String
cfws = do void $ many1 $ do _ <- tryMaybe fws cfws = do xs <- many1 $ do _ <- tryMaybe fws
comment comment
pure $ A.fold xs
<|> fws <|> fws
-- | `address`: email address. -- | `address`: email address.
@ -167,8 +172,7 @@ address_list = do a <- address
-- | -- |
-- | group-list = mailbox-list / CFWS / obs-group-list -- | group-list = mailbox-list / CFWS / obs-group-list
group_list :: forall e. Parser e String group_list :: forall e. Parser e String
group_list = mailbox_list <|> do _ <- cfws <|> obs_group_list group_list = mailbox_list <|> cfws <|> obs_group_list
pure ""
-- | `atext`: atom accepted characters. -- | `atext`: atom accepted characters.
@ -319,8 +323,7 @@ obs_route = do l <- obs_domain_list
-- | obs-domain-list = *(CFWS / ",") "@" domain -- | obs-domain-list = *(CFWS / ",") "@" domain
-- | *("," [CFWS] ["@" domain]) -- | *("," [CFWS] ["@" domain])
obs_domain_list :: forall e. Parser e String obs_domain_list :: forall e. Parser e String
obs_domain_list = do _ <- A.many $ cfws <|> do _ <- char ',' obs_domain_list = do _ <- A.many $ cfws <|> CU.singleton <$> char ','
pure unit
_ <- char '@' _ <- char '@'
domain domain
@ -354,11 +357,12 @@ obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
-- | `obs_group_list` -- | `obs_group_list`
-- | -- |
-- | obs-group-list = 1*([CFWS] ",") [CFWS] -- | obs-group-list = 1*([CFWS] ",") [CFWS]
obs_group_list :: forall e. Parser e Unit obs_group_list :: forall e. Parser e String
obs_group_list = do _ <- many1 $ do _ <- tryMaybe cfws obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
char ',' _ <- char ','
_ <- tryMaybe cfws pure $ maybe "" id x <> ","
pure unit c <- tryMaybe cfws
pure $ A.fold xs <> maybe "" id c
-- | `obs_local_part` -- | `obs_local_part`
-- | -- |
@ -408,6 +412,8 @@ obs_no_ws_ctl = sat cond
obs_ctext :: forall e. Parser e Char obs_ctext :: forall e. Parser e Char
obs_ctext = obs_no_ws_ctl obs_ctext = obs_no_ws_ctl
-- | `obs_qtext`: obsolete accepted quoted text.
-- |
-- | obs-qtext = obs-NO-WS-CTL -- | obs-qtext = obs-NO-WS-CTL
obs_qtext :: forall e. Parser e Char obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl obs_qtext = obs_no_ws_ctl
@ -527,7 +533,7 @@ obs_phrase_list = do first_phrase <- phrase_or_cfws
-- --
-- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext -- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext
qtext :: forall e. Parser e Char qtext :: forall e. Parser e Char
qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
-- | qcontent = qtext / quoted-pair -- | qcontent = qtext / quoted-pair
-- | -- |
@ -541,8 +547,9 @@ qcontent = CU.singleton <$> qtext <|> quoted_pair
quoted_string :: forall e. Parser e String quoted_string :: forall e. Parser e String
quoted_string = do _ <- tryMaybe cfws quoted_string = do _ <- tryMaybe cfws
_ <- char '"' _ <- char '"'
s <- A.many $ do _ <- tryMaybe fws s <- A.many $ do xs <- tryMaybe fws
qcontent c <- qcontent
pure $ maybe "" id xs <> c
_ <- char '"' _ <- char '"'
_ <- tryMaybe cfws _ <- tryMaybe cfws
pure $ "\"" <> A.fold s <> "\"" pure $ "\"" <> A.fold s <> "\""

View file

@ -23,7 +23,7 @@ logtest fname (Parser p) str r e = do
Left { position, error } -> "failed at position " <> show position <> case error of Left { position, error } -> "failed at position " <> show position <> case error of
Nothing -> " -> no error reported" Nothing -> " -> no error reported"
Just err -> " -> error: " <> e err Just err -> " -> error: " <> e err
Right { suffix, result } -> (r result) <> " '" <> suffix.string <> "'" Right { suffix, result } -> ">[" <> (r result) <> "]< '" <> suffix.string <> "'"
id :: forall a. a -> a id :: forall a. a -> a
id a = a id a = a