WIP RFC5234 (augmented bnf) and RFC5322 (email addresses).

master
Philippe Pittoli 2024-01-27 04:46:15 +01:00
parent e2a919d78c
commit ef336bd2de
1 changed files with 98 additions and 20 deletions

View File

@ -2,7 +2,7 @@
-- | This module is experimental and doesn't follow every rule for an email address, yet. -- | This module is experimental and doesn't follow every rule for an email address, yet.
module GenericParser.EmailAddress where module GenericParser.EmailAddress where
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between) import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Data.Array as A import Data.Array as A
@ -21,22 +21,83 @@ data EmailError
= InvalidCharacter = InvalidCharacter
| InvalidDomain (Maybe DomainError) | InvalidDomain (Maybe DomainError)
-- | RFC 5234:
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
--
--BIT = "0" / "1"
--
--CHAR = %x01-7F
-- ; any 7-bit US-ASCII character,
-- ; excluding NUL
--
--CR = %x0D
-- ; carriage return
--
--CRLF = CR LF
-- ; Internet standard newline
--
--CTL = %x00-1F / %x7F
-- ; controls
--
--DIGIT = %x30-39
-- ; 0-9
--
--DQUOTE = %x22
-- ; " (Double Quote)
--
--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
-- | HTAB: horizontal tab.
-- | HTAB = %x09
htab :: forall e. Parser e Char
htab = char '\t'
--LF = %x0A
-- ; linefeed
--
--LWSP = *(WSP / CRLF WSP)
-- ; Use of this linear-white-space rule
-- ; permits lines containing only white
-- ; space that are no longer legal in
-- ; mail headers and have caused
-- ; interoperability problems in other
-- ; contexts.
-- ; Do not use when defining mail
-- ; headers and use with caution in
-- ; other contexts.
--
--OCTET = %x00-FF
-- ; 8 bits of data
--
-- | SP: space.
-- |
-- | SP = %x20
sp :: forall e. Parser e Char
sp = char ' '
-- | VCHAR = %x21-7E
-- |
-- | Visible printing characters.
vchar :: forall e. Parser e Char
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
-- | WSP: white space.
-- |
-- | WSP = SP / HTAB
wsp :: forall e. Parser e Char
wsp = sp <|> htab
crlf :: forall e. Parser e Unit crlf :: forall e. Parser e Unit
crlf = do _ <- char '\r' crlf = do _ <- char '\r'
_ <- char '\n' _ <- char '\n'
pure unit pure unit
-- | WSP = a white space.
-- |
-- | TODO: I assumed it's just a space or tab. Verify and fix.
wsp :: Parser EmailError Char
wsp = char ' ' <|> char '\t'
--wsp = space -- in case we want any possible space value
-- | obs-FWS = 1*WSP *(CRLF 1*WSP) -- | obs-FWS = 1*WSP *(CRLF 1*WSP)
-- | -- |
-- | Obsolete FWS. -- | Obsolete FWS.
obs_fws :: Parser EmailError Unit obs_fws :: forall e. Parser e Unit
obs_fws = do _ <- A.many wsp obs_fws = do _ <- A.many wsp
_ <- A.many $ do _ <- crlf _ <- A.many $ do _ <- crlf
_ <- many1 wsp _ <- many1 wsp
@ -48,7 +109,7 @@ obs_fws = do _ <- A.many wsp
-- In english: FWS is described as: -- In english: FWS is described as:
-- 1. an OPTIONAL line with potential white spaces followed by at least one white space -- 1. an OPTIONAL line with potential white spaces followed by at least one white space
-- 2. or, by the obs-FWS rule (meaning: many empty lines) -- 2. or, by the obs-FWS rule (meaning: many empty lines)
fws :: Parser EmailError Unit fws :: forall e. Parser e Unit
fws = do _ <- tryMaybe do _ <- A.many wsp fws = do _ <- tryMaybe do _ <- A.many wsp
_ <- crlf _ <- crlf
pure unit pure unit
@ -60,27 +121,44 @@ fws = do _ <- tryMaybe do _ <- A.many wsp
-- %d42-91 / ; characters not including -- %d42-91 / ; characters not including
-- %d93-126 / ; "(", ")", or "\" -- %d93-126 / ; "(", ")", or "\"
-- obs-ctext -- obs-ctext
ctext :: Parser EmailError Char ctext :: forall e. Parser e Char
ctext = sat cond <|> obs_ctext ctext = sat cond <|> obs_ctext
where cond x = let charcode = C.toCharCode x where cond x = let charcode = C.toCharCode x
in between 33 39 charcode in between 33 39 charcode
|| between 42 91 charcode || between 42 91 charcode
|| between 93 126 charcode || between 93 126 charcode
-- ccontent = ctext / quoted-pair / comment -- | TODO
--ccontent :: Parser EmailError String quoted_pair :: forall e. Parser e Char
--ccontent = do quoted_pair = char ' '
-- comment = "(" *([FWS] ccontent) [FWS] ")" -- | ccontent = ctext / quoted-pair / comment
--comment :: Parser EmailError String -- |
--comment = do -- | Comment content. TODO
ccontent :: forall e. Parser e Unit
ccontent = many_ctext <|> a_quoted_pair <|> comment
where many_ctext :: Parser e Unit
many_ctext = do void $ A.many ctext
a_quoted_pair :: Parser e Unit
a_quoted_pair = do void quoted_pair
-- | comment = "(" *([FWS] ccontent) [FWS] ")"
-- |
-- | Comment. Nothing to return.
comment :: forall e. Parser e Unit
comment = do _ <- char '('
_ <- A.many (do _ <- A.many fws
_ <- ccontent
pure unit)
_ <- char ')'
pure unit
-- CFWS = (1*([FWS] comment) [FWS]) / FWS -- CFWS = (1*([FWS] comment) [FWS]) / FWS
--cfws :: Parser EmailError String --cfws :: forall e. Parser e String
--cfws = do --cfws = do
-- address = mailbox / group -- address = mailbox / group
--address :: Parser EmailError String --address :: forall e. Parser e String
--address = do --address = do
--mailbox = name-addr / addr-spec --mailbox = name-addr / addr-spec
@ -138,7 +216,7 @@ dtext = sat cond <|> obs_dtext
-- | TODO: Obsolete domain text. -- | TODO: Obsolete domain text.
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair -- | obs-dtext = obs-NO-WS-CTL / quoted-pair
obs_dtext :: forall e. Parser e Char obs_dtext :: forall e. Parser e Char
obs_dtext = obs_no_ws_ctl --<|> quoted_pair obs_dtext = obs_no_ws_ctl <|> quoted_pair
--obs-NO-WS-CTL = %d1-8 / ; US-ASCII control --obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
-- %d11 / ; characters that do not -- %d11 / ; characters that do not