WIP RFC5234 (augmented bnf) and RFC5322 (email addresses).
parent
e2a919d78c
commit
ef336bd2de
|
@ -2,7 +2,7 @@
|
||||||
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
||||||
module GenericParser.EmailAddress where
|
module GenericParser.EmailAddress where
|
||||||
|
|
||||||
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between)
|
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
|
||||||
|
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Data.Array as A
|
import Data.Array as A
|
||||||
|
@ -21,22 +21,83 @@ data EmailError
|
||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
| InvalidDomain (Maybe DomainError)
|
| InvalidDomain (Maybe DomainError)
|
||||||
|
|
||||||
|
-- | RFC 5234:
|
||||||
|
|
||||||
|
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||||
|
--
|
||||||
|
--BIT = "0" / "1"
|
||||||
|
--
|
||||||
|
--CHAR = %x01-7F
|
||||||
|
-- ; any 7-bit US-ASCII character,
|
||||||
|
-- ; excluding NUL
|
||||||
|
--
|
||||||
|
--CR = %x0D
|
||||||
|
-- ; carriage return
|
||||||
|
--
|
||||||
|
--CRLF = CR LF
|
||||||
|
-- ; Internet standard newline
|
||||||
|
--
|
||||||
|
--CTL = %x00-1F / %x7F
|
||||||
|
-- ; controls
|
||||||
|
--
|
||||||
|
--DIGIT = %x30-39
|
||||||
|
-- ; 0-9
|
||||||
|
--
|
||||||
|
--DQUOTE = %x22
|
||||||
|
-- ; " (Double Quote)
|
||||||
|
--
|
||||||
|
--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
||||||
|
|
||||||
|
-- | HTAB: horizontal tab.
|
||||||
|
-- | HTAB = %x09
|
||||||
|
htab :: forall e. Parser e Char
|
||||||
|
htab = char '\t'
|
||||||
|
|
||||||
|
--LF = %x0A
|
||||||
|
-- ; linefeed
|
||||||
|
--
|
||||||
|
--LWSP = *(WSP / CRLF WSP)
|
||||||
|
-- ; Use of this linear-white-space rule
|
||||||
|
-- ; permits lines containing only white
|
||||||
|
-- ; space that are no longer legal in
|
||||||
|
-- ; mail headers and have caused
|
||||||
|
-- ; interoperability problems in other
|
||||||
|
-- ; contexts.
|
||||||
|
-- ; Do not use when defining mail
|
||||||
|
-- ; headers and use with caution in
|
||||||
|
-- ; other contexts.
|
||||||
|
--
|
||||||
|
--OCTET = %x00-FF
|
||||||
|
-- ; 8 bits of data
|
||||||
|
--
|
||||||
|
-- | SP: space.
|
||||||
|
-- |
|
||||||
|
-- | SP = %x20
|
||||||
|
sp :: forall e. Parser e Char
|
||||||
|
sp = char ' '
|
||||||
|
|
||||||
|
-- | VCHAR = %x21-7E
|
||||||
|
-- |
|
||||||
|
-- | Visible printing characters.
|
||||||
|
vchar :: forall e. Parser e Char
|
||||||
|
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
||||||
|
|
||||||
|
-- | WSP: white space.
|
||||||
|
-- |
|
||||||
|
-- | WSP = SP / HTAB
|
||||||
|
wsp :: forall e. Parser e Char
|
||||||
|
wsp = sp <|> htab
|
||||||
|
|
||||||
|
|
||||||
crlf :: forall e. Parser e Unit
|
crlf :: forall e. Parser e Unit
|
||||||
crlf = do _ <- char '\r'
|
crlf = do _ <- char '\r'
|
||||||
_ <- char '\n'
|
_ <- char '\n'
|
||||||
pure unit
|
pure unit
|
||||||
|
|
||||||
-- | WSP = a white space.
|
|
||||||
-- |
|
|
||||||
-- | TODO: I assumed it's just a space or tab. Verify and fix.
|
|
||||||
wsp :: Parser EmailError Char
|
|
||||||
wsp = char ' ' <|> char '\t'
|
|
||||||
--wsp = space -- in case we want any possible space value
|
|
||||||
|
|
||||||
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
||||||
-- |
|
-- |
|
||||||
-- | Obsolete FWS.
|
-- | Obsolete FWS.
|
||||||
obs_fws :: Parser EmailError Unit
|
obs_fws :: forall e. Parser e Unit
|
||||||
obs_fws = do _ <- A.many wsp
|
obs_fws = do _ <- A.many wsp
|
||||||
_ <- A.many $ do _ <- crlf
|
_ <- A.many $ do _ <- crlf
|
||||||
_ <- many1 wsp
|
_ <- many1 wsp
|
||||||
|
@ -48,7 +109,7 @@ obs_fws = do _ <- A.many wsp
|
||||||
-- In english: FWS is described as:
|
-- In english: FWS is described as:
|
||||||
-- 1. an OPTIONAL line with potential white spaces followed by at least one white space
|
-- 1. an OPTIONAL line with potential white spaces followed by at least one white space
|
||||||
-- 2. or, by the obs-FWS rule (meaning: many empty lines)
|
-- 2. or, by the obs-FWS rule (meaning: many empty lines)
|
||||||
fws :: Parser EmailError Unit
|
fws :: forall e. Parser e Unit
|
||||||
fws = do _ <- tryMaybe do _ <- A.many wsp
|
fws = do _ <- tryMaybe do _ <- A.many wsp
|
||||||
_ <- crlf
|
_ <- crlf
|
||||||
pure unit
|
pure unit
|
||||||
|
@ -60,27 +121,44 @@ fws = do _ <- tryMaybe do _ <- A.many wsp
|
||||||
-- %d42-91 / ; characters not including
|
-- %d42-91 / ; characters not including
|
||||||
-- %d93-126 / ; "(", ")", or "\"
|
-- %d93-126 / ; "(", ")", or "\"
|
||||||
-- obs-ctext
|
-- obs-ctext
|
||||||
ctext :: Parser EmailError Char
|
ctext :: forall e. Parser e Char
|
||||||
ctext = sat cond <|> obs_ctext
|
ctext = sat cond <|> obs_ctext
|
||||||
where cond x = let charcode = C.toCharCode x
|
where cond x = let charcode = C.toCharCode x
|
||||||
in between 33 39 charcode
|
in between 33 39 charcode
|
||||||
|| between 42 91 charcode
|
|| between 42 91 charcode
|
||||||
|| between 93 126 charcode
|
|| between 93 126 charcode
|
||||||
|
|
||||||
-- ccontent = ctext / quoted-pair / comment
|
-- | TODO
|
||||||
--ccontent :: Parser EmailError String
|
quoted_pair :: forall e. Parser e Char
|
||||||
--ccontent = do
|
quoted_pair = char ' '
|
||||||
|
|
||||||
-- comment = "(" *([FWS] ccontent) [FWS] ")"
|
-- | ccontent = ctext / quoted-pair / comment
|
||||||
--comment :: Parser EmailError String
|
-- |
|
||||||
--comment = do
|
-- | Comment content. TODO
|
||||||
|
ccontent :: forall e. Parser e Unit
|
||||||
|
ccontent = many_ctext <|> a_quoted_pair <|> comment
|
||||||
|
where many_ctext :: Parser e Unit
|
||||||
|
many_ctext = do void $ A.many ctext
|
||||||
|
a_quoted_pair :: Parser e Unit
|
||||||
|
a_quoted_pair = do void quoted_pair
|
||||||
|
|
||||||
|
-- | comment = "(" *([FWS] ccontent) [FWS] ")"
|
||||||
|
-- |
|
||||||
|
-- | Comment. Nothing to return.
|
||||||
|
comment :: forall e. Parser e Unit
|
||||||
|
comment = do _ <- char '('
|
||||||
|
_ <- A.many (do _ <- A.many fws
|
||||||
|
_ <- ccontent
|
||||||
|
pure unit)
|
||||||
|
_ <- char ')'
|
||||||
|
pure unit
|
||||||
|
|
||||||
-- CFWS = (1*([FWS] comment) [FWS]) / FWS
|
-- CFWS = (1*([FWS] comment) [FWS]) / FWS
|
||||||
--cfws :: Parser EmailError String
|
--cfws :: forall e. Parser e String
|
||||||
--cfws = do
|
--cfws = do
|
||||||
|
|
||||||
-- address = mailbox / group
|
-- address = mailbox / group
|
||||||
--address :: Parser EmailError String
|
--address :: forall e. Parser e String
|
||||||
--address = do
|
--address = do
|
||||||
|
|
||||||
--mailbox = name-addr / addr-spec
|
--mailbox = name-addr / addr-spec
|
||||||
|
@ -138,7 +216,7 @@ dtext = sat cond <|> obs_dtext
|
||||||
-- | TODO: Obsolete domain text.
|
-- | TODO: Obsolete domain text.
|
||||||
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair
|
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair
|
||||||
obs_dtext :: forall e. Parser e Char
|
obs_dtext :: forall e. Parser e Char
|
||||||
obs_dtext = obs_no_ws_ctl --<|> quoted_pair
|
obs_dtext = obs_no_ws_ctl <|> quoted_pair
|
||||||
|
|
||||||
--obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
|
--obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
|
||||||
-- %d11 / ; characters that do not
|
-- %d11 / ; characters that do not
|
||||||
|
|
Loading…
Reference in New Issue