WIP RFC5234 (augmented bnf) and RFC5322 (email addresses).
parent
e2a919d78c
commit
ef336bd2de
|
@ -2,7 +2,7 @@
|
|||
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
||||
module GenericParser.EmailAddress where
|
||||
|
||||
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between)
|
||||
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
|
||||
|
||||
import Control.Alt ((<|>))
|
||||
import Data.Array as A
|
||||
|
@ -21,22 +21,83 @@ data EmailError
|
|||
= InvalidCharacter
|
||||
| InvalidDomain (Maybe DomainError)
|
||||
|
||||
-- | RFC 5234:
|
||||
|
||||
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||
--
|
||||
--BIT = "0" / "1"
|
||||
--
|
||||
--CHAR = %x01-7F
|
||||
-- ; any 7-bit US-ASCII character,
|
||||
-- ; excluding NUL
|
||||
--
|
||||
--CR = %x0D
|
||||
-- ; carriage return
|
||||
--
|
||||
--CRLF = CR LF
|
||||
-- ; Internet standard newline
|
||||
--
|
||||
--CTL = %x00-1F / %x7F
|
||||
-- ; controls
|
||||
--
|
||||
--DIGIT = %x30-39
|
||||
-- ; 0-9
|
||||
--
|
||||
--DQUOTE = %x22
|
||||
-- ; " (Double Quote)
|
||||
--
|
||||
--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
||||
|
||||
-- | HTAB: horizontal tab.
|
||||
-- | HTAB = %x09
|
||||
htab :: forall e. Parser e Char
|
||||
htab = char '\t'
|
||||
|
||||
--LF = %x0A
|
||||
-- ; linefeed
|
||||
--
|
||||
--LWSP = *(WSP / CRLF WSP)
|
||||
-- ; Use of this linear-white-space rule
|
||||
-- ; permits lines containing only white
|
||||
-- ; space that are no longer legal in
|
||||
-- ; mail headers and have caused
|
||||
-- ; interoperability problems in other
|
||||
-- ; contexts.
|
||||
-- ; Do not use when defining mail
|
||||
-- ; headers and use with caution in
|
||||
-- ; other contexts.
|
||||
--
|
||||
--OCTET = %x00-FF
|
||||
-- ; 8 bits of data
|
||||
--
|
||||
-- | SP: space.
|
||||
-- |
|
||||
-- | SP = %x20
|
||||
sp :: forall e. Parser e Char
|
||||
sp = char ' '
|
||||
|
||||
-- | VCHAR = %x21-7E
|
||||
-- |
|
||||
-- | Visible printing characters.
|
||||
vchar :: forall e. Parser e Char
|
||||
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
||||
|
||||
-- | WSP: white space.
|
||||
-- |
|
||||
-- | WSP = SP / HTAB
|
||||
wsp :: forall e. Parser e Char
|
||||
wsp = sp <|> htab
|
||||
|
||||
|
||||
crlf :: forall e. Parser e Unit
|
||||
crlf = do _ <- char '\r'
|
||||
_ <- char '\n'
|
||||
pure unit
|
||||
|
||||
-- | WSP = a white space.
|
||||
-- |
|
||||
-- | TODO: I assumed it's just a space or tab. Verify and fix.
|
||||
wsp :: Parser EmailError Char
|
||||
wsp = char ' ' <|> char '\t'
|
||||
--wsp = space -- in case we want any possible space value
|
||||
|
||||
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
||||
-- |
|
||||
-- | Obsolete FWS.
|
||||
obs_fws :: Parser EmailError Unit
|
||||
obs_fws :: forall e. Parser e Unit
|
||||
obs_fws = do _ <- A.many wsp
|
||||
_ <- A.many $ do _ <- crlf
|
||||
_ <- many1 wsp
|
||||
|
@ -48,7 +109,7 @@ obs_fws = do _ <- A.many wsp
|
|||
-- In english: FWS is described as:
|
||||
-- 1. an OPTIONAL line with potential white spaces followed by at least one white space
|
||||
-- 2. or, by the obs-FWS rule (meaning: many empty lines)
|
||||
fws :: Parser EmailError Unit
|
||||
fws :: forall e. Parser e Unit
|
||||
fws = do _ <- tryMaybe do _ <- A.many wsp
|
||||
_ <- crlf
|
||||
pure unit
|
||||
|
@ -60,27 +121,44 @@ fws = do _ <- tryMaybe do _ <- A.many wsp
|
|||
-- %d42-91 / ; characters not including
|
||||
-- %d93-126 / ; "(", ")", or "\"
|
||||
-- obs-ctext
|
||||
ctext :: Parser EmailError Char
|
||||
ctext :: forall e. Parser e Char
|
||||
ctext = sat cond <|> obs_ctext
|
||||
where cond x = let charcode = C.toCharCode x
|
||||
in between 33 39 charcode
|
||||
|| between 42 91 charcode
|
||||
|| between 93 126 charcode
|
||||
|
||||
-- ccontent = ctext / quoted-pair / comment
|
||||
--ccontent :: Parser EmailError String
|
||||
--ccontent = do
|
||||
-- | TODO
|
||||
quoted_pair :: forall e. Parser e Char
|
||||
quoted_pair = char ' '
|
||||
|
||||
-- comment = "(" *([FWS] ccontent) [FWS] ")"
|
||||
--comment :: Parser EmailError String
|
||||
--comment = do
|
||||
-- | ccontent = ctext / quoted-pair / comment
|
||||
-- |
|
||||
-- | Comment content. TODO
|
||||
ccontent :: forall e. Parser e Unit
|
||||
ccontent = many_ctext <|> a_quoted_pair <|> comment
|
||||
where many_ctext :: Parser e Unit
|
||||
many_ctext = do void $ A.many ctext
|
||||
a_quoted_pair :: Parser e Unit
|
||||
a_quoted_pair = do void quoted_pair
|
||||
|
||||
-- | comment = "(" *([FWS] ccontent) [FWS] ")"
|
||||
-- |
|
||||
-- | Comment. Nothing to return.
|
||||
comment :: forall e. Parser e Unit
|
||||
comment = do _ <- char '('
|
||||
_ <- A.many (do _ <- A.many fws
|
||||
_ <- ccontent
|
||||
pure unit)
|
||||
_ <- char ')'
|
||||
pure unit
|
||||
|
||||
-- CFWS = (1*([FWS] comment) [FWS]) / FWS
|
||||
--cfws :: Parser EmailError String
|
||||
--cfws :: forall e. Parser e String
|
||||
--cfws = do
|
||||
|
||||
-- address = mailbox / group
|
||||
--address :: Parser EmailError String
|
||||
--address :: forall e. Parser e String
|
||||
--address = do
|
||||
|
||||
--mailbox = name-addr / addr-spec
|
||||
|
@ -138,7 +216,7 @@ dtext = sat cond <|> obs_dtext
|
|||
-- | TODO: Obsolete domain text.
|
||||
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair
|
||||
obs_dtext :: forall e. Parser e Char
|
||||
obs_dtext = obs_no_ws_ctl --<|> quoted_pair
|
||||
obs_dtext = obs_no_ws_ctl <|> quoted_pair
|
||||
|
||||
--obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
|
||||
-- %d11 / ; characters that do not
|
||||
|
|
Loading…
Reference in New Issue