576 lines
20 KiB
Text
576 lines
20 KiB
Text
-- | `EmailAddress` is a parser for simple email addresses.
|
|
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
|
module GenericParser.EmailAddress where
|
|
|
|
import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit)
|
|
|
|
import Control.Alt ((<|>))
|
|
import Data.Array as A
|
|
import Data.Char as C
|
|
import Data.Either (Either(..))
|
|
import Data.Maybe (Maybe(..), maybe)
|
|
import Data.String.CodeUnits as CU
|
|
|
|
import GenericParser.BaseFunctions (id)
|
|
import GenericParser.Parser (Parser(..)
|
|
, sat, char, char_num, char_range, string, item, many1, tryMaybe
|
|
, current_input, failureError, parse, rollback, until)
|
|
import GenericParser.DomainParser.Common (DomainError)
|
|
import GenericParser.DomainParser (sub_eof)
|
|
-- ABNF core rules.
|
|
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
|
|
import GenericParser.SomeParsers (letter, alphanum)
|
|
|
|
data EmailError
|
|
= InvalidCharacter
|
|
| InvalidDomain (Maybe DomainError)
|
|
|
|
-- | obs-FWS: obsolete folding white space.
|
|
-- |
|
|
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
|
obs_fws :: forall e. Parser e Unit
|
|
obs_fws = do _ <- A.many wsp
|
|
void $ A.many $ do _ <- crlf
|
|
void $ many1 wsp
|
|
|
|
-- | FWS: folding white space. This can be described in plain english as:
|
|
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
|
|
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
|
|
-- |
|
|
-- | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
|
|
fws :: forall e. Parser e Unit
|
|
fws = do _ <- tryMaybe do _ <- A.many wsp
|
|
crlf
|
|
void $ many1 wsp
|
|
<|> obs_fws
|
|
|
|
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
|
|
-- |
|
|
-- | ctext = %d33-39 / ; Printable US-ASCII
|
|
-- | %d42-91 / ; characters not including
|
|
-- | %d93-126 / ; "(", ")", or "\"
|
|
-- | obs-ctext
|
|
ctext :: forall e. Parser e Char
|
|
ctext = sat cond <|> obs_ctext
|
|
where cond x = let charcode = C.toCharCode x
|
|
in between 33 39 charcode
|
|
|| between 42 91 charcode
|
|
|| between 93 126 charcode
|
|
|
|
-- | `quoted_pair`: pair of characters.
|
|
-- |
|
|
-- | quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
|
|
quoted_pair :: forall e. Parser e String
|
|
quoted_pair = do _ <- char '\\'
|
|
v <- vchar <|> wsp
|
|
pure $ "\\" <> CU.singleton v
|
|
<|> obs_qp
|
|
|
|
-- | Comment content.
|
|
-- |
|
|
-- | ccontent = ctext / quoted-pair / comment
|
|
ccontent :: forall e. Parser e Unit
|
|
ccontent = a_ctext <|> a_quoted_pair <|> comment
|
|
where a_ctext :: Parser e Unit
|
|
a_ctext = void ctext
|
|
a_quoted_pair :: Parser e Unit
|
|
a_quoted_pair = void quoted_pair
|
|
|
|
-- | Comment. Nothing to return since comments aren't to be processed.
|
|
-- |
|
|
-- | comment = "(" *([FWS] ccontent) [FWS] ")"
|
|
comment :: forall e. Parser e Unit
|
|
comment = do _ <- char '('
|
|
_ <- A.many (do _ <- A.many fws
|
|
void ccontent)
|
|
void $ char ')'
|
|
|
|
-- | CFWS: comment folding white space.
|
|
-- |
|
|
-- | CFWS = (1*([FWS] comment) [FWS]) / FWS
|
|
cfws :: forall e. Parser e Unit
|
|
cfws = do void $ many1 $ do _ <- tryMaybe fws
|
|
comment
|
|
<|> fws
|
|
|
|
-- | `address`: email address.
|
|
-- |
|
|
-- | address = mailbox / group
|
|
address :: forall e. Parser e String
|
|
address = mailbox <|> group
|
|
|
|
-- | `mailbox`: mail address.
|
|
-- |
|
|
-- | mailbox = name-addr / addr-spec
|
|
mailbox :: forall e. Parser e String
|
|
mailbox = name_addr <|> addr_spec
|
|
|
|
-- | `name_addr`: address name.
|
|
-- |
|
|
-- | name-addr = [display-name] angle-addr
|
|
name_addr :: forall e. Parser e String
|
|
name_addr = do _ <- tryMaybe display_name
|
|
angle_addr
|
|
|
|
-- | `angle_addr` address specification between '<' and '>' characters.
|
|
-- |
|
|
-- | angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
|
|
angle_addr :: forall e. Parser e String
|
|
angle_addr = do _ <- tryMaybe cfws
|
|
_ <- char '<'
|
|
a <- addr_spec
|
|
_ <- char '>'
|
|
_ <- tryMaybe cfws
|
|
pure $ "<" <> a <> ">"
|
|
<|> obs_angle_addr
|
|
|
|
-- | `group`: a list of email addresses.
|
|
-- |
|
|
-- | group = display-name ":" [group-list] ";" [CFWS]
|
|
group :: forall e. Parser e String
|
|
group = do _ <- display_name
|
|
_ <- char ':'
|
|
xs <- tryMaybe group_list
|
|
_ <- char ';'
|
|
_ <- tryMaybe cfws
|
|
let res = ":" <> maybe "" id xs <> ";"
|
|
pure res
|
|
|
|
-- | `display_name`: displayed name, not the actual email address.
|
|
-- |
|
|
-- | display-name = phrase
|
|
display_name :: forall e. Parser e String
|
|
display_name = phrase
|
|
|
|
-- | `mailbox_list`
|
|
-- |
|
|
-- | mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
|
|
mailbox_list :: forall e. Parser e String
|
|
mailbox_list = do mb <- mailbox
|
|
xs <- A.many $ do _ <- char ','
|
|
mx <- mailbox
|
|
pure $ "," <> mx
|
|
pure $ mb <> A.fold xs
|
|
<|> obs_mbox_list
|
|
|
|
-- | `address_list`
|
|
-- |
|
|
-- | address-list = (address *("," address)) / obs-addr-list
|
|
address_list :: forall e. Parser e String
|
|
address_list = do a <- address
|
|
xs <- A.many do _ <- char ','
|
|
address
|
|
pure $ A.fold $ A.intersperse "," $ [a] <> xs
|
|
<|> obs_addr_list
|
|
|
|
-- | `group_list`
|
|
-- |
|
|
-- | group-list = mailbox-list / CFWS / obs-group-list
|
|
group_list :: forall e. Parser e String
|
|
group_list = mailbox_list <|> do _ <- cfws <|> obs_group_list
|
|
pure ""
|
|
|
|
|
|
-- | `atext`: atom accepted characters.
|
|
-- |
|
|
-- | atext = ALPHA / DIGIT / ; Printable US-ASCII
|
|
-- | "!" / "#" / ; characters not including
|
|
-- | "$" / "%" / ; specials. Used for atoms.
|
|
-- | "&" / "'" /
|
|
-- | "*" / "+" /
|
|
-- | "-" / "/" /
|
|
-- | "=" / "?" /
|
|
-- | "^" / "_" /
|
|
-- | "`" / "{" /
|
|
-- | "|" / "}" /
|
|
-- | "~"
|
|
atext :: forall e. Parser e Char
|
|
atext = alphanum
|
|
<|> char '!' <|> char '#'
|
|
<|> char '$' <|> char '%'
|
|
<|> char '&' <|> char '\''
|
|
<|> char '*' <|> char '+'
|
|
<|> char '-' <|> char '/'
|
|
<|> char '=' <|> char '?'
|
|
<|> char '^' <|> char '_'
|
|
<|> char '`' <|> char '{'
|
|
<|> char '|' <|> char '}'
|
|
<|> char '~'
|
|
|
|
|
|
-- | `atom`
|
|
-- |
|
|
-- | atom = [CFWS] 1*atext [CFWS]
|
|
atom :: forall e. Parser e String
|
|
atom = CU.fromCharArray <$> do _ <- tryMaybe cfws
|
|
a <- many1 atext
|
|
_ <- tryMaybe cfws
|
|
pure a
|
|
|
|
-- | `dot_atom_text`
|
|
-- |
|
|
-- | dot-atom-text = 1*atext *("." 1*atext)
|
|
dot_atom_text :: forall e. Parser e String
|
|
dot_atom_text = do xs0 <- many1 atext
|
|
xs1 <- A.many $ do _ <- char '.'
|
|
xs <- many1 atext
|
|
pure $ "." <> CU.fromCharArray xs
|
|
let str0 = CU.fromCharArray xs0
|
|
str1 = A.fold xs1
|
|
pure $ str0 <> str1
|
|
|
|
-- | `dot_atom`
|
|
-- |
|
|
-- | dot-atom = [CFWS] dot-atom-text [CFWS]
|
|
dot_atom :: forall e. Parser e String
|
|
dot_atom = do _ <- tryMaybe cfws
|
|
x <- dot_atom_text
|
|
_ <- tryMaybe cfws
|
|
pure x
|
|
|
|
-- | `specials`: special characters that do not appear in `atext`.
|
|
-- |
|
|
-- | specials = "(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /
|
|
-- | "\" / "," / "." / DQUOTE
|
|
specials :: forall e. Parser e Char
|
|
specials = char '('
|
|
<|> char ')'
|
|
<|> char '<'
|
|
<|> char '>'
|
|
<|> char '['
|
|
<|> char ']'
|
|
<|> char ':'
|
|
<|> char ';'
|
|
<|> char '@'
|
|
<|> char '\\'
|
|
<|> char ','
|
|
<|> char '.'
|
|
<|> char '"'
|
|
|
|
|
|
-- | `addr_spec`
|
|
-- |
|
|
-- | addr-spec = local-part "@" domain
|
|
addr_spec :: forall e. Parser e String
|
|
addr_spec = do lpart <- local_part
|
|
_ <- char '@'
|
|
dom <- domain
|
|
pure $ lpart <> "@" <> dom
|
|
|
|
-- | `local_part`
|
|
-- |
|
|
-- | local-part = dot-atom / quoted-string / obs-local-part
|
|
local_part :: forall e. Parser e String
|
|
local_part = dot_atom <|> quoted_string <|> obs_local_part
|
|
|
|
|
|
-- | `domain`
|
|
-- |
|
|
-- | domain = dot-atom / domain-literal / obs-domain
|
|
domain :: forall e. Parser e String
|
|
domain = dot_atom <|> domain_literal <|> obs_domain
|
|
|
|
-- | `domain_literal`
|
|
-- |
|
|
-- | domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
|
|
domain_literal :: forall e. Parser e String
|
|
domain_literal = do _ <- tryMaybe cfws
|
|
_ <- char '['
|
|
xs <- A.many do _ <- tryMaybe fws
|
|
dtext
|
|
_ <- tryMaybe fws
|
|
_ <- char ']'
|
|
_ <- tryMaybe cfws
|
|
pure $ A.fold xs
|
|
|
|
-- | dtext: characters in domains.
|
|
-- |
|
|
-- | dtext = %d33-90 / ; Printable US-ASCII
|
|
-- | %d94-126 / ; characters not including
|
|
-- | obs-dtext ; "[", "]", or "\"
|
|
dtext :: forall e. Parser e String
|
|
dtext = CU.singleton <$> sat cond <|> obs_dtext
|
|
where cond x = let charcode = C.toCharCode x
|
|
in between 33 90 charcode || between 94 126 charcode
|
|
|
|
|
|
-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters.
|
|
-- |
|
|
-- | obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
|
|
obs_angle_addr :: forall e. Parser e String
|
|
obs_angle_addr = do _ <- tryMaybe cfws
|
|
_ <- char '<'
|
|
r <- obs_route
|
|
a <- addr_spec
|
|
_ <- char '>'
|
|
_ <- tryMaybe cfws
|
|
pure $ r <> a
|
|
|
|
-- | `obs_route`
|
|
-- |
|
|
-- | obs-route = obs-domain-list ":"
|
|
obs_route :: forall e. Parser e String
|
|
obs_route = do l <- obs_domain_list
|
|
_ <- char ':'
|
|
pure $ l <> ":"
|
|
|
|
-- | `obs_domain_list`
|
|
-- |
|
|
-- | obs-domain-list = *(CFWS / ",") "@" domain
|
|
-- | *("," [CFWS] ["@" domain])
|
|
obs_domain_list :: forall e. Parser e String
|
|
obs_domain_list = do _ <- A.many $ cfws <|> do _ <- char ','
|
|
pure unit
|
|
_ <- char '@'
|
|
domain
|
|
|
|
-- | `obs_mbox_list`
|
|
-- |
|
|
-- | obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
|
|
obs_mbox_list :: forall e. Parser e String
|
|
obs_mbox_list = do _ <- A.many $ do _ <- cfws
|
|
_ <- char ','
|
|
pure unit
|
|
mb <- mailbox
|
|
xs <- A.many $ do _ <- char ','
|
|
x <- mailbox <|> do _ <- cfws
|
|
pure ""
|
|
pure x
|
|
pure $ mb <> A.fold xs
|
|
|
|
-- | `obs_addr_list`
|
|
-- |
|
|
-- | obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])
|
|
obs_addr_list :: forall e. Parser e String
|
|
obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
|
|
char ','
|
|
a <- address
|
|
xs <- A.many do _ <- char ','
|
|
address <|> do _ <- cfws
|
|
pure ""
|
|
let res = A.fold $ A.intersperse "," $ [a] <> xs
|
|
pure res
|
|
|
|
-- | `obs_group_list`
|
|
-- |
|
|
-- | obs-group-list = 1*([CFWS] ",") [CFWS]
|
|
obs_group_list :: forall e. Parser e Unit
|
|
obs_group_list = do _ <- many1 $ do _ <- tryMaybe cfws
|
|
char ','
|
|
_ <- tryMaybe cfws
|
|
pure unit
|
|
|
|
-- | `obs_local_part`
|
|
-- |
|
|
-- | obs-local-part = word *("." word)
|
|
obs_local_part :: forall e. Parser e String
|
|
obs_local_part = do w <- word
|
|
ws <- A.many $ do _ <- char '.'
|
|
w1 <- word
|
|
pure $ "." <> w1
|
|
pure $ w <> A.fold ws
|
|
|
|
-- | `obs_domain`
|
|
-- |
|
|
-- | obs-domain = atom *("." atom)
|
|
obs_domain :: forall e. Parser e String
|
|
obs_domain = do a <- atom
|
|
xs <- A.many $ do _ <- char '.'
|
|
x <- atom
|
|
pure $ "." <> x
|
|
pure $ a <> A.fold xs
|
|
|
|
-- | `obs_dtext`: obsolete domain text.
|
|
-- |
|
|
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair
|
|
obs_dtext :: forall e. Parser e String
|
|
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
|
|
|
|
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
|
|
-- | line feed and white space characters.
|
|
-- |
|
|
-- | obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
|
|
-- | %d11 / ; characters that do not
|
|
-- | %d12 / ; include the carriage
|
|
-- | %d14-31 / ; return, line feed, and
|
|
-- | %d127 ; white space characters
|
|
obs_no_ws_ctl :: forall e. Parser e Char
|
|
obs_no_ws_ctl = sat cond
|
|
where cond x = let charcode = C.toCharCode x
|
|
in between 1 8 charcode
|
|
|| between 11 12 charcode
|
|
|| between 14 31 charcode
|
|
|| charcode == 127
|
|
|
|
-- | obs-ctext: obsolete comment text.
|
|
-- |
|
|
-- | obs-ctext = obs-NO-WS-CTL
|
|
obs_ctext :: forall e. Parser e Char
|
|
obs_ctext = obs_no_ws_ctl
|
|
|
|
-- | obs-qtext = obs-NO-WS-CTL
|
|
obs_qtext :: forall e. Parser e Char
|
|
obs_qtext = obs_no_ws_ctl
|
|
|
|
-- | `obs_utext`: obsolete text.
|
|
-- |
|
|
-- | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
|
|
obs_utext :: forall e. Parser e Char
|
|
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
|
|
|
|
-- | `obs_qp`: obsolete quoted-pair rule.
|
|
-- |
|
|
-- | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
|
|
obs_qp :: forall e. Parser e String
|
|
obs_qp = do _ <- char '\\'
|
|
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
|
|
pure $ "\\" <> CU.singleton v
|
|
|
|
-- | `obs_body`: obsolete body.
|
|
-- |
|
|
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
|
|
-- |
|
|
-- | Note: `text` is replaced by `vchar`.
|
|
-- |
|
|
-- | (RFC)
|
|
-- | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
|
|
-- |
|
|
-- | (RFC Errata v1)
|
|
-- | obs-body = *(%d0-127)
|
|
-- |
|
|
-- | (RFC Errata v2)
|
|
-- | obs-body = *(d0 /text / LF / CR)
|
|
-- Errata v1
|
|
--obs_body :: forall e. Parser e String
|
|
--obs_body = CU.fromCharArray <$> A.many item
|
|
|
|
-- Errata v2
|
|
obs_body :: forall e. Parser e String
|
|
obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
|
|
|
|
--obs_body original
|
|
--obs_body :: forall e. Parser e String
|
|
--obs_body = do A.many $ do _ <- A.many lf
|
|
-- _ <- A.many cr
|
|
-- v <- A.many $ do x <- char_num 0 <|> text
|
|
-- _ <- A.many lf
|
|
-- _ <- A.many cr
|
|
-- pure x
|
|
-- pure $ A.fold v
|
|
-- <|> _ <- crlf
|
|
-- pure ""
|
|
|
|
-- | `obs_unstruct`
|
|
-- |
|
|
-- | Note: implement the version found in the Errata page.
|
|
-- |
|
|
-- | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
|
|
-- |
|
|
-- | (RFC Errata)
|
|
-- | obs-unstruct = *( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR
|
|
obs_unstruct :: forall e. Parser e String
|
|
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
|
|
many1 (obs_utext <|> do _ <- fws
|
|
pure ' ')
|
|
<|> do _ <- fws
|
|
pure ""
|
|
|
|
-- | `obs_phrase`: obsolete "phrase".
|
|
-- |
|
|
-- | obs-phrase = word *(word / "." / CFWS)
|
|
obs_phrase :: forall e. Parser e String
|
|
obs_phrase = do w <- word
|
|
ws <- A.many (word <|> string "." <|> do _ <- cfws
|
|
pure "")
|
|
pure $ w <> A.fold ws
|
|
|
|
-- | `word`.
|
|
-- |
|
|
-- | word = atom / quoted-string
|
|
word :: forall e. Parser e String
|
|
word = atom <|> quoted_string
|
|
|
|
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
|
|
-- |
|
|
-- | phrase = 1*word / obs-phrase
|
|
phrase :: forall e. Parser e String
|
|
phrase = do ws <- many1 word
|
|
pure $ A.fold ws
|
|
<|> obs_phrase
|
|
|
|
-- | `unstructured`
|
|
-- |
|
|
-- | unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
|
|
unstructured :: forall e. Parser e String
|
|
unstructured = do v <- A.many $ do _ <- fws
|
|
vchar
|
|
_ <- A.many wsp
|
|
pure $ CU.fromCharArray v
|
|
<|> obs_unstruct
|
|
|
|
|
|
-- | `obs_phrase_list`: obsolete list of phrases.
|
|
-- |
|
|
-- | obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS])
|
|
obs_phrase_list :: forall e. Parser e String
|
|
obs_phrase_list = do first_phrase <- phrase_or_cfws
|
|
xs <- A.many $ do _ <- char ','
|
|
phrase_or_cfws
|
|
pure $ A.fold $ [first_phrase] <> xs
|
|
where phrase_or_cfws = do first_phrase <- tryMaybe phrase
|
|
case first_phrase of
|
|
Nothing -> do _ <- cfws
|
|
pure ""
|
|
Just x -> pure x
|
|
|
|
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
|
|
--
|
|
-- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext
|
|
qtext :: forall e. Parser e Char
|
|
qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
|
|
|
|
-- | qcontent = qtext / quoted-pair
|
|
-- |
|
|
-- | qcontent = qtext / quoted-pair
|
|
qcontent :: forall e. Parser e String
|
|
qcontent = CU.singleton <$> qtext <|> quoted_pair
|
|
|
|
-- | `quoted_string`
|
|
-- |
|
|
-- | quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]
|
|
quoted_string :: forall e. Parser e String
|
|
quoted_string = do _ <- tryMaybe cfws
|
|
_ <- char '"'
|
|
s <- A.many $ do _ <- tryMaybe fws
|
|
qcontent
|
|
_ <- char '"'
|
|
_ <- tryMaybe cfws
|
|
pure $ "\"" <> A.fold s <> "\""
|
|
|
|
|
|
-- | TODO: For now, `local_part` only checks that
|
|
-- | (a) the first character is a letter,
|
|
-- | (b) the last character is either a letter or a digit.
|
|
-- | The rest can be any letter, digit, '-' or '.'.
|
|
local_part' :: Parser EmailError String
|
|
local_part' = do firstchar <- letter
|
|
rest <- until end (letter <|> digit <|> char '-' <|> char '.')
|
|
lastchar <- letter <|> digit
|
|
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
|
|
where
|
|
end :: forall e. Parser e Char
|
|
end = do c <- item
|
|
_ <- char '@'
|
|
pure c
|
|
|
|
-- | `email` is the parser for email addresses.
|
|
email :: Parser EmailError String
|
|
email = do login <- local_part'
|
|
_ <- char '@'
|
|
input <- current_input
|
|
case parse sub_eof input of
|
|
Left {error, position} ->
|
|
Parser \_ -> failureError position (Just $ InvalidDomain error)
|
|
Right {result, suffix} -> do
|
|
_ <- rollback suffix
|
|
pure $ login <> "@" <> result
|