Email: a few more rules.
This commit is contained in:
parent
82056ba5b9
commit
e2a919d78c
@ -2,29 +2,183 @@
|
|||||||
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
||||||
module GenericParser.EmailAddress where
|
module GenericParser.EmailAddress where
|
||||||
|
|
||||||
import Prelude (bind, pure, ($), (<>))
|
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between)
|
||||||
|
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Data.Maybe (Maybe(..))
|
import Data.Array as A
|
||||||
|
import Data.Char as C
|
||||||
import Data.Either (Either(..))
|
import Data.Either (Either(..))
|
||||||
|
import Data.Maybe (Maybe(..))
|
||||||
import Data.String.CodeUnits as CU
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
import GenericParser.DomainParser.Common (DomainError)
|
import GenericParser.DomainParser.Common (DomainError)
|
||||||
import GenericParser.DomainParser (sub_eof)
|
import GenericParser.DomainParser (sub_eof)
|
||||||
import GenericParser.Parser (Parser(..)
|
import GenericParser.Parser (Parser(..)
|
||||||
, char , digit , letter, item
|
, sat, char , digit , letter, item, many1, tryMaybe
|
||||||
, current_input, failureError, parse, rollback, until)
|
, current_input, failureError, parse, rollback, until)
|
||||||
|
|
||||||
data EmailError
|
data EmailError
|
||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
| InvalidDomain (Maybe DomainError)
|
| InvalidDomain (Maybe DomainError)
|
||||||
|
|
||||||
-- | TODO: For now, `login_part` only checks that
|
crlf :: forall e. Parser e Unit
|
||||||
|
crlf = do _ <- char '\r'
|
||||||
|
_ <- char '\n'
|
||||||
|
pure unit
|
||||||
|
|
||||||
|
-- | WSP = a white space.
|
||||||
|
-- |
|
||||||
|
-- | TODO: I assumed it's just a space or tab. Verify and fix.
|
||||||
|
wsp :: Parser EmailError Char
|
||||||
|
wsp = char ' ' <|> char '\t'
|
||||||
|
--wsp = space -- in case we want any possible space value
|
||||||
|
|
||||||
|
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
||||||
|
-- |
|
||||||
|
-- | Obsolete FWS.
|
||||||
|
obs_fws :: Parser EmailError Unit
|
||||||
|
obs_fws = do _ <- A.many wsp
|
||||||
|
_ <- A.many $ do _ <- crlf
|
||||||
|
_ <- many1 wsp
|
||||||
|
pure unit
|
||||||
|
pure unit
|
||||||
|
|
||||||
|
-- FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
|
||||||
|
-- ; Folding white space
|
||||||
|
-- In english: FWS is described as:
|
||||||
|
-- 1. an OPTIONAL line with potential white spaces followed by at least one white space
|
||||||
|
-- 2. or, by the obs-FWS rule (meaning: many empty lines)
|
||||||
|
fws :: Parser EmailError Unit
|
||||||
|
fws = do _ <- tryMaybe do _ <- A.many wsp
|
||||||
|
_ <- crlf
|
||||||
|
pure unit
|
||||||
|
_ <- many1 wsp
|
||||||
|
pure unit
|
||||||
|
<|> obs_fws
|
||||||
|
|
||||||
|
-- ctext = %d33-39 / ; Printable US-ASCII
|
||||||
|
-- %d42-91 / ; characters not including
|
||||||
|
-- %d93-126 / ; "(", ")", or "\"
|
||||||
|
-- obs-ctext
|
||||||
|
ctext :: Parser EmailError Char
|
||||||
|
ctext = sat cond <|> obs_ctext
|
||||||
|
where cond x = let charcode = C.toCharCode x
|
||||||
|
in between 33 39 charcode
|
||||||
|
|| between 42 91 charcode
|
||||||
|
|| between 93 126 charcode
|
||||||
|
|
||||||
|
-- ccontent = ctext / quoted-pair / comment
|
||||||
|
--ccontent :: Parser EmailError String
|
||||||
|
--ccontent = do
|
||||||
|
|
||||||
|
-- comment = "(" *([FWS] ccontent) [FWS] ")"
|
||||||
|
--comment :: Parser EmailError String
|
||||||
|
--comment = do
|
||||||
|
|
||||||
|
-- CFWS = (1*([FWS] comment) [FWS]) / FWS
|
||||||
|
--cfws :: Parser EmailError String
|
||||||
|
--cfws = do
|
||||||
|
|
||||||
|
-- address = mailbox / group
|
||||||
|
--address :: Parser EmailError String
|
||||||
|
--address = do
|
||||||
|
|
||||||
|
--mailbox = name-addr / addr-spec
|
||||||
|
--
|
||||||
|
--name-addr = [display-name] angle-addr
|
||||||
|
--
|
||||||
|
--angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
|
||||||
|
-- obs-angle-addr
|
||||||
|
--
|
||||||
|
--group = display-name ":" [group-list] ";" [CFWS]
|
||||||
|
--
|
||||||
|
--display-name = phrase
|
||||||
|
--
|
||||||
|
--mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
|
||||||
|
--
|
||||||
|
--address-list = (address *("," address)) / obs-addr-list
|
||||||
|
--
|
||||||
|
--group-list = mailbox-list / CFWS / obs-group-list
|
||||||
|
|
||||||
|
|
||||||
|
-- addr-spec = local-part "@" domain
|
||||||
|
--
|
||||||
|
-- local-part = dot-atom / quoted-string / obs-local-part
|
||||||
|
--
|
||||||
|
-- domain = dot-atom / domain-literal / obs-domain
|
||||||
|
--
|
||||||
|
-- domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
|
||||||
|
--
|
||||||
|
-- dtext = %d33-90 / ; Printable US-ASCII
|
||||||
|
-- %d94-126 / ; characters not including
|
||||||
|
-- obs-dtext ; "[", "]", or "\"
|
||||||
|
dtext :: forall e. Parser e Char
|
||||||
|
dtext = sat cond <|> obs_dtext
|
||||||
|
where cond x = let charcode = C.toCharCode x
|
||||||
|
in between 33 90 charcode || between 94 126 charcode
|
||||||
|
|
||||||
|
|
||||||
|
--obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
|
||||||
|
--
|
||||||
|
--obs-route = obs-domain-list ":"
|
||||||
|
--
|
||||||
|
--obs-domain-list = *(CFWS / ",") "@" domain
|
||||||
|
-- *("," [CFWS] ["@" domain])
|
||||||
|
--
|
||||||
|
--obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
|
||||||
|
--
|
||||||
|
--obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])
|
||||||
|
--
|
||||||
|
--obs-group-list = 1*([CFWS] ",") [CFWS]
|
||||||
|
--
|
||||||
|
--obs-local-part = word *("." word)
|
||||||
|
--
|
||||||
|
--obs-domain = atom *("." atom)
|
||||||
|
|
||||||
|
-- | TODO: Obsolete domain text.
|
||||||
|
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair
|
||||||
|
obs_dtext :: forall e. Parser e Char
|
||||||
|
obs_dtext = obs_no_ws_ctl --<|> quoted_pair
|
||||||
|
|
||||||
|
--obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
|
||||||
|
-- %d11 / ; characters that do not
|
||||||
|
-- %d12 / ; include the carriage
|
||||||
|
-- %d14-31 / ; return, line feed, and
|
||||||
|
-- %d127 ; white space characters
|
||||||
|
obs_no_ws_ctl :: forall e. Parser e Char
|
||||||
|
obs_no_ws_ctl = sat cond
|
||||||
|
where cond x = let charcode = C.toCharCode x
|
||||||
|
in between 1 8 charcode
|
||||||
|
|| between 11 12 charcode
|
||||||
|
|| between 14 31 charcode
|
||||||
|
|| charcode == 127
|
||||||
|
|
||||||
|
-- | obs-ctext = obs-NO-WS-CTL
|
||||||
|
obs_ctext :: forall e. Parser e Char
|
||||||
|
obs_ctext = obs_no_ws_ctl
|
||||||
|
|
||||||
|
-- | obs-qtext = obs-NO-WS-CTL
|
||||||
|
obs_qtext :: forall e. Parser e Char
|
||||||
|
obs_qtext = obs_no_ws_ctl
|
||||||
|
|
||||||
|
--obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
|
||||||
|
--
|
||||||
|
--obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
|
||||||
|
--
|
||||||
|
--obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
|
||||||
|
--
|
||||||
|
--obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
|
||||||
|
--
|
||||||
|
--obs-phrase = word *(word / "." / CFWS)
|
||||||
|
--
|
||||||
|
--obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS])
|
||||||
|
|
||||||
|
-- | TODO: For now, `local_part` only checks that
|
||||||
-- | (a) the first character is a letter,
|
-- | (a) the first character is a letter,
|
||||||
-- | (b) the last character is either a letter or a digit.
|
-- | (b) the last character is either a letter or a digit.
|
||||||
-- | The rest can be any letter, digit, '-' or '.'.
|
-- | The rest can be any letter, digit, '-' or '.'.
|
||||||
login_part :: Parser EmailError String
|
local_part :: Parser EmailError String
|
||||||
login_part = do firstchar <- letter
|
local_part = do firstchar <- letter
|
||||||
rest <- until end (letter <|> digit <|> char '-' <|> char '.')
|
rest <- until end (letter <|> digit <|> char '-' <|> char '.')
|
||||||
lastchar <- letter <|> digit
|
lastchar <- letter <|> digit
|
||||||
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
|
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
|
||||||
@ -36,7 +190,7 @@ login_part = do firstchar <- letter
|
|||||||
|
|
||||||
-- | `email` is the parser for email addresses.
|
-- | `email` is the parser for email addresses.
|
||||||
email :: Parser EmailError String
|
email :: Parser EmailError String
|
||||||
email = do login <- login_part
|
email = do login <- local_part
|
||||||
_ <- char '@'
|
_ <- char '@'
|
||||||
input <- current_input
|
input <- current_input
|
||||||
case parse sub_eof input of
|
case parse sub_eof input of
|
||||||
|
@ -47,6 +47,7 @@ ipv6_chunk'' :: Parser IPv6Error String
|
|||||||
ipv6_chunk'' = do _ <- char ':'
|
ipv6_chunk'' = do _ <- char ':'
|
||||||
ipv6_chunk
|
ipv6_chunk
|
||||||
|
|
||||||
|
-- | `ipv6_full''` parses a representation without shortcuts ("::").
|
||||||
ipv6_full :: Parser IPv6Error String
|
ipv6_full :: Parser IPv6Error String
|
||||||
ipv6_full = do chunks <- many1 ipv6_chunk'
|
ipv6_full = do chunks <- many1 ipv6_chunk'
|
||||||
pos <- current_position
|
pos <- current_position
|
||||||
|
@ -125,6 +125,8 @@ main = do
|
|||||||
|
|
||||||
log ""
|
log ""
|
||||||
test_ipv6 "2001:0"
|
test_ipv6 "2001:0"
|
||||||
|
test_ipv6 "2001::x:0"
|
||||||
|
test_ipv6 "2001:x::0"
|
||||||
test_ipv6 "2001::0"
|
test_ipv6 "2001::0"
|
||||||
test_ipv6 "2001::1:"
|
test_ipv6 "2001::1:"
|
||||||
test_ipv6 "::"
|
test_ipv6 "::"
|
||||||
|
Loading…
Reference in New Issue
Block a user