Compare commits

..

2 Commits

Author SHA1 Message Date
49eb615b79 Fix comments related to RFC5322. 2024-02-10 16:04:35 +01:00
67e318b949 Split EmailAddress and RFC5322. 2024-02-10 16:02:14 +01:00
3 changed files with 573 additions and 557 deletions

View File

@ -1,574 +1,37 @@
-- | `EmailAddress` is a parser for email addresses, implementing the grammar found in RFC5322. -- | `EmailAddress` is a simplistic parser for email addresses.
-- | For a more serious parser, see the `RFC5322` module.
-- | -- |
-- | STATUS: the parser mostly works, except for comments. -- | STATUS: the parser works for very simplistic email addresses.
-- | Comments provoke a stack overflow, this must be investigated. -- | This shouldn't be used in a serious environment.
-- |
-- | Also, the parser needs a thorough review.
module GenericParser.EmailAddress where module GenericParser.EmailAddress where
import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit) import Prelude (bind, pure, ($), (<>))
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array as A
import Data.Char as C
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe (Maybe(..), maybe) import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (id) import GenericParser.Parser (Parser(..), char, item, current_input, failureError, parse, rollback, until)
import GenericParser.Parser (Parser(..)
, sat, char, char_num, char_range, string, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof) import GenericParser.DomainParser (sub_eof)
-- ABNF core rules. -- ABNF core rules.
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr) import GenericParser.RFC5234 (digit)
import GenericParser.SomeParsers (letter, alphanum) import GenericParser.SomeParsers (letter)
data EmailError data EmailError
= InvalidCharacter = InvalidCharacter
| InvalidDomain (Maybe DomainError) | InvalidDomain (Maybe DomainError)
-- | obs-FWS: obsolete folding white space.
-- |
-- | obs-FWS = `1*WSP *(CRLF 1*WSP)`
obs_fws :: forall e. Parser e String
obs_fws = do x <- A.many wsp
xs <- A.many $ do v <- crlf
w <- many1 wsp
pure $ v <> CU.fromCharArray w
pure $ A.fold $ [CU.fromCharArray x] <> xs
-- | FWS: folding white space. This can be described in plain english as:
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
-- |
-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS`
fws :: forall e. Parser e String
fws = do x <- tryMaybe do xs <- A.many wsp
v <- crlf
pure $ CU.fromCharArray xs <> v
w <- many1 wsp
let first_part = maybe "" id x
second_part = CU.fromCharArray w
pure $ first_part <> second_part
<|> obs_fws
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
-- |
-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext`
ctext :: forall e. Parser e Char
ctext = sat cond <|> obs_ctext
where cond x = let charcode = C.toCharCode x
in between 33 39 charcode
|| between 42 91 charcode
|| between 93 126 charcode
-- | `quoted_pair`: pair of characters.
-- |
-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp`
quoted_pair :: forall e. Parser e String
quoted_pair = do _ <- char '\\'
v <- vchar <|> wsp
pure $ "\\" <> CU.singleton v
<|> obs_qp
-- | Comment content.
-- |
-- | ccontent = `ctext / quoted-pair / comment`
ccontent :: forall e. Parser e String
ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
-- | Comment. Nothing to return since comments aren't to be processed.
-- |
-- | comment = `"(" *([FWS] ccontent) [FWS] ")"`
comment :: forall e. Parser e String
comment = do _ <- char '('
xs <- A.many do _ <- A.many fws
ccontent
_ <- char ')'
pure $ "(" <> A.fold xs <> ")"
-- | CFWS: comment folding white space.
-- |
-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS`
cfws :: forall e. Parser e String
cfws = do xs <- many1 $ do _ <- tryMaybe fws
comment
pure $ A.fold xs
<|> fws
-- | `address`: email address.
-- |
-- | address = `mailbox / group`
address :: forall e. Parser e String
address = mailbox <|> group
-- | `mailbox`: mail address.
-- |
-- | mailbox = `name-addr / addr-spec`
mailbox :: forall e. Parser e String
mailbox = name_addr <|> addr_spec
-- | `name_addr`: address name.
-- |
-- | name-addr = `[display-name] angle-addr`
name_addr :: forall e. Parser e String
name_addr = do _ <- tryMaybe display_name
angle_addr
-- | `angle_addr` address specification between '<' and '>' characters.
-- |
-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr`
angle_addr :: forall e. Parser e String
angle_addr = do _ <- tryMaybe cfws
_ <- char '<'
a <- addr_spec
_ <- char '>'
_ <- tryMaybe cfws
pure $ "<" <> a <> ">"
<|> obs_angle_addr
-- | `group`: a list of email addresses.
-- |
-- | group = `display-name ":" [group-list] ";" [CFWS]`
group :: forall e. Parser e String
group = do _ <- display_name
_ <- char ':'
xs <- tryMaybe group_list
_ <- char ';'
_ <- tryMaybe cfws
let res = ":" <> maybe "" id xs <> ";"
pure res
-- | `display_name`: displayed name, not the actual email address.
-- |
-- | display-name = `phrase`
display_name :: forall e. Parser e String
display_name = phrase
-- | `mailbox_list`
-- |
-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list`
mailbox_list :: forall e. Parser e String
mailbox_list = do mb <- mailbox
xs <- A.many $ do _ <- char ','
mx <- mailbox
pure $ "," <> mx
pure $ mb <> A.fold xs
<|> obs_mbox_list
-- | `address_list`
-- |
-- | address-list = `(address *("," address)) / obs-addr-list`
address_list :: forall e. Parser e String
address_list = do a <- address
xs <- A.many do _ <- char ','
address
pure $ A.fold $ A.intersperse "," $ [a] <> xs
<|> obs_addr_list
-- | `group_list`
-- |
-- | group-list = `mailbox-list / CFWS / obs-group-list`
group_list :: forall e. Parser e String
group_list = mailbox_list <|> cfws <|> obs_group_list
-- | `atext`: atom accepted characters.
-- | Printable US-ASCII characters not including specials.
-- |
-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /`
-- | `"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"`
atext :: forall e. Parser e Char
atext = alphanum
<|> char '!' <|> char '#'
<|> char '$' <|> char '%'
<|> char '&' <|> char '\''
<|> char '*' <|> char '+'
<|> char '-' <|> char '/'
<|> char '=' <|> char '?'
<|> char '^' <|> char '_'
<|> char '`' <|> char '{'
<|> char '|' <|> char '}'
<|> char '~'
-- | `atom`
-- |
-- | atom = `[CFWS] 1*atext [CFWS]`
atom :: forall e. Parser e String
atom = CU.fromCharArray <$> do _ <- tryMaybe cfws
a <- many1 atext
_ <- tryMaybe cfws
pure a
-- | `dot_atom_text`
-- |
-- | dot-atom-text = `1*atext *("." 1*atext)`
dot_atom_text :: forall e. Parser e String
dot_atom_text = do xs0 <- many1 atext
xs1 <- A.many $ do _ <- char '.'
xs <- many1 atext
pure $ "." <> CU.fromCharArray xs
let str0 = CU.fromCharArray xs0
str1 = A.fold xs1
pure $ str0 <> str1
-- | `dot_atom`
-- |
-- | dot-atom = `[CFWS] dot-atom-text [CFWS]`
dot_atom :: forall e. Parser e String
dot_atom = do _ <- tryMaybe cfws
x <- dot_atom_text
_ <- tryMaybe cfws
pure x
-- | `specials`: special characters that do not appear in `atext`.
-- |
-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /`
-- | `"\" / "," / "." / DQUOTE`
specials :: forall e. Parser e Char
specials = char '('
<|> char ')'
<|> char '<'
<|> char '>'
<|> char '['
<|> char ']'
<|> char ':'
<|> char ';'
<|> char '@'
<|> char '\\'
<|> char ','
<|> char '.'
<|> char '"'
-- | `addr_spec`
-- |
-- | addr-spec = `local-part "@" domain`
addr_spec :: forall e. Parser e String
addr_spec = do lpart <- local_part
_ <- char '@'
dom <- domain
pure $ lpart <> "@" <> dom
-- | `local_part`
-- |
-- | local-part = `dot-atom / quoted-string / obs-local-part`
local_part :: forall e. Parser e String
local_part = dot_atom <|> quoted_string <|> obs_local_part
-- | `domain`: this is a parser for a domain.
-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different.
-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example.
-- | Therefore, errors are differents from a parser to another.
-- |
-- | domain = `dot-atom / domain-literal / obs-domain`
domain :: forall e. Parser e String
domain = dot_atom <|> domain_literal <|> obs_domain
-- | `domain_literal`
-- |
-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]`
domain_literal :: forall e. Parser e String
domain_literal = do s <- tryMaybe cfws
_ <- char '['
xs <- A.many do _ <- tryMaybe fws
dtext
m <- tryMaybe fws
_ <- char ']'
e <- tryMaybe cfws
pure $ maybe "" id s
<> "[" <> A.fold xs <> maybe "" id m <> "]"
<> maybe "" id e
-- | dtext: characters in domains.
-- | Printable US-ASCII characters not including "[", "]", or "\".
-- |
-- | dtext = `%d33-90 / %d94-126 / obs-dtext`
dtext :: forall e. Parser e String
dtext = CU.singleton <$> sat cond <|> obs_dtext
where cond x = let charcode = C.toCharCode x
in between 33 90 charcode || between 94 126 charcode
-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters.
-- |
-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]`
obs_angle_addr :: forall e. Parser e String
obs_angle_addr = do s <- tryMaybe cfws
_ <- char '<'
r <- obs_route
a <- addr_spec
_ <- char '>'
e <- tryMaybe cfws
pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e
-- | `obs_route`
-- |
-- | obs-route = `obs-domain-list ":"`
obs_route :: forall e. Parser e String
obs_route = do l <- obs_domain_list
_ <- char ':'
pure $ l <> ":"
-- | `obs_domain_list`
-- |
-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])`
obs_domain_list :: forall e. Parser e String
obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ','
_ <- char '@'
d <- domain
xs <- A.many do _ <- char ','
c <- tryMaybe cfws
_ <- char '@'
d2 <- domain
pure $ "," <> maybe "" id c <> d2
pure $ A.fold s <> d <> A.fold xs
-- | `obs_mbox_list`
-- |
-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])`
obs_mbox_list :: forall e. Parser e String
obs_mbox_list = do _ <- A.many $ do _ <- cfws
_ <- char ','
pure unit
mb <- mailbox
xs <- A.many $ do _ <- char ','
x <- mailbox <|> do _ <- cfws
pure ""
pure x
pure $ mb <> A.fold xs
-- | `obs_addr_list`
-- |
-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])`
obs_addr_list :: forall e. Parser e String
obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
char ','
a <- address
xs <- A.many do _ <- char ','
address <|> do _ <- cfws
pure ""
let res = A.fold $ A.intersperse "," $ [a] <> xs
pure res
-- | `obs_group_list`
-- |
-- | obs-group-list = `1*([CFWS] ",") [CFWS]`
obs_group_list :: forall e. Parser e String
obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
_ <- char ','
pure $ maybe "" id x <> ","
c <- tryMaybe cfws
pure $ A.fold xs <> maybe "" id c
-- | `obs_local_part`
-- |
-- | obs-local-part = `word *("." word)`
obs_local_part :: forall e. Parser e String
obs_local_part = do w <- word
ws <- A.many $ do _ <- char '.'
w1 <- word
pure $ "." <> w1
pure $ w <> A.fold ws
-- | `obs_domain`
-- |
-- | obs-domain = `atom *("." atom)`
obs_domain :: forall e. Parser e String
obs_domain = do a <- atom
xs <- A.many $ do _ <- char '.'
x <- atom
pure $ "." <> x
pure $ a <> A.fold xs
-- | `obs_dtext`: obsolete domain text.
-- |
-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair`
obs_dtext :: forall e. Parser e String
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
-- | line feed and white space characters.
-- |
-- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127`
obs_no_ws_ctl :: forall e. Parser e Char
obs_no_ws_ctl = sat cond
where cond x = let charcode = C.toCharCode x
in between 1 8 charcode
|| between 11 12 charcode
|| between 14 31 charcode
|| charcode == 127
-- | obs-ctext: obsolete comment text.
-- |
-- | obs-ctext = `obs-NO-WS-CTL`
obs_ctext :: forall e. Parser e Char
obs_ctext = obs_no_ws_ctl
-- | `obs_qtext`: obsolete accepted quoted text.
-- |
-- | obs-qtext = `obs-NO-WS-CTL`
obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl
-- | `obs_utext`: obsolete text.
-- |
-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR`
obs_utext :: forall e. Parser e Char
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
-- | `obs_qp`: obsolete quoted-pair rule.
-- |
-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)`
obs_qp :: forall e. Parser e String
obs_qp = do _ <- char '\\'
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
pure $ "\\" <> CU.singleton v
-- | `obs_body`: obsolete body.
-- |
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
-- |
-- | Note: `text` is replaced by `vchar`.
-- |
-- | (RFC)
-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)`
-- |
-- | (RFC Errata v1)
-- | obs-body = `*(%d0-127)`
-- |
-- | (RFC Errata v2)
-- | obs-body = `*(d0 /text / LF / CR)`
-- Errata v1
--obs_body :: forall e. Parser e String
--obs_body = CU.fromCharArray <$> A.many item
-- Errata v2
obs_body :: forall e. Parser e String
obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
--obs_body original
--obs_body :: forall e. Parser e String
--obs_body = do A.many $ do _ <- A.many lf
-- _ <- A.many cr
-- v <- A.many $ do x <- char_num 0 <|> text
-- _ <- A.many lf
-- _ <- A.many cr
-- pure x
-- pure $ A.fold v
-- <|> _ <- crlf
-- pure ""
-- | `obs_unstruct`
-- |
-- | Note: implement the version found in the Errata page.
-- |
-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)`
-- |
-- | (RFC Errata)
-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR`
obs_unstruct :: forall e. Parser e String
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
many1 (obs_utext <|> do _ <- fws
pure ' ')
<|> do _ <- fws
pure ""
-- | `obs_phrase`: obsolete "phrase".
-- |
-- | obs-phrase = `word *(word / "." / CFWS)`
obs_phrase :: forall e. Parser e String
obs_phrase = do w <- word
ws <- A.many (word <|> string "." <|> do _ <- cfws
pure "")
pure $ w <> A.fold ws
-- | `word`.
-- |
-- | word = `atom / quoted-string`
word :: forall e. Parser e String
word = atom <|> quoted_string
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
-- |
-- | phrase = `1*word / obs-phrase`
phrase :: forall e. Parser e String
phrase = do ws <- many1 word
pure $ A.fold ws
<|> obs_phrase
-- | `unstructured`
-- |
-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct`
unstructured :: forall e. Parser e String
unstructured = do v <- A.many $ do _ <- fws
vchar
_ <- A.many wsp
pure $ CU.fromCharArray v
<|> obs_unstruct
-- | `obs_phrase_list`: obsolete list of phrases.
-- |
-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])`
obs_phrase_list :: forall e. Parser e String
obs_phrase_list = do first_phrase <- phrase_or_cfws
xs <- A.many $ do _ <- char ','
phrase_or_cfws
pure $ A.fold $ [first_phrase] <> xs
where phrase_or_cfws = do first_phrase <- tryMaybe phrase
case first_phrase of
Nothing -> do _ <- cfws
pure ""
Just x -> pure x
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
-- |
-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`).
-- |
-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext`
qtext :: forall e. Parser e Char
qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
-- | `qcontent`
-- |
-- | qcontent = `qtext / quoted-pair`
qcontent :: forall e. Parser e String
qcontent = CU.singleton <$> qtext <|> quoted_pair
-- | `quoted_string`
-- |
-- | WARNING: this rule was changed in order to take into account the new `qtext` rule,
-- | which now accepts the space character. This new rule, as implemented, still allows
-- | for multiple line returns in the quoted string.
-- |
-- |
-- | Original RFC5322 rule:
-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]`
-- |
-- | Implemented rule:
-- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]`
quoted_string :: forall e. Parser e String
quoted_string = do s <- tryMaybe cfws
_ <- char '"'
m <- A.many do l <- tryMaybe crlf
c <- qcontent
pure $ maybe "" id l <> c
_ <- char '"'
e <- tryMaybe cfws
pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e
-- | TODO: For now, `local_part` only checks that -- | TODO: For now, `local_part` only checks that
-- | (a) the first character is a letter, -- | (a) the first character is a letter,
-- | (b) the last character is either a letter or a digit. -- | (b) the last character is either a letter or a digit.
-- | The rest can be any letter, digit, '-' or '.'. -- | The rest can be any letter, digit, '-' or '.'.
local_part' :: Parser EmailError String local_part :: Parser EmailError String
local_part' = do firstchar <- letter local_part = do firstchar <- letter
rest <- until end (letter <|> digit <|> char '-' <|> char '.') rest <- until end (letter <|> digit <|> char '-' <|> char '.')
lastchar <- letter <|> digit lastchar <- letter <|> digit
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar] pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
where where
end :: forall e. Parser e Char end :: forall e. Parser e Char
end = do c <- item end = do c <- item
@ -577,7 +40,7 @@ local_part' = do firstchar <- letter
-- | `email` is the parser for email addresses. -- | `email` is the parser for email addresses.
email :: Parser EmailError String email :: Parser EmailError String
email = do login <- local_part' email = do login <- local_part
_ <- char '@' _ <- char '@'
input <- current_input input <- current_input
case parse sub_eof input of case parse sub_eof input of

View File

@ -0,0 +1,552 @@
-- | `EmailAddress` is a parser for email addresses, implementing the grammar found in RFC5322.
-- |
-- | STATUS: the parser mostly works, except for comments.
-- | Comments provoke a stack overflow, this must be investigated.
-- |
-- | Also, the parser needs a thorough review.
module GenericParser.RFC5322 where
import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit)
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array as A
import Data.Char as C
import Data.Maybe (Maybe(..), maybe)
import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (id)
import GenericParser.Parser (Parser, sat, char, char_num, char_range, string, many1, tryMaybe)
-- ABNF core rules.
import GenericParser.RFC5234 (crlf, wsp, vchar, lf, cr)
import GenericParser.SomeParsers (alphanum)
-- | obs-FWS: obsolete folding white space.
-- |
-- | obs-FWS = `1*WSP *(CRLF 1*WSP)`
obs_fws :: forall e. Parser e String
obs_fws = do x <- A.many wsp
xs <- A.many $ do v <- crlf
w <- many1 wsp
pure $ v <> CU.fromCharArray w
pure $ A.fold $ [CU.fromCharArray x] <> xs
-- | FWS: folding white space. This can be described in plain english as:
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
-- |
-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS`
fws :: forall e. Parser e String
fws = do x <- tryMaybe do xs <- A.many wsp
v <- crlf
pure $ CU.fromCharArray xs <> v
w <- many1 wsp
let first_part = maybe "" id x
second_part = CU.fromCharArray w
pure $ first_part <> second_part
<|> obs_fws
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
-- |
-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext`
ctext :: forall e. Parser e Char
ctext = sat cond <|> obs_ctext
where cond x = let charcode = C.toCharCode x
in between 33 39 charcode
|| between 42 91 charcode
|| between 93 126 charcode
-- | `quoted_pair`: pair of characters.
-- |
-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp`
quoted_pair :: forall e. Parser e String
quoted_pair = do _ <- char '\\'
v <- vchar <|> wsp
pure $ "\\" <> CU.singleton v
<|> obs_qp
-- | Comment content.
-- |
-- | ccontent = `ctext / quoted-pair / comment`
ccontent :: forall e. Parser e String
ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
-- | Comment. Nothing to return since comments aren't to be processed.
-- |
-- | comment = `"(" *([FWS] ccontent) [FWS] ")"`
comment :: forall e. Parser e String
comment = do _ <- char '('
xs <- A.many do _ <- A.many fws
ccontent
_ <- char ')'
pure $ "(" <> A.fold xs <> ")"
-- | CFWS: comment folding white space.
-- |
-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS`
cfws :: forall e. Parser e String
cfws = do xs <- many1 $ do _ <- tryMaybe fws
comment
pure $ A.fold xs
<|> fws
-- | `address`: email address.
-- |
-- | address = `mailbox / group`
address :: forall e. Parser e String
address = mailbox <|> group
-- | `mailbox`: mail address.
-- |
-- | mailbox = `name-addr / addr-spec`
mailbox :: forall e. Parser e String
mailbox = name_addr <|> addr_spec
-- | `name_addr`: address name.
-- |
-- | name-addr = `[display-name] angle-addr`
name_addr :: forall e. Parser e String
name_addr = do _ <- tryMaybe display_name
angle_addr
-- | `angle_addr` address specification between '<' and '>' characters.
-- |
-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr`
angle_addr :: forall e. Parser e String
angle_addr = do _ <- tryMaybe cfws
_ <- char '<'
a <- addr_spec
_ <- char '>'
_ <- tryMaybe cfws
pure $ "<" <> a <> ">"
<|> obs_angle_addr
-- | `group`: a list of email addresses.
-- |
-- | group = `display-name ":" [group-list] ";" [CFWS]`
group :: forall e. Parser e String
group = do _ <- display_name
_ <- char ':'
xs <- tryMaybe group_list
_ <- char ';'
_ <- tryMaybe cfws
let res = ":" <> maybe "" id xs <> ";"
pure res
-- | `display_name`: displayed name, not the actual email address.
-- |
-- | display-name = `phrase`
display_name :: forall e. Parser e String
display_name = phrase
-- | `mailbox_list`
-- |
-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list`
mailbox_list :: forall e. Parser e String
mailbox_list = do mb <- mailbox
xs <- A.many $ do _ <- char ','
mx <- mailbox
pure $ "," <> mx
pure $ mb <> A.fold xs
<|> obs_mbox_list
-- | `address_list`
-- |
-- | address-list = `(address *("," address)) / obs-addr-list`
address_list :: forall e. Parser e String
address_list = do a <- address
xs <- A.many do _ <- char ','
address
pure $ A.fold $ A.intersperse "," $ [a] <> xs
<|> obs_addr_list
-- | `group_list`
-- |
-- | group-list = `mailbox-list / CFWS / obs-group-list`
group_list :: forall e. Parser e String
group_list = mailbox_list <|> cfws <|> obs_group_list
-- | `atext`: atom accepted characters.
-- | Printable US-ASCII characters not including specials.
-- |
-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /`
-- | `"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"`
atext :: forall e. Parser e Char
atext = alphanum
<|> char '!' <|> char '#'
<|> char '$' <|> char '%'
<|> char '&' <|> char '\''
<|> char '*' <|> char '+'
<|> char '-' <|> char '/'
<|> char '=' <|> char '?'
<|> char '^' <|> char '_'
<|> char '`' <|> char '{'
<|> char '|' <|> char '}'
<|> char '~'
-- | `atom`
-- |
-- | atom = `[CFWS] 1*atext [CFWS]`
atom :: forall e. Parser e String
atom = CU.fromCharArray <$> do _ <- tryMaybe cfws
a <- many1 atext
_ <- tryMaybe cfws
pure a
-- | `dot_atom_text`
-- |
-- | dot-atom-text = `1*atext *("." 1*atext)`
dot_atom_text :: forall e. Parser e String
dot_atom_text = do xs0 <- many1 atext
xs1 <- A.many $ do _ <- char '.'
xs <- many1 atext
pure $ "." <> CU.fromCharArray xs
let str0 = CU.fromCharArray xs0
str1 = A.fold xs1
pure $ str0 <> str1
-- | `dot_atom`
-- |
-- | dot-atom = `[CFWS] dot-atom-text [CFWS]`
dot_atom :: forall e. Parser e String
dot_atom = do _ <- tryMaybe cfws
x <- dot_atom_text
_ <- tryMaybe cfws
pure x
-- | `specials`: special characters that do not appear in `atext`.
-- |
-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /`
-- | `"\" / "," / "." / DQUOTE`
specials :: forall e. Parser e Char
specials = char '('
<|> char ')'
<|> char '<'
<|> char '>'
<|> char '['
<|> char ']'
<|> char ':'
<|> char ';'
<|> char '@'
<|> char '\\'
<|> char ','
<|> char '.'
<|> char '"'
-- | `addr_spec`
-- |
-- | addr-spec = `local-part "@" domain`
addr_spec :: forall e. Parser e String
addr_spec = do lpart <- local_part
_ <- char '@'
dom <- domain
pure $ lpart <> "@" <> dom
-- | `local_part`
-- |
-- | local-part = `dot-atom / quoted-string / obs-local-part`
local_part :: forall e. Parser e String
local_part = dot_atom <|> quoted_string <|> obs_local_part
-- | `domain`: this is a parser for a domain.
-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different.
-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example.
-- | Therefore, errors are differents from a parser to another.
-- |
-- | domain = `dot-atom / domain-literal / obs-domain`
domain :: forall e. Parser e String
domain = dot_atom <|> domain_literal <|> obs_domain
-- | `domain_literal`
-- |
-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]`
domain_literal :: forall e. Parser e String
domain_literal = do s <- tryMaybe cfws
_ <- char '['
xs <- A.many do _ <- tryMaybe fws
dtext
m <- tryMaybe fws
_ <- char ']'
e <- tryMaybe cfws
pure $ maybe "" id s
<> "[" <> A.fold xs <> maybe "" id m <> "]"
<> maybe "" id e
-- | dtext: characters in domains.
-- | Printable US-ASCII characters not including "[", "]", or "\".
-- |
-- | dtext = `%d33-90 / %d94-126 / obs-dtext`
dtext :: forall e. Parser e String
dtext = CU.singleton <$> sat cond <|> obs_dtext
where cond x = let charcode = C.toCharCode x
in between 33 90 charcode || between 94 126 charcode
-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters.
-- |
-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]`
obs_angle_addr :: forall e. Parser e String
obs_angle_addr = do s <- tryMaybe cfws
_ <- char '<'
r <- obs_route
a <- addr_spec
_ <- char '>'
e <- tryMaybe cfws
pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e
-- | `obs_route`
-- |
-- | obs-route = `obs-domain-list ":"`
obs_route :: forall e. Parser e String
obs_route = do l <- obs_domain_list
_ <- char ':'
pure $ l <> ":"
-- | `obs_domain_list`
-- |
-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])`
obs_domain_list :: forall e. Parser e String
obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ','
_ <- char '@'
d <- domain
xs <- A.many do _ <- char ','
c <- tryMaybe cfws
_ <- char '@'
d2 <- domain
pure $ "," <> maybe "" id c <> d2
pure $ A.fold s <> d <> A.fold xs
-- | `obs_mbox_list`
-- |
-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])`
obs_mbox_list :: forall e. Parser e String
obs_mbox_list = do _ <- A.many $ do _ <- cfws
_ <- char ','
pure unit
mb <- mailbox
xs <- A.many $ do _ <- char ','
x <- mailbox <|> do _ <- cfws
pure ""
pure x
pure $ mb <> A.fold xs
-- | `obs_addr_list`
-- |
-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])`
obs_addr_list :: forall e. Parser e String
obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
char ','
a <- address
xs <- A.many do _ <- char ','
address <|> do _ <- cfws
pure ""
let res = A.fold $ A.intersperse "," $ [a] <> xs
pure res
-- | `obs_group_list`
-- |
-- | obs-group-list = `1*([CFWS] ",") [CFWS]`
obs_group_list :: forall e. Parser e String
obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
_ <- char ','
pure $ maybe "" id x <> ","
c <- tryMaybe cfws
pure $ A.fold xs <> maybe "" id c
-- | `obs_local_part`
-- |
-- | obs-local-part = `word *("." word)`
obs_local_part :: forall e. Parser e String
obs_local_part = do w <- word
ws <- A.many $ do _ <- char '.'
w1 <- word
pure $ "." <> w1
pure $ w <> A.fold ws
-- | `obs_domain`
-- |
-- | obs-domain = `atom *("." atom)`
obs_domain :: forall e. Parser e String
obs_domain = do a <- atom
xs <- A.many $ do _ <- char '.'
x <- atom
pure $ "." <> x
pure $ a <> A.fold xs
-- | `obs_dtext`: obsolete domain text.
-- |
-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair`
obs_dtext :: forall e. Parser e String
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
-- | line feed and white space characters.
-- |
-- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127`
obs_no_ws_ctl :: forall e. Parser e Char
obs_no_ws_ctl = sat cond
where cond x = let charcode = C.toCharCode x
in between 1 8 charcode
|| between 11 12 charcode
|| between 14 31 charcode
|| charcode == 127
-- | obs-ctext: obsolete comment text.
-- |
-- | obs-ctext = `obs-NO-WS-CTL`
obs_ctext :: forall e. Parser e Char
obs_ctext = obs_no_ws_ctl
-- | `obs_qtext`: obsolete accepted quoted text.
-- |
-- | obs-qtext = `obs-NO-WS-CTL`
obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl
-- | `obs_utext`: obsolete text.
-- |
-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR`
obs_utext :: forall e. Parser e Char
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
-- | `obs_qp`: obsolete quoted-pair rule.
-- |
-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)`
obs_qp :: forall e. Parser e String
obs_qp = do _ <- char '\\'
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
pure $ "\\" <> CU.singleton v
-- | `obs_body`: obsolete body.
-- |
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
-- |
-- | Note: `text` is replaced by `vchar`.
-- |
-- | (RFC)
-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)`
-- |
-- | (RFC Errata v1)
-- | obs-body = `*(%d0-127)`
-- |
-- | (RFC Errata v2)
-- | obs-body = `*(d0 /text / LF / CR)`
-- Errata v1
--obs_body :: forall e. Parser e String
--obs_body = CU.fromCharArray <$> A.many item
-- Errata v2
obs_body :: forall e. Parser e String
obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
--obs_body original
--obs_body :: forall e. Parser e String
--obs_body = do A.many $ do _ <- A.many lf
-- _ <- A.many cr
-- v <- A.many $ do x <- char_num 0 <|> text
-- _ <- A.many lf
-- _ <- A.many cr
-- pure x
-- pure $ A.fold v
-- <|> _ <- crlf
-- pure ""
-- | `obs_unstruct`
-- |
-- | Note: implement the version found in the Errata page.
-- |
-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)`
-- |
-- | (RFC Errata)
-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR`
obs_unstruct :: forall e. Parser e String
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
many1 (obs_utext <|> do _ <- fws
pure ' ')
<|> do _ <- fws
pure ""
-- | `obs_phrase`: obsolete "phrase".
-- |
-- | obs-phrase = `word *(word / "." / CFWS)`
obs_phrase :: forall e. Parser e String
obs_phrase = do w <- word
ws <- A.many (word <|> string "." <|> do _ <- cfws
pure "")
pure $ w <> A.fold ws
-- | `word`.
-- |
-- | word = `atom / quoted-string`
word :: forall e. Parser e String
word = atom <|> quoted_string
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
-- |
-- | phrase = `1*word / obs-phrase`
phrase :: forall e. Parser e String
phrase = do ws <- many1 word
pure $ A.fold ws
<|> obs_phrase
-- | `unstructured`
-- |
-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct`
unstructured :: forall e. Parser e String
unstructured = do v <- A.many $ do _ <- fws
vchar
_ <- A.many wsp
pure $ CU.fromCharArray v
<|> obs_unstruct
-- | `obs_phrase_list`: obsolete list of phrases.
-- |
-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])`
obs_phrase_list :: forall e. Parser e String
obs_phrase_list = do first_phrase <- phrase_or_cfws
xs <- A.many $ do _ <- char ','
phrase_or_cfws
pure $ A.fold $ [first_phrase] <> xs
where phrase_or_cfws = do first_phrase <- tryMaybe phrase
case first_phrase of
Nothing -> do _ <- cfws
pure ""
Just x -> pure x
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
-- |
-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`).
-- |
-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext`
qtext :: forall e. Parser e Char
qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
-- | `qcontent`
-- |
-- | qcontent = `qtext / quoted-pair`
qcontent :: forall e. Parser e String
qcontent = CU.singleton <$> qtext <|> quoted_pair
-- | `quoted_string`
-- |
-- | WARNING: this rule was changed in order to take into account the new `qtext` rule,
-- | which now accepts the space character. This new rule, as implemented, still allows
-- | for multiple line returns in the quoted string.
-- |
-- |
-- | Original RFC5322 rule:
-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]`
-- |
-- | Implemented rule:
-- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]`
quoted_string :: forall e. Parser e String
quoted_string = do s <- tryMaybe cfws
_ <- char '"'
m <- A.many do l <- tryMaybe crlf
c <- qcontent
pure $ maybe "" id l <> c
_ <- char '"'
e <- tryMaybe cfws
pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e

View File

@ -14,7 +14,8 @@ import GenericParser.DomainParserRFC1035 as RFC1035
import GenericParser.DomainParser as ModernDomains import GenericParser.DomainParser as ModernDomains
import GenericParser.IPAddress (IPv4Error(..)) import GenericParser.IPAddress (IPv4Error(..))
import GenericParser.IPAddress as IP import GenericParser.IPAddress as IP
import GenericParser.EmailAddress as E import GenericParser.EmailAddress as EA
import GenericParser.RFC5322 as E
import Test.TestValues as T import Test.TestValues as T
run :: forall e v. Parser e v -> String -> P.Result e v run :: forall e v. Parser e v -> String -> P.Result e v
@ -77,9 +78,9 @@ showerror_ipv4 :: IP.IPv4Error -> String
showerror_ipv4 (IP4NumberTooBig x) = "value '" <> show x <> "' is > 255" showerror_ipv4 (IP4NumberTooBig x) = "value '" <> show x <> "' is > 255"
showerror_ipv4 IP4IrrelevantShortRepresentation = "useless double dots" showerror_ipv4 IP4IrrelevantShortRepresentation = "useless double dots"
showerror_email :: E.EmailError -> String showerror_email :: EA.EmailError -> String
showerror_email E.InvalidCharacter = "InvalidCharacter" showerror_email EA.InvalidCharacter = "InvalidCharacter"
showerror_email (E.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e showerror_email (EA.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e
main :: Effect Unit main :: Effect Unit
main = do main = do
@ -102,7 +103,7 @@ main = do
test_series "IP.ipv6" IP.ipv6 id showerror_ipv6 T.ipv6_addresses test_series "IP.ipv6" IP.ipv6 id showerror_ipv6 T.ipv6_addresses
log "" log ""
test_series "E.email" E.email id showerror_email T.valid_email_addresses test_series "EA.email" EA.email id showerror_email T.valid_email_addresses
log "" log ""
test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short