diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index 9ff7eb0..c735796 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -6,569 +6,33 @@ -- | Also, the parser needs a thorough review. module GenericParser.EmailAddress where -import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit) +import Prelude (bind, pure, ($), (<>)) import Control.Alt ((<|>)) -import Control.Lazy (defer) -import Data.Array as A -import Data.Char as C import Data.Either (Either(..)) -import Data.Maybe (Maybe(..), maybe) +import Data.Maybe (Maybe(..)) import Data.String.CodeUnits as CU -import GenericParser.BaseFunctions (id) -import GenericParser.Parser (Parser(..) - , sat, char, char_num, char_range, string, item, many1, tryMaybe - , current_input, failureError, parse, rollback, until) +import GenericParser.Parser (Parser(..), char, item, current_input, failureError, parse, rollback, until) import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser (sub_eof) -- ABNF core rules. -import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr) -import GenericParser.SomeParsers (letter, alphanum) +import GenericParser.RFC5234 (digit) +import GenericParser.SomeParsers (letter) data EmailError = InvalidCharacter | InvalidDomain (Maybe DomainError) --- | obs-FWS: obsolete folding white space. --- | --- | obs-FWS = `1*WSP *(CRLF 1*WSP)` -obs_fws :: forall e. Parser e String -obs_fws = do x <- A.many wsp - xs <- A.many $ do v <- crlf - w <- many1 wsp - pure $ v <> CU.fromCharArray w - pure $ A.fold $ [CU.fromCharArray x] <> xs - --- | FWS: folding white space. This can be described in plain english as: --- | 1. an OPTIONAL line with potential white spaces followed by at least one white space --- | 2. or, by the obs-FWS rule (meaning: many empty lines) --- | --- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS` -fws :: forall e. Parser e String -fws = do x <- tryMaybe do xs <- A.many wsp - v <- crlf - pure $ CU.fromCharArray xs <> v - w <- many1 wsp - let first_part = maybe "" id x - second_part = CU.fromCharArray w - pure $ first_part <> second_part - <|> obs_fws - --- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'. --- | --- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext` -ctext :: forall e. Parser e Char -ctext = sat cond <|> obs_ctext - where cond x = let charcode = C.toCharCode x - in between 33 39 charcode - || between 42 91 charcode - || between 93 126 charcode - --- | `quoted_pair`: pair of characters. --- | --- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp` -quoted_pair :: forall e. Parser e String -quoted_pair = do _ <- char '\\' - v <- vchar <|> wsp - pure $ "\\" <> CU.singleton v - <|> obs_qp - --- | Comment content. --- | --- | ccontent = `ctext / quoted-pair / comment` -ccontent :: forall e. Parser e String -ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment - --- | Comment. Nothing to return since comments aren't to be processed. --- | --- | comment = `"(" *([FWS] ccontent) [FWS] ")"` -comment :: forall e. Parser e String -comment = do _ <- char '(' - xs <- A.many do _ <- A.many fws - ccontent - _ <- char ')' - pure $ "(" <> A.fold xs <> ")" - --- | CFWS: comment folding white space. --- | --- | CFWS = `(1*([FWS] comment) [FWS]) / FWS` -cfws :: forall e. Parser e String -cfws = do xs <- many1 $ do _ <- tryMaybe fws - comment - pure $ A.fold xs - <|> fws - --- | `address`: email address. --- | --- | address = `mailbox / group` -address :: forall e. Parser e String -address = mailbox <|> group - --- | `mailbox`: mail address. --- | --- | mailbox = `name-addr / addr-spec` -mailbox :: forall e. Parser e String -mailbox = name_addr <|> addr_spec - --- | `name_addr`: address name. --- | --- | name-addr = `[display-name] angle-addr` -name_addr :: forall e. Parser e String -name_addr = do _ <- tryMaybe display_name - angle_addr - --- | `angle_addr` address specification between '<' and '>' characters. --- | --- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr` -angle_addr :: forall e. Parser e String -angle_addr = do _ <- tryMaybe cfws - _ <- char '<' - a <- addr_spec - _ <- char '>' - _ <- tryMaybe cfws - pure $ "<" <> a <> ">" - <|> obs_angle_addr - --- | `group`: a list of email addresses. --- | --- | group = `display-name ":" [group-list] ";" [CFWS]` -group :: forall e. Parser e String -group = do _ <- display_name - _ <- char ':' - xs <- tryMaybe group_list - _ <- char ';' - _ <- tryMaybe cfws - let res = ":" <> maybe "" id xs <> ";" - pure res - --- | `display_name`: displayed name, not the actual email address. --- | --- | display-name = `phrase` -display_name :: forall e. Parser e String -display_name = phrase - --- | `mailbox_list` --- | --- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list` -mailbox_list :: forall e. Parser e String -mailbox_list = do mb <- mailbox - xs <- A.many $ do _ <- char ',' - mx <- mailbox - pure $ "," <> mx - pure $ mb <> A.fold xs - <|> obs_mbox_list - --- | `address_list` --- | --- | address-list = `(address *("," address)) / obs-addr-list` -address_list :: forall e. Parser e String -address_list = do a <- address - xs <- A.many do _ <- char ',' - address - pure $ A.fold $ A.intersperse "," $ [a] <> xs - <|> obs_addr_list - --- | `group_list` --- | --- | group-list = `mailbox-list / CFWS / obs-group-list` -group_list :: forall e. Parser e String -group_list = mailbox_list <|> cfws <|> obs_group_list - - --- | `atext`: atom accepted characters. --- | Printable US-ASCII characters not including specials. --- | --- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /` --- | `"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"` -atext :: forall e. Parser e Char -atext = alphanum - <|> char '!' <|> char '#' - <|> char '$' <|> char '%' - <|> char '&' <|> char '\'' - <|> char '*' <|> char '+' - <|> char '-' <|> char '/' - <|> char '=' <|> char '?' - <|> char '^' <|> char '_' - <|> char '`' <|> char '{' - <|> char '|' <|> char '}' - <|> char '~' - - --- | `atom` --- | --- | atom = `[CFWS] 1*atext [CFWS]` -atom :: forall e. Parser e String -atom = CU.fromCharArray <$> do _ <- tryMaybe cfws - a <- many1 atext - _ <- tryMaybe cfws - pure a - --- | `dot_atom_text` --- | --- | dot-atom-text = `1*atext *("." 1*atext)` -dot_atom_text :: forall e. Parser e String -dot_atom_text = do xs0 <- many1 atext - xs1 <- A.many $ do _ <- char '.' - xs <- many1 atext - pure $ "." <> CU.fromCharArray xs - let str0 = CU.fromCharArray xs0 - str1 = A.fold xs1 - pure $ str0 <> str1 - --- | `dot_atom` --- | --- | dot-atom = `[CFWS] dot-atom-text [CFWS]` -dot_atom :: forall e. Parser e String -dot_atom = do _ <- tryMaybe cfws - x <- dot_atom_text - _ <- tryMaybe cfws - pure x - --- | `specials`: special characters that do not appear in `atext`. --- | --- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /` --- | `"\" / "," / "." / DQUOTE` -specials :: forall e. Parser e Char -specials = char '(' - <|> char ')' - <|> char '<' - <|> char '>' - <|> char '[' - <|> char ']' - <|> char ':' - <|> char ';' - <|> char '@' - <|> char '\\' - <|> char ',' - <|> char '.' - <|> char '"' - --- | `addr_spec` --- | --- | addr-spec = `local-part "@" domain` -addr_spec :: forall e. Parser e String -addr_spec = do lpart <- local_part - _ <- char '@' - dom <- domain - pure $ lpart <> "@" <> dom - --- | `local_part` --- | --- | local-part = `dot-atom / quoted-string / obs-local-part` -local_part :: forall e. Parser e String -local_part = dot_atom <|> quoted_string <|> obs_local_part - --- | `domain`: this is a parser for a domain. --- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different. --- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example. --- | Therefore, errors are differents from a parser to another. --- | --- | domain = `dot-atom / domain-literal / obs-domain` -domain :: forall e. Parser e String -domain = dot_atom <|> domain_literal <|> obs_domain - --- | `domain_literal` --- | --- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]` -domain_literal :: forall e. Parser e String -domain_literal = do s <- tryMaybe cfws - _ <- char '[' - xs <- A.many do _ <- tryMaybe fws - dtext - m <- tryMaybe fws - _ <- char ']' - e <- tryMaybe cfws - pure $ maybe "" id s - <> "[" <> A.fold xs <> maybe "" id m <> "]" - <> maybe "" id e - --- | dtext: characters in domains. --- | Printable US-ASCII characters not including "[", "]", or "\". --- | --- | dtext = `%d33-90 / %d94-126 / obs-dtext` -dtext :: forall e. Parser e String -dtext = CU.singleton <$> sat cond <|> obs_dtext - where cond x = let charcode = C.toCharCode x - in between 33 90 charcode || between 94 126 charcode - --- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters. --- | --- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]` -obs_angle_addr :: forall e. Parser e String -obs_angle_addr = do s <- tryMaybe cfws - _ <- char '<' - r <- obs_route - a <- addr_spec - _ <- char '>' - e <- tryMaybe cfws - pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e - --- | `obs_route` --- | --- | obs-route = `obs-domain-list ":"` -obs_route :: forall e. Parser e String -obs_route = do l <- obs_domain_list - _ <- char ':' - pure $ l <> ":" - --- | `obs_domain_list` --- | --- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])` -obs_domain_list :: forall e. Parser e String -obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ',' - _ <- char '@' - d <- domain - xs <- A.many do _ <- char ',' - c <- tryMaybe cfws - _ <- char '@' - d2 <- domain - pure $ "," <> maybe "" id c <> d2 - pure $ A.fold s <> d <> A.fold xs - --- | `obs_mbox_list` --- | --- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])` -obs_mbox_list :: forall e. Parser e String -obs_mbox_list = do _ <- A.many $ do _ <- cfws - _ <- char ',' - pure unit - mb <- mailbox - xs <- A.many $ do _ <- char ',' - x <- mailbox <|> do _ <- cfws - pure "" - pure x - pure $ mb <> A.fold xs - --- | `obs_addr_list` --- | --- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])` -obs_addr_list :: forall e. Parser e String -obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws - char ',' - a <- address - xs <- A.many do _ <- char ',' - address <|> do _ <- cfws - pure "" - let res = A.fold $ A.intersperse "," $ [a] <> xs - pure res - --- | `obs_group_list` --- | --- | obs-group-list = `1*([CFWS] ",") [CFWS]` -obs_group_list :: forall e. Parser e String -obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws - _ <- char ',' - pure $ maybe "" id x <> "," - c <- tryMaybe cfws - pure $ A.fold xs <> maybe "" id c - --- | `obs_local_part` --- | --- | obs-local-part = `word *("." word)` -obs_local_part :: forall e. Parser e String -obs_local_part = do w <- word - ws <- A.many $ do _ <- char '.' - w1 <- word - pure $ "." <> w1 - pure $ w <> A.fold ws - --- | `obs_domain` --- | --- | obs-domain = `atom *("." atom)` -obs_domain :: forall e. Parser e String -obs_domain = do a <- atom - xs <- A.many $ do _ <- char '.' - x <- atom - pure $ "." <> x - pure $ a <> A.fold xs - --- | `obs_dtext`: obsolete domain text. --- | --- | obs-dtext = `obs-NO-WS-CTL / quoted-pair` -obs_dtext :: forall e. Parser e String -obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair - --- | obs-NO-WS-CTL: US-ASCII control characters without carriage return, --- | line feed and white space characters. --- | --- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127` -obs_no_ws_ctl :: forall e. Parser e Char -obs_no_ws_ctl = sat cond - where cond x = let charcode = C.toCharCode x - in between 1 8 charcode - || between 11 12 charcode - || between 14 31 charcode - || charcode == 127 - --- | obs-ctext: obsolete comment text. --- | --- | obs-ctext = `obs-NO-WS-CTL` -obs_ctext :: forall e. Parser e Char -obs_ctext = obs_no_ws_ctl - --- | `obs_qtext`: obsolete accepted quoted text. --- | --- | obs-qtext = `obs-NO-WS-CTL` -obs_qtext :: forall e. Parser e Char -obs_qtext = obs_no_ws_ctl - --- | `obs_utext`: obsolete text. --- | --- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR` -obs_utext :: forall e. Parser e Char -obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar - --- | `obs_qp`: obsolete quoted-pair rule. --- | --- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)` -obs_qp :: forall e. Parser e String -obs_qp = do _ <- char '\\' - v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr - pure $ "\\" <> CU.singleton v - --- | `obs_body`: obsolete body. --- | --- | Note: the simpler version found in the errata is implemented, which basically accept everything. --- | --- | Note: `text` is replaced by `vchar`. --- | --- | (RFC) --- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)` --- | --- | (RFC Errata v1) --- | obs-body = `*(%d0-127)` --- | --- | (RFC Errata v2) --- | obs-body = `*(d0 /text / LF / CR)` --- Errata v1 ---obs_body :: forall e. Parser e String ---obs_body = CU.fromCharArray <$> A.many item - --- Errata v2 -obs_body :: forall e. Parser e String -obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr) - ---obs_body original ---obs_body :: forall e. Parser e String ---obs_body = do A.many $ do _ <- A.many lf --- _ <- A.many cr --- v <- A.many $ do x <- char_num 0 <|> text --- _ <- A.many lf --- _ <- A.many cr --- pure x --- pure $ A.fold v --- <|> _ <- crlf --- pure "" - --- | `obs_unstruct` --- | --- | Note: implement the version found in the Errata page. --- | --- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)` --- | --- | (RFC Errata) --- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR` -obs_unstruct :: forall e. Parser e String -obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr - many1 (obs_utext <|> do _ <- fws - pure ' ') - <|> do _ <- fws - pure "" - --- | `obs_phrase`: obsolete "phrase". --- | --- | obs-phrase = `word *(word / "." / CFWS)` -obs_phrase :: forall e. Parser e String -obs_phrase = do w <- word - ws <- A.many (word <|> string "." <|> do _ <- cfws - pure "") - pure $ w <> A.fold ws - --- | `word`. --- | --- | word = `atom / quoted-string` -word :: forall e. Parser e String -word = atom <|> quoted_string - --- | `phrase`: list of words (at least one) or the obsolete phrase rule. --- | --- | phrase = `1*word / obs-phrase` -phrase :: forall e. Parser e String -phrase = do ws <- many1 word - pure $ A.fold ws - <|> obs_phrase - --- | `unstructured` --- | --- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct` -unstructured :: forall e. Parser e String -unstructured = do v <- A.many $ do _ <- fws - vchar - _ <- A.many wsp - pure $ CU.fromCharArray v - <|> obs_unstruct - - --- | `obs_phrase_list`: obsolete list of phrases. --- | --- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])` -obs_phrase_list :: forall e. Parser e String -obs_phrase_list = do first_phrase <- phrase_or_cfws - xs <- A.many $ do _ <- char ',' - phrase_or_cfws - pure $ A.fold $ [first_phrase] <> xs - where phrase_or_cfws = do first_phrase <- tryMaybe phrase - case first_phrase of - Nothing -> do _ <- cfws - pure "" - Just x -> pure x - --- | `qtext`: printable US-ASCII characters not including "\" or the quote character. --- | --- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`). --- | --- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext` -qtext :: forall e. Parser e Char -qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext - --- | `qcontent` --- | --- | qcontent = `qtext / quoted-pair` -qcontent :: forall e. Parser e String -qcontent = CU.singleton <$> qtext <|> quoted_pair - --- | `quoted_string` --- | --- | WARNING: this rule was changed in order to take into account the new `qtext` rule, --- | which now accepts the space character. This new rule, as implemented, still allows --- | for multiple line returns in the quoted string. --- | --- | --- | Original RFC5322 rule: --- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]` --- | --- | Implemented rule: --- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]` -quoted_string :: forall e. Parser e String -quoted_string = do s <- tryMaybe cfws - _ <- char '"' - m <- A.many do l <- tryMaybe crlf - c <- qcontent - pure $ maybe "" id l <> c - _ <- char '"' - e <- tryMaybe cfws - pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e - -- | TODO: For now, `local_part` only checks that -- | (a) the first character is a letter, -- | (b) the last character is either a letter or a digit. -- | The rest can be any letter, digit, '-' or '.'. -local_part' :: Parser EmailError String -local_part' = do firstchar <- letter - rest <- until end (letter <|> digit <|> char '-' <|> char '.') - lastchar <- letter <|> digit - pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar] +local_part :: Parser EmailError String +local_part = do firstchar <- letter + rest <- until end (letter <|> digit <|> char '-' <|> char '.') + lastchar <- letter <|> digit + pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar] where end :: forall e. Parser e Char end = do c <- item @@ -577,7 +41,7 @@ local_part' = do firstchar <- letter -- | `email` is the parser for email addresses. email :: Parser EmailError String -email = do login <- local_part' +email = do login <- local_part _ <- char '@' input <- current_input case parse sub_eof input of diff --git a/src/GenericParser/RFC5322.purs b/src/GenericParser/RFC5322.purs new file mode 100644 index 0000000..2c3f5a3 --- /dev/null +++ b/src/GenericParser/RFC5322.purs @@ -0,0 +1,552 @@ +-- | `EmailAddress` is a parser for email addresses, implementing the grammar found in RFC5322. +-- | +-- | STATUS: the parser mostly works, except for comments. +-- | Comments provoke a stack overflow, this must be investigated. +-- | +-- | Also, the parser needs a thorough review. +module GenericParser.RFC5322 where + +import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit) + +import Control.Alt ((<|>)) +import Control.Lazy (defer) +import Data.Array as A +import Data.Char as C +import Data.Maybe (Maybe(..), maybe) +import Data.String.CodeUnits as CU + +import GenericParser.BaseFunctions (id) +import GenericParser.Parser (Parser, sat, char, char_num, char_range, string, many1, tryMaybe) +-- ABNF core rules. +import GenericParser.RFC5234 (crlf, wsp, vchar, lf, cr) +import GenericParser.SomeParsers (alphanum) + +-- | obs-FWS: obsolete folding white space. +-- | +-- | obs-FWS = `1*WSP *(CRLF 1*WSP)` +obs_fws :: forall e. Parser e String +obs_fws = do x <- A.many wsp + xs <- A.many $ do v <- crlf + w <- many1 wsp + pure $ v <> CU.fromCharArray w + pure $ A.fold $ [CU.fromCharArray x] <> xs + +-- | FWS: folding white space. This can be described in plain english as: +-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space +-- | 2. or, by the obs-FWS rule (meaning: many empty lines) +-- | +-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS` +fws :: forall e. Parser e String +fws = do x <- tryMaybe do xs <- A.many wsp + v <- crlf + pure $ CU.fromCharArray xs <> v + w <- many1 wsp + let first_part = maybe "" id x + second_part = CU.fromCharArray w + pure $ first_part <> second_part + <|> obs_fws + +-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'. +-- | +-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext` +ctext :: forall e. Parser e Char +ctext = sat cond <|> obs_ctext + where cond x = let charcode = C.toCharCode x + in between 33 39 charcode + || between 42 91 charcode + || between 93 126 charcode + +-- | `quoted_pair`: pair of characters. +-- | +-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp` +quoted_pair :: forall e. Parser e String +quoted_pair = do _ <- char '\\' + v <- vchar <|> wsp + pure $ "\\" <> CU.singleton v + <|> obs_qp + +-- | Comment content. +-- | +-- | ccontent = `ctext / quoted-pair / comment` +ccontent :: forall e. Parser e String +ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment + +-- | Comment. Nothing to return since comments aren't to be processed. +-- | +-- | comment = `"(" *([FWS] ccontent) [FWS] ")"` +comment :: forall e. Parser e String +comment = do _ <- char '(' + xs <- A.many do _ <- A.many fws + ccontent + _ <- char ')' + pure $ "(" <> A.fold xs <> ")" + +-- | CFWS: comment folding white space. +-- | +-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS` +cfws :: forall e. Parser e String +cfws = do xs <- many1 $ do _ <- tryMaybe fws + comment + pure $ A.fold xs + <|> fws + +-- | `address`: email address. +-- | +-- | address = `mailbox / group` +address :: forall e. Parser e String +address = mailbox <|> group + +-- | `mailbox`: mail address. +-- | +-- | mailbox = `name-addr / addr-spec` +mailbox :: forall e. Parser e String +mailbox = name_addr <|> addr_spec + +-- | `name_addr`: address name. +-- | +-- | name-addr = `[display-name] angle-addr` +name_addr :: forall e. Parser e String +name_addr = do _ <- tryMaybe display_name + angle_addr + +-- | `angle_addr` address specification between '<' and '>' characters. +-- | +-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr` +angle_addr :: forall e. Parser e String +angle_addr = do _ <- tryMaybe cfws + _ <- char '<' + a <- addr_spec + _ <- char '>' + _ <- tryMaybe cfws + pure $ "<" <> a <> ">" + <|> obs_angle_addr + +-- | `group`: a list of email addresses. +-- | +-- | group = `display-name ":" [group-list] ";" [CFWS]` +group :: forall e. Parser e String +group = do _ <- display_name + _ <- char ':' + xs <- tryMaybe group_list + _ <- char ';' + _ <- tryMaybe cfws + let res = ":" <> maybe "" id xs <> ";" + pure res + +-- | `display_name`: displayed name, not the actual email address. +-- | +-- | display-name = `phrase` +display_name :: forall e. Parser e String +display_name = phrase + +-- | `mailbox_list` +-- | +-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list` +mailbox_list :: forall e. Parser e String +mailbox_list = do mb <- mailbox + xs <- A.many $ do _ <- char ',' + mx <- mailbox + pure $ "," <> mx + pure $ mb <> A.fold xs + <|> obs_mbox_list + +-- | `address_list` +-- | +-- | address-list = `(address *("," address)) / obs-addr-list` +address_list :: forall e. Parser e String +address_list = do a <- address + xs <- A.many do _ <- char ',' + address + pure $ A.fold $ A.intersperse "," $ [a] <> xs + <|> obs_addr_list + +-- | `group_list` +-- | +-- | group-list = `mailbox-list / CFWS / obs-group-list` +group_list :: forall e. Parser e String +group_list = mailbox_list <|> cfws <|> obs_group_list + + +-- | `atext`: atom accepted characters. +-- | Printable US-ASCII characters not including specials. +-- | +-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /` +-- | `"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"` +atext :: forall e. Parser e Char +atext = alphanum + <|> char '!' <|> char '#' + <|> char '$' <|> char '%' + <|> char '&' <|> char '\'' + <|> char '*' <|> char '+' + <|> char '-' <|> char '/' + <|> char '=' <|> char '?' + <|> char '^' <|> char '_' + <|> char '`' <|> char '{' + <|> char '|' <|> char '}' + <|> char '~' + + +-- | `atom` +-- | +-- | atom = `[CFWS] 1*atext [CFWS]` +atom :: forall e. Parser e String +atom = CU.fromCharArray <$> do _ <- tryMaybe cfws + a <- many1 atext + _ <- tryMaybe cfws + pure a + +-- | `dot_atom_text` +-- | +-- | dot-atom-text = `1*atext *("." 1*atext)` +dot_atom_text :: forall e. Parser e String +dot_atom_text = do xs0 <- many1 atext + xs1 <- A.many $ do _ <- char '.' + xs <- many1 atext + pure $ "." <> CU.fromCharArray xs + let str0 = CU.fromCharArray xs0 + str1 = A.fold xs1 + pure $ str0 <> str1 + +-- | `dot_atom` +-- | +-- | dot-atom = `[CFWS] dot-atom-text [CFWS]` +dot_atom :: forall e. Parser e String +dot_atom = do _ <- tryMaybe cfws + x <- dot_atom_text + _ <- tryMaybe cfws + pure x + +-- | `specials`: special characters that do not appear in `atext`. +-- | +-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /` +-- | `"\" / "," / "." / DQUOTE` +specials :: forall e. Parser e Char +specials = char '(' + <|> char ')' + <|> char '<' + <|> char '>' + <|> char '[' + <|> char ']' + <|> char ':' + <|> char ';' + <|> char '@' + <|> char '\\' + <|> char ',' + <|> char '.' + <|> char '"' + +-- | `addr_spec` +-- | +-- | addr-spec = `local-part "@" domain` +addr_spec :: forall e. Parser e String +addr_spec = do lpart <- local_part + _ <- char '@' + dom <- domain + pure $ lpart <> "@" <> dom + +-- | `local_part` +-- | +-- | local-part = `dot-atom / quoted-string / obs-local-part` +local_part :: forall e. Parser e String +local_part = dot_atom <|> quoted_string <|> obs_local_part + +-- | `domain`: this is a parser for a domain. +-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different. +-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example. +-- | Therefore, errors are differents from a parser to another. +-- | +-- | domain = `dot-atom / domain-literal / obs-domain` +domain :: forall e. Parser e String +domain = dot_atom <|> domain_literal <|> obs_domain + +-- | `domain_literal` +-- | +-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]` +domain_literal :: forall e. Parser e String +domain_literal = do s <- tryMaybe cfws + _ <- char '[' + xs <- A.many do _ <- tryMaybe fws + dtext + m <- tryMaybe fws + _ <- char ']' + e <- tryMaybe cfws + pure $ maybe "" id s + <> "[" <> A.fold xs <> maybe "" id m <> "]" + <> maybe "" id e + +-- | dtext: characters in domains. +-- | Printable US-ASCII characters not including "[", "]", or "\". +-- | +-- | dtext = `%d33-90 / %d94-126 / obs-dtext` +dtext :: forall e. Parser e String +dtext = CU.singleton <$> sat cond <|> obs_dtext + where cond x = let charcode = C.toCharCode x + in between 33 90 charcode || between 94 126 charcode + +-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters. +-- | +-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]` +obs_angle_addr :: forall e. Parser e String +obs_angle_addr = do s <- tryMaybe cfws + _ <- char '<' + r <- obs_route + a <- addr_spec + _ <- char '>' + e <- tryMaybe cfws + pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e + +-- | `obs_route` +-- | +-- | obs-route = `obs-domain-list ":"` +obs_route :: forall e. Parser e String +obs_route = do l <- obs_domain_list + _ <- char ':' + pure $ l <> ":" + +-- | `obs_domain_list` +-- | +-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])` +obs_domain_list :: forall e. Parser e String +obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ',' + _ <- char '@' + d <- domain + xs <- A.many do _ <- char ',' + c <- tryMaybe cfws + _ <- char '@' + d2 <- domain + pure $ "," <> maybe "" id c <> d2 + pure $ A.fold s <> d <> A.fold xs + +-- | `obs_mbox_list` +-- | +-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])` +obs_mbox_list :: forall e. Parser e String +obs_mbox_list = do _ <- A.many $ do _ <- cfws + _ <- char ',' + pure unit + mb <- mailbox + xs <- A.many $ do _ <- char ',' + x <- mailbox <|> do _ <- cfws + pure "" + pure x + pure $ mb <> A.fold xs + +-- | `obs_addr_list` +-- | +-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])` +obs_addr_list :: forall e. Parser e String +obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws + char ',' + a <- address + xs <- A.many do _ <- char ',' + address <|> do _ <- cfws + pure "" + let res = A.fold $ A.intersperse "," $ [a] <> xs + pure res + +-- | `obs_group_list` +-- | +-- | obs-group-list = `1*([CFWS] ",") [CFWS]` +obs_group_list :: forall e. Parser e String +obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws + _ <- char ',' + pure $ maybe "" id x <> "," + c <- tryMaybe cfws + pure $ A.fold xs <> maybe "" id c + +-- | `obs_local_part` +-- | +-- | obs-local-part = `word *("." word)` +obs_local_part :: forall e. Parser e String +obs_local_part = do w <- word + ws <- A.many $ do _ <- char '.' + w1 <- word + pure $ "." <> w1 + pure $ w <> A.fold ws + +-- | `obs_domain` +-- | +-- | obs-domain = `atom *("." atom)` +obs_domain :: forall e. Parser e String +obs_domain = do a <- atom + xs <- A.many $ do _ <- char '.' + x <- atom + pure $ "." <> x + pure $ a <> A.fold xs + +-- | `obs_dtext`: obsolete domain text. +-- | +-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair` +obs_dtext :: forall e. Parser e String +obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair + +-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return, +-- | line feed and white space characters. +-- | +-- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127` +obs_no_ws_ctl :: forall e. Parser e Char +obs_no_ws_ctl = sat cond + where cond x = let charcode = C.toCharCode x + in between 1 8 charcode + || between 11 12 charcode + || between 14 31 charcode + || charcode == 127 + +-- | obs-ctext: obsolete comment text. +-- | +-- | obs-ctext = `obs-NO-WS-CTL` +obs_ctext :: forall e. Parser e Char +obs_ctext = obs_no_ws_ctl + +-- | `obs_qtext`: obsolete accepted quoted text. +-- | +-- | obs-qtext = `obs-NO-WS-CTL` +obs_qtext :: forall e. Parser e Char +obs_qtext = obs_no_ws_ctl + +-- | `obs_utext`: obsolete text. +-- | +-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR` +obs_utext :: forall e. Parser e Char +obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar + +-- | `obs_qp`: obsolete quoted-pair rule. +-- | +-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)` +obs_qp :: forall e. Parser e String +obs_qp = do _ <- char '\\' + v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr + pure $ "\\" <> CU.singleton v + +-- | `obs_body`: obsolete body. +-- | +-- | Note: the simpler version found in the errata is implemented, which basically accept everything. +-- | +-- | Note: `text` is replaced by `vchar`. +-- | +-- | (RFC) +-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)` +-- | +-- | (RFC Errata v1) +-- | obs-body = `*(%d0-127)` +-- | +-- | (RFC Errata v2) +-- | obs-body = `*(d0 /text / LF / CR)` +-- Errata v1 +--obs_body :: forall e. Parser e String +--obs_body = CU.fromCharArray <$> A.many item + +-- Errata v2 +obs_body :: forall e. Parser e String +obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr) + +--obs_body original +--obs_body :: forall e. Parser e String +--obs_body = do A.many $ do _ <- A.many lf +-- _ <- A.many cr +-- v <- A.many $ do x <- char_num 0 <|> text +-- _ <- A.many lf +-- _ <- A.many cr +-- pure x +-- pure $ A.fold v +-- <|> _ <- crlf +-- pure "" + +-- | `obs_unstruct` +-- | +-- | Note: implement the version found in the Errata page. +-- | +-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)` +-- | +-- | (RFC Errata) +-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR` +obs_unstruct :: forall e. Parser e String +obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr + many1 (obs_utext <|> do _ <- fws + pure ' ') + <|> do _ <- fws + pure "" + +-- | `obs_phrase`: obsolete "phrase". +-- | +-- | obs-phrase = `word *(word / "." / CFWS)` +obs_phrase :: forall e. Parser e String +obs_phrase = do w <- word + ws <- A.many (word <|> string "." <|> do _ <- cfws + pure "") + pure $ w <> A.fold ws + +-- | `word`. +-- | +-- | word = `atom / quoted-string` +word :: forall e. Parser e String +word = atom <|> quoted_string + +-- | `phrase`: list of words (at least one) or the obsolete phrase rule. +-- | +-- | phrase = `1*word / obs-phrase` +phrase :: forall e. Parser e String +phrase = do ws <- many1 word + pure $ A.fold ws + <|> obs_phrase + +-- | `unstructured` +-- | +-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct` +unstructured :: forall e. Parser e String +unstructured = do v <- A.many $ do _ <- fws + vchar + _ <- A.many wsp + pure $ CU.fromCharArray v + <|> obs_unstruct + + +-- | `obs_phrase_list`: obsolete list of phrases. +-- | +-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])` +obs_phrase_list :: forall e. Parser e String +obs_phrase_list = do first_phrase <- phrase_or_cfws + xs <- A.many $ do _ <- char ',' + phrase_or_cfws + pure $ A.fold $ [first_phrase] <> xs + where phrase_or_cfws = do first_phrase <- tryMaybe phrase + case first_phrase of + Nothing -> do _ <- cfws + pure "" + Just x -> pure x + +-- | `qtext`: printable US-ASCII characters not including "\" or the quote character. +-- | +-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`). +-- | +-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext` +qtext :: forall e. Parser e Char +qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext + +-- | `qcontent` +-- | +-- | qcontent = `qtext / quoted-pair` +qcontent :: forall e. Parser e String +qcontent = CU.singleton <$> qtext <|> quoted_pair + +-- | `quoted_string` +-- | +-- | WARNING: this rule was changed in order to take into account the new `qtext` rule, +-- | which now accepts the space character. This new rule, as implemented, still allows +-- | for multiple line returns in the quoted string. +-- | +-- | +-- | Original RFC5322 rule: +-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]` +-- | +-- | Implemented rule: +-- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]` +quoted_string :: forall e. Parser e String +quoted_string = do s <- tryMaybe cfws + _ <- char '"' + m <- A.many do l <- tryMaybe crlf + c <- qcontent + pure $ maybe "" id l <> c + _ <- char '"' + e <- tryMaybe cfws + pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e diff --git a/test/Main.purs b/test/Main.purs index 6f811f1..7edd797 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -14,7 +14,8 @@ import GenericParser.DomainParserRFC1035 as RFC1035 import GenericParser.DomainParser as ModernDomains import GenericParser.IPAddress (IPv4Error(..)) import GenericParser.IPAddress as IP -import GenericParser.EmailAddress as E +import GenericParser.EmailAddress as EA +import GenericParser.RFC5322 as E import Test.TestValues as T run :: forall e v. Parser e v -> String -> P.Result e v @@ -77,9 +78,9 @@ showerror_ipv4 :: IP.IPv4Error -> String showerror_ipv4 (IP4NumberTooBig x) = "value '" <> show x <> "' is > 255" showerror_ipv4 IP4IrrelevantShortRepresentation = "useless double dots" -showerror_email :: E.EmailError -> String -showerror_email E.InvalidCharacter = "InvalidCharacter" -showerror_email (E.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e +showerror_email :: EA.EmailError -> String +showerror_email EA.InvalidCharacter = "InvalidCharacter" +showerror_email (EA.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e main :: Effect Unit main = do @@ -102,7 +103,7 @@ main = do test_series "IP.ipv6" IP.ipv6 id showerror_ipv6 T.ipv6_addresses log "" - test_series "E.email" E.email id showerror_email T.valid_email_addresses + test_series "EA.email" EA.email id showerror_email T.valid_email_addresses log "" test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short