From 9c7a534030a24774545a94c4f52c2713fa0ccc2b Mon Sep 17 00:00:00 2001 From: Philippe Pittoli Date: Sun, 28 Jan 2024 13:14:07 +0100 Subject: [PATCH] Even more RFC5322 rules! IT FUCKING NEVER STOPS GODDAMN IT. --- src/GenericParser/EmailAddress.purs | 177 ++++++++++++++++++++++++++-- 1 file changed, 167 insertions(+), 10 deletions(-) diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index 21689f8..5cda2ac 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -2,7 +2,7 @@ -- | This module is experimental and doesn't follow every rule for an email address, yet. module GenericParser.EmailAddress where -import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void) +import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit) import Control.Alt ((<|>)) import Data.Array as A @@ -12,13 +12,13 @@ import Data.Maybe (Maybe(..)) import Data.String.CodeUnits as CU import GenericParser.Parser (Parser(..) - , sat, char, char_num, char_range, item, many1, tryMaybe + , sat, char, char_num, char_range, string, item, many1, tryMaybe , current_input, failureError, parse, rollback, until) import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser (sub_eof) -- ABNF core rules. import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr) -import GenericParser.SomeParsers (letter) +import GenericParser.SomeParsers (letter, alphanum) data EmailError = InvalidCharacter @@ -134,8 +134,12 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws -- _ <- tryMaybe cfws -- pure $ maybe [] xs ---display-name = phrase --- +-- | `display_name`: displayed name, not the actual email address. +-- | +-- | display-name = phrase +display_name :: forall e. Parser e String +display_name = phrase + --mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list -- --address-list = (address *("," address)) / obs-addr-list @@ -143,6 +147,63 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws --group-list = mailbox-list / CFWS / obs-group-list +-- | `atext`: atom accepted characters. +-- | +-- | atext = ALPHA / DIGIT / ; Printable US-ASCII +-- | "!" / "#" / ; characters not including +-- | "$" / "%" / ; specials. Used for atoms. +-- | "&" / "'" / +-- | "*" / "+" / +-- | "-" / "/" / +-- | "=" / "?" / +-- | "^" / "_" / +-- | "`" / "{" / +-- | "|" / "}" / +-- | "~" +atext :: forall e. Parser e Char +atext = alphanum + <|> char '!' <|> char '#' + <|> char '$' <|> char '%' + <|> char '&' <|> char '\'' + <|> char '*' <|> char '+' + <|> char '-' <|> char '/' + <|> char '=' <|> char '?' + <|> char '^' <|> char '_' + <|> char '`' <|> char '{' + <|> char '|' <|> char '}' + <|> char '~' + + +-- | `atom` = [CFWS] 1*atext [CFWS] +-- | +-- | atom = [CFWS] 1*atext [CFWS] +atom :: forall e. Parser e String +atom = CU.fromCharArray <$> do A.many atext + +-- | dot-atom-text = 1*atext *("." 1*atext) + +-- | dot-atom = [CFWS] dot-atom-text [CFWS] + +-- | `specials`: special characters that do not appear in `atext`. +-- | +-- | specials = "(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" / +-- | "\" / "," / "." / DQUOTE +specials :: forall e. Parser e Char +specials = char '(' + <|> char ')' + <|> char '<' + <|> char '>' + <|> char '[' + <|> char ']' + <|> char ':' + <|> char ';' + <|> char '@' + <|> char '\\' + <|> char ',' + <|> char '.' + <|> char '"' + + -- addr-spec = local-part "@" domain -- -- local-part = dot-atom / quoted-string / obs-local-part @@ -221,9 +282,11 @@ obs_ctext = obs_no_ws_ctl obs_qtext :: forall e. Parser e Char obs_qtext = obs_no_ws_ctl --- | TODO: `obs_utext`: obsolete text. +-- | `obs_utext`: obsolete text. -- | -- | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR +obs_utext :: forall e. Parser e Char +obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar -- | `obs_qp`: obsolete quoted-pair rule. -- | @@ -233,14 +296,108 @@ obs_qp = do _ <- char '\\' v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr pure $ "\\" <> CU.singleton v ---obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) +-- | TODO: `obs_body`: obsolete body. +-- | +-- | Note: the simpler version found in the errata is implemented, which basically accept everything. +-- | +-- | Note: `text` is replaced by `vchar`. +-- | +-- | (RFC) +-- | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) +-- | +-- | (RFC Errata v1) +-- | obs-body = *(%d0-127) +-- | +-- | (RFC Errata v2) +-- | obs-body = *(d0 /text / LF / CR) +-- Errata v1 +--obs_body :: forall e. Parser e String +--obs_body = A.fold <$> A.many item -- ---obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) --- ---obs-phrase = word *(word / "." / CFWS) +-- Errata v2 +--obs_body :: forall e. Parser e String +--obs_body = A.fold <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr) -- +--obs_body original +--obs_body :: forall e. Parser e String +--obs_body = do A.many $ do _ <- A.many lf +-- _ <- A.many cr +-- v <- A.many $ do x <- char_num 0 <|> text +-- _ <- A.many lf +-- _ <- A.many cr +-- pure x +-- pure $ A.fold v +-- <|> _ <- crlf +-- pure "" + +-- | TODO: `obs_unstruct` +-- | +-- | Note: implement the version found in the Errata page. +-- | +-- | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) +-- | +-- | (RFC Errata) +-- | obs-unstruct = *( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR +obs_unstruct :: forall e. Parser e String +obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr + many1 (obs_utext <|> do _ <- fws + pure ' ') + <|> do _ <- fws + pure "" + +-- | `obs_phrase`: obsolete "phrase". +-- | +-- | obs-phrase = word *(word / "." / CFWS) +obs_phrase :: forall e. Parser e String +obs_phrase = do w <- word + ws <- A.many (word <|> string "." <|> do _ <- cfws + pure "") + pure $ w <> A.fold ws + +-- | `word`. +-- | +-- | word = atom / quoted-string +word :: forall e. Parser e String +word = atom <|> quoted_string + +-- | `phrase`: list of words (at least one) or the obsolete phrase rule. +-- | +-- | phrase = 1*word / obs-phrase +phrase :: forall e. Parser e String +phrase = do ws <- many1 word + pure $ A.fold ws + <|> obs_phrase + +--unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct + --obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS]) +-- | `qtext`: printable US-ASCII characters not including "\" or the quote character. +-- +-- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext +qtext :: forall e. Parser e Char +qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext + +-- | qcontent = qtext / quoted-pair +-- | +-- | qcontent = qtext / quoted-pair +qcontent :: forall e. Parser e String +qcontent = CU.singleton <$> qtext <|> quoted_pair + +-- | `quoted_string` +-- DQUOTE *([FWS] qcontent) [FWS] DQUOTE +-- [CFWS] +-- | quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS] +quoted_string :: forall e. Parser e String +quoted_string = do _ <- tryMaybe cfws + _ <- char '"' + s <- A.many $ do _ <- tryMaybe fws + qcontent + _ <- char '"' + _ <- tryMaybe cfws + pure $ "\"" <> A.fold s <> "\"" + + -- | TODO: For now, `local_part` only checks that -- | (a) the first character is a letter, -- | (b) the last character is either a letter or a digit.