Even more RFC5322 rules! IT FUCKING NEVER STOPS GODDAMN IT.

This commit is contained in:
Philippe Pittoli 2024-01-28 13:14:07 +01:00
parent 061aed023f
commit 9c7a534030

View File

@ -2,7 +2,7 @@
-- | This module is experimental and doesn't follow every rule for an email address, yet.
module GenericParser.EmailAddress where
import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void)
import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit)
import Control.Alt ((<|>))
import Data.Array as A
@ -12,13 +12,13 @@ import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU
import GenericParser.Parser (Parser(..)
, sat, char, char_num, char_range, item, many1, tryMaybe
, sat, char, char_num, char_range, string, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof)
-- ABNF core rules.
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
import GenericParser.SomeParsers (letter)
import GenericParser.SomeParsers (letter, alphanum)
data EmailError
= InvalidCharacter
@ -134,8 +134,12 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
-- _ <- tryMaybe cfws
-- pure $ maybe [] xs
--display-name = phrase
--
-- | `display_name`: displayed name, not the actual email address.
-- |
-- | display-name = phrase
display_name :: forall e. Parser e String
display_name = phrase
--mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
--
--address-list = (address *("," address)) / obs-addr-list
@ -143,6 +147,63 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
--group-list = mailbox-list / CFWS / obs-group-list
-- | `atext`: atom accepted characters.
-- |
-- | atext = ALPHA / DIGIT / ; Printable US-ASCII
-- | "!" / "#" / ; characters not including
-- | "$" / "%" / ; specials. Used for atoms.
-- | "&" / "'" /
-- | "*" / "+" /
-- | "-" / "/" /
-- | "=" / "?" /
-- | "^" / "_" /
-- | "`" / "{" /
-- | "|" / "}" /
-- | "~"
atext :: forall e. Parser e Char
atext = alphanum
<|> char '!' <|> char '#'
<|> char '$' <|> char '%'
<|> char '&' <|> char '\''
<|> char '*' <|> char '+'
<|> char '-' <|> char '/'
<|> char '=' <|> char '?'
<|> char '^' <|> char '_'
<|> char '`' <|> char '{'
<|> char '|' <|> char '}'
<|> char '~'
-- | `atom` = [CFWS] 1*atext [CFWS]
-- |
-- | atom = [CFWS] 1*atext [CFWS]
atom :: forall e. Parser e String
atom = CU.fromCharArray <$> do A.many atext
-- | dot-atom-text = 1*atext *("." 1*atext)
-- | dot-atom = [CFWS] dot-atom-text [CFWS]
-- | `specials`: special characters that do not appear in `atext`.
-- |
-- | specials = "(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /
-- | "\" / "," / "." / DQUOTE
specials :: forall e. Parser e Char
specials = char '('
<|> char ')'
<|> char '<'
<|> char '>'
<|> char '['
<|> char ']'
<|> char ':'
<|> char ';'
<|> char '@'
<|> char '\\'
<|> char ','
<|> char '.'
<|> char '"'
-- addr-spec = local-part "@" domain
--
-- local-part = dot-atom / quoted-string / obs-local-part
@ -221,9 +282,11 @@ obs_ctext = obs_no_ws_ctl
obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl
-- | TODO: `obs_utext`: obsolete text.
-- | `obs_utext`: obsolete text.
-- |
-- | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
obs_utext :: forall e. Parser e Char
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
-- | `obs_qp`: obsolete quoted-pair rule.
-- |
@ -233,14 +296,108 @@ obs_qp = do _ <- char '\\'
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
pure $ "\\" <> CU.singleton v
--obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
-- | TODO: `obs_body`: obsolete body.
-- |
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
-- |
-- | Note: `text` is replaced by `vchar`.
-- |
-- | (RFC)
-- | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
-- |
-- | (RFC Errata v1)
-- | obs-body = *(%d0-127)
-- |
-- | (RFC Errata v2)
-- | obs-body = *(d0 /text / LF / CR)
-- Errata v1
--obs_body :: forall e. Parser e String
--obs_body = A.fold <$> A.many item
--
--obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
--
--obs-phrase = word *(word / "." / CFWS)
-- Errata v2
--obs_body :: forall e. Parser e String
--obs_body = A.fold <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
--
--obs_body original
--obs_body :: forall e. Parser e String
--obs_body = do A.many $ do _ <- A.many lf
-- _ <- A.many cr
-- v <- A.many $ do x <- char_num 0 <|> text
-- _ <- A.many lf
-- _ <- A.many cr
-- pure x
-- pure $ A.fold v
-- <|> _ <- crlf
-- pure ""
-- | TODO: `obs_unstruct`
-- |
-- | Note: implement the version found in the Errata page.
-- |
-- | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
-- |
-- | (RFC Errata)
-- | obs-unstruct = *( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR
obs_unstruct :: forall e. Parser e String
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
many1 (obs_utext <|> do _ <- fws
pure ' ')
<|> do _ <- fws
pure ""
-- | `obs_phrase`: obsolete "phrase".
-- |
-- | obs-phrase = word *(word / "." / CFWS)
obs_phrase :: forall e. Parser e String
obs_phrase = do w <- word
ws <- A.many (word <|> string "." <|> do _ <- cfws
pure "")
pure $ w <> A.fold ws
-- | `word`.
-- |
-- | word = atom / quoted-string
word :: forall e. Parser e String
word = atom <|> quoted_string
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
-- |
-- | phrase = 1*word / obs-phrase
phrase :: forall e. Parser e String
phrase = do ws <- many1 word
pure $ A.fold ws
<|> obs_phrase
--unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
--obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS])
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
--
-- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext
qtext :: forall e. Parser e Char
qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
-- | qcontent = qtext / quoted-pair
-- |
-- | qcontent = qtext / quoted-pair
qcontent :: forall e. Parser e String
qcontent = CU.singleton <$> qtext <|> quoted_pair
-- | `quoted_string`
-- DQUOTE *([FWS] qcontent) [FWS] DQUOTE
-- [CFWS]
-- | quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]
quoted_string :: forall e. Parser e String
quoted_string = do _ <- tryMaybe cfws
_ <- char '"'
s <- A.many $ do _ <- tryMaybe fws
qcontent
_ <- char '"'
_ <- tryMaybe cfws
pure $ "\"" <> A.fold s <> "\""
-- | TODO: For now, `local_part` only checks that
-- | (a) the first character is a letter,
-- | (b) the last character is either a letter or a digit.