Even more RFC5322 rules! IT FUCKING NEVER STOPS GODDAMN IT.

2024-01-28 13:14:07 +01:00 · 2024-01-28 13:14:07 +01:00 · 9c7a534030
commit 9c7a534030
parent 061aed023f
1 changed files with 167 additions and 10 deletions
--- a/src/GenericParser/EmailAddress.purs
+++ b/src/GenericParser/EmailAddress.purs
@ -2,7 +2,7 @@
 -- | This module is experimental and doesn't follow every rule for an email address, yet.
 module GenericParser.EmailAddress where

-import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void)
+import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit)

 import Control.Alt ((<|>))
 import Data.Array as A
@ -12,13 +12,13 @@ import Data.Maybe (Maybe(..))
 import Data.String.CodeUnits as CU

 import GenericParser.Parser (Parser(..)
-                            , sat, char, char_num, char_range, item, many1, tryMaybe
+                            , sat, char, char_num, char_range, string, item, many1, tryMaybe
                            , current_input, failureError, parse, rollback, until)
 import GenericParser.DomainParser.Common (DomainError)
 import GenericParser.DomainParser (sub_eof)
 -- ABNF core rules.
 import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
-import GenericParser.SomeParsers (letter)
+import GenericParser.SomeParsers (letter, alphanum)

 data EmailError
  = InvalidCharacter
@ -134,8 +134,12 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
 --           _ <- tryMaybe cfws
 --           pure $ maybe [] xs

--display-name    =   phrase
--
+-- | `display_name`: displayed name, not the actual email address.
+-- |
+-- | display-name    =   phrase
+display_name :: forall e. Parser e String
+display_name = phrase
+
 --mailbox-list    =   (mailbox *("," mailbox)) / obs-mbox-list
 --
 --address-list    =   (address *("," address)) / obs-addr-list
@ -143,6 +147,63 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
 --group-list      =   mailbox-list / CFWS / obs-group-list


+-- | `atext`: atom accepted characters.
+-- |
+-- | atext           =   ALPHA / DIGIT /    ; Printable US-ASCII
+-- |                     "!" / "#" /        ;  characters not including
+-- |                     "$" / "%" /        ;  specials.  Used for atoms.
+-- |                     "&" / "'" /
+-- |                     "*" / "+" /
+-- |                     "-" / "/" /
+-- |                     "=" / "?" /
+-- |                     "^" / "_" /
+-- |                     "`" / "{" /
+-- |                     "|" / "}" /
+-- |                     "~"
+atext :: forall e. Parser e Char
+atext = alphanum
+        <|> char '!' <|> char '#'
+        <|> char '$' <|> char '%'
+        <|> char '&' <|> char '\''
+        <|> char '*' <|> char '+'
+        <|> char '-' <|> char '/'
+        <|> char '=' <|> char '?'
+        <|> char '^' <|> char '_'
+        <|> char '`' <|> char '{'
+        <|> char '|' <|> char '}'
+        <|> char '~'
+
+
+-- | `atom`            =   [CFWS] 1*atext [CFWS]
+-- |
+-- | atom            =   [CFWS] 1*atext [CFWS]
+atom :: forall e. Parser e String
+atom = CU.fromCharArray <$> do A.many atext
+
+-- | dot-atom-text   =   1*atext *("." 1*atext)
+
+-- | dot-atom        =   [CFWS] dot-atom-text [CFWS]
+
+-- | `specials`: special characters that do not appear in `atext`.
+-- |
+-- | specials        =   "(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /
+-- |                     "\" / "," / "." / DQUOTE
+specials :: forall e. Parser e Char
+specials = char '('
+           <|> char ')'
+           <|> char '<'
+           <|> char '>'
+           <|> char '['
+           <|> char ']'
+           <|> char ':'
+           <|> char ';'
+           <|> char '@'
+           <|> char '\\'
+           <|> char ','
+           <|> char '.'
+           <|> char '"'
+
+
 -- addr-spec       =   local-part "@" domain
 --
 -- local-part      =   dot-atom / quoted-string / obs-local-part
@ -221,9 +282,11 @@ obs_ctext = obs_no_ws_ctl
 obs_qtext :: forall e. Parser e Char
 obs_qtext = obs_no_ws_ctl

-- | TODO: `obs_utext`: obsolete text.
+-- | `obs_utext`: obsolete text.
 -- |
 -- | obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
+obs_utext :: forall e. Parser e Char
+obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar

 -- | `obs_qp`: obsolete quoted-pair rule.
 -- |
@ -233,14 +296,108 @@ obs_qp = do _ <- char '\\'
            v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
            pure $ "\\" <> CU.singleton v

--obs-body        =   *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
+-- | TODO: `obs_body`: obsolete body.
+-- |
+-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
+-- |
+-- | Note: `text` is replaced by `vchar`.
+-- |
+-- | (RFC)
+-- | obs-body        =   *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
+-- |
+-- | (RFC Errata v1)
+-- | obs-body        =   *(%d0-127)
+-- |
+-- | (RFC Errata v2)
+-- | obs-body        =   *(d0 /text / LF / CR)
+-- Errata v1
+--obs_body :: forall e. Parser e String
+--obs_body = A.fold <$> A.many item
 --
--obs-unstruct    =   *((*LF *CR *(obs-utext *LF *CR)) / FWS)
--
--obs-phrase      =   word *(word / "." / CFWS)
+-- Errata v2
+--obs_body :: forall e. Parser e String
+--obs_body = A.fold <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
 --
+--obs_body original
+--obs_body :: forall e. Parser e String
+--obs_body = do A.many $ do _ <- A.many lf
+--                          _ <- A.many cr
+--                          v <- A.many $ do x <- char_num 0 <|> text
+--                                           _ <- A.many lf
+--                                           _ <- A.many cr
+--                                           pure x
+--                          pure $ A.fold v
+--                        <|> _ <- crlf
+--                            pure ""
+
+-- | TODO: `obs_unstruct`
+-- |
+-- | Note: implement the version found in the Errata page.
+-- |
+-- | obs-unstruct    =   *((*LF *CR *(obs-utext *LF *CR)) / FWS)
+-- |
+-- | (RFC Errata)
+-- | obs-unstruct    =   *( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR
+obs_unstruct :: forall e. Parser e String
+obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
+                                                           many1 (obs_utext <|> do _ <- fws
+                                                                                   pure ' ')
+                                                         <|> do _ <- fws
+                                                                pure ""
+
+-- | `obs_phrase`: obsolete "phrase".
+-- |
+-- | obs-phrase      =   word *(word / "." / CFWS)
+obs_phrase :: forall e. Parser e String
+obs_phrase = do w <- word
+                ws <- A.many (word <|> string "." <|> do _ <- cfws
+                                                         pure "")
+                pure $ w <> A.fold ws
+
+-- | `word`.
+-- |
+-- | word            =   atom / quoted-string
+word :: forall e. Parser e String
+word = atom <|> quoted_string
+
+-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
+-- |
+-- | phrase          =   1*word / obs-phrase
+phrase :: forall e. Parser e String
+phrase = do ws <- many1 word
+            pure $ A.fold ws
+          <|> obs_phrase
+
+--unstructured    =   (*([FWS] VCHAR) *WSP) / obs-unstruct
+
 --obs-phrase-list =   [phrase / CFWS] *("," [phrase / CFWS])

+-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
+--
+-- | qtext           =   %d33 / %d35-91 / %d93-126 / obs-qtext
+qtext :: forall e. Parser e Char
+qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
+
+-- | qcontent        =   qtext / quoted-pair
+-- |
+-- | qcontent        =   qtext / quoted-pair
+qcontent :: forall e. Parser e String
+qcontent = CU.singleton <$> qtext <|> quoted_pair
+
+-- | `quoted_string`
+--                    DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+--                    [CFWS]
+-- | quoted-string   =   [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]
+quoted_string :: forall e. Parser e String
+quoted_string = do _ <- tryMaybe cfws
+                   _ <- char '"'
+                   s <- A.many $ do _ <- tryMaybe fws
+                                    qcontent
+                   _ <- char '"'
+                   _ <- tryMaybe cfws
+                   pure $ "\"" <> A.fold s <> "\""
+
+
 -- | TODO: For now, `local_part` only checks that
 -- | (a) the first character is a letter,
 -- | (b) the last character is either a letter or a digit.