Even more RFC5322 rules! IT FUCKING NEVER STOPS GODDAMN IT.
This commit is contained in:
parent
061aed023f
commit
9c7a534030
1 changed files with 167 additions and 10 deletions
|
@ -2,7 +2,7 @@
|
||||||
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
||||||
module GenericParser.EmailAddress where
|
module GenericParser.EmailAddress where
|
||||||
|
|
||||||
import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void)
|
import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit)
|
||||||
|
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Data.Array as A
|
import Data.Array as A
|
||||||
|
@ -12,13 +12,13 @@ import Data.Maybe (Maybe(..))
|
||||||
import Data.String.CodeUnits as CU
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
import GenericParser.Parser (Parser(..)
|
import GenericParser.Parser (Parser(..)
|
||||||
, sat, char, char_num, char_range, item, many1, tryMaybe
|
, sat, char, char_num, char_range, string, item, many1, tryMaybe
|
||||||
, current_input, failureError, parse, rollback, until)
|
, current_input, failureError, parse, rollback, until)
|
||||||
import GenericParser.DomainParser.Common (DomainError)
|
import GenericParser.DomainParser.Common (DomainError)
|
||||||
import GenericParser.DomainParser (sub_eof)
|
import GenericParser.DomainParser (sub_eof)
|
||||||
-- ABNF core rules.
|
-- ABNF core rules.
|
||||||
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
|
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
|
||||||
import GenericParser.SomeParsers (letter)
|
import GenericParser.SomeParsers (letter, alphanum)
|
||||||
|
|
||||||
data EmailError
|
data EmailError
|
||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
|
@ -134,8 +134,12 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
|
||||||
-- _ <- tryMaybe cfws
|
-- _ <- tryMaybe cfws
|
||||||
-- pure $ maybe [] xs
|
-- pure $ maybe [] xs
|
||||||
|
|
||||||
--display-name = phrase
|
-- | `display_name`: displayed name, not the actual email address.
|
||||||
--
|
-- |
|
||||||
|
-- | display-name = phrase
|
||||||
|
display_name :: forall e. Parser e String
|
||||||
|
display_name = phrase
|
||||||
|
|
||||||
--mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
|
--mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
|
||||||
--
|
--
|
||||||
--address-list = (address *("," address)) / obs-addr-list
|
--address-list = (address *("," address)) / obs-addr-list
|
||||||
|
@ -143,6 +147,63 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
|
||||||
--group-list = mailbox-list / CFWS / obs-group-list
|
--group-list = mailbox-list / CFWS / obs-group-list
|
||||||
|
|
||||||
|
|
||||||
|
-- | `atext`: atom accepted characters.
|
||||||
|
-- |
|
||||||
|
-- | atext = ALPHA / DIGIT / ; Printable US-ASCII
|
||||||
|
-- | "!" / "#" / ; characters not including
|
||||||
|
-- | "$" / "%" / ; specials. Used for atoms.
|
||||||
|
-- | "&" / "'" /
|
||||||
|
-- | "*" / "+" /
|
||||||
|
-- | "-" / "/" /
|
||||||
|
-- | "=" / "?" /
|
||||||
|
-- | "^" / "_" /
|
||||||
|
-- | "`" / "{" /
|
||||||
|
-- | "|" / "}" /
|
||||||
|
-- | "~"
|
||||||
|
atext :: forall e. Parser e Char
|
||||||
|
atext = alphanum
|
||||||
|
<|> char '!' <|> char '#'
|
||||||
|
<|> char '$' <|> char '%'
|
||||||
|
<|> char '&' <|> char '\''
|
||||||
|
<|> char '*' <|> char '+'
|
||||||
|
<|> char '-' <|> char '/'
|
||||||
|
<|> char '=' <|> char '?'
|
||||||
|
<|> char '^' <|> char '_'
|
||||||
|
<|> char '`' <|> char '{'
|
||||||
|
<|> char '|' <|> char '}'
|
||||||
|
<|> char '~'
|
||||||
|
|
||||||
|
|
||||||
|
-- | `atom` = [CFWS] 1*atext [CFWS]
|
||||||
|
-- |
|
||||||
|
-- | atom = [CFWS] 1*atext [CFWS]
|
||||||
|
atom :: forall e. Parser e String
|
||||||
|
atom = CU.fromCharArray <$> do A.many atext
|
||||||
|
|
||||||
|
-- | dot-atom-text = 1*atext *("." 1*atext)
|
||||||
|
|
||||||
|
-- | dot-atom = [CFWS] dot-atom-text [CFWS]
|
||||||
|
|
||||||
|
-- | `specials`: special characters that do not appear in `atext`.
|
||||||
|
-- |
|
||||||
|
-- | specials = "(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /
|
||||||
|
-- | "\" / "," / "." / DQUOTE
|
||||||
|
specials :: forall e. Parser e Char
|
||||||
|
specials = char '('
|
||||||
|
<|> char ')'
|
||||||
|
<|> char '<'
|
||||||
|
<|> char '>'
|
||||||
|
<|> char '['
|
||||||
|
<|> char ']'
|
||||||
|
<|> char ':'
|
||||||
|
<|> char ';'
|
||||||
|
<|> char '@'
|
||||||
|
<|> char '\\'
|
||||||
|
<|> char ','
|
||||||
|
<|> char '.'
|
||||||
|
<|> char '"'
|
||||||
|
|
||||||
|
|
||||||
-- addr-spec = local-part "@" domain
|
-- addr-spec = local-part "@" domain
|
||||||
--
|
--
|
||||||
-- local-part = dot-atom / quoted-string / obs-local-part
|
-- local-part = dot-atom / quoted-string / obs-local-part
|
||||||
|
@ -221,9 +282,11 @@ obs_ctext = obs_no_ws_ctl
|
||||||
obs_qtext :: forall e. Parser e Char
|
obs_qtext :: forall e. Parser e Char
|
||||||
obs_qtext = obs_no_ws_ctl
|
obs_qtext = obs_no_ws_ctl
|
||||||
|
|
||||||
-- | TODO: `obs_utext`: obsolete text.
|
-- | `obs_utext`: obsolete text.
|
||||||
-- |
|
-- |
|
||||||
-- | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
|
-- | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
|
||||||
|
obs_utext :: forall e. Parser e Char
|
||||||
|
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
|
||||||
|
|
||||||
-- | `obs_qp`: obsolete quoted-pair rule.
|
-- | `obs_qp`: obsolete quoted-pair rule.
|
||||||
-- |
|
-- |
|
||||||
|
@ -233,14 +296,108 @@ obs_qp = do _ <- char '\\'
|
||||||
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
|
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
|
||||||
pure $ "\\" <> CU.singleton v
|
pure $ "\\" <> CU.singleton v
|
||||||
|
|
||||||
--obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
|
-- | TODO: `obs_body`: obsolete body.
|
||||||
|
-- |
|
||||||
|
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
|
||||||
|
-- |
|
||||||
|
-- | Note: `text` is replaced by `vchar`.
|
||||||
|
-- |
|
||||||
|
-- | (RFC)
|
||||||
|
-- | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
|
||||||
|
-- |
|
||||||
|
-- | (RFC Errata v1)
|
||||||
|
-- | obs-body = *(%d0-127)
|
||||||
|
-- |
|
||||||
|
-- | (RFC Errata v2)
|
||||||
|
-- | obs-body = *(d0 /text / LF / CR)
|
||||||
|
-- Errata v1
|
||||||
|
--obs_body :: forall e. Parser e String
|
||||||
|
--obs_body = A.fold <$> A.many item
|
||||||
--
|
--
|
||||||
--obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
|
-- Errata v2
|
||||||
--
|
--obs_body :: forall e. Parser e String
|
||||||
--obs-phrase = word *(word / "." / CFWS)
|
--obs_body = A.fold <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
|
||||||
--
|
--
|
||||||
|
--obs_body original
|
||||||
|
--obs_body :: forall e. Parser e String
|
||||||
|
--obs_body = do A.many $ do _ <- A.many lf
|
||||||
|
-- _ <- A.many cr
|
||||||
|
-- v <- A.many $ do x <- char_num 0 <|> text
|
||||||
|
-- _ <- A.many lf
|
||||||
|
-- _ <- A.many cr
|
||||||
|
-- pure x
|
||||||
|
-- pure $ A.fold v
|
||||||
|
-- <|> _ <- crlf
|
||||||
|
-- pure ""
|
||||||
|
|
||||||
|
-- | TODO: `obs_unstruct`
|
||||||
|
-- |
|
||||||
|
-- | Note: implement the version found in the Errata page.
|
||||||
|
-- |
|
||||||
|
-- | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
|
||||||
|
-- |
|
||||||
|
-- | (RFC Errata)
|
||||||
|
-- | obs-unstruct = *( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR
|
||||||
|
obs_unstruct :: forall e. Parser e String
|
||||||
|
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
|
||||||
|
many1 (obs_utext <|> do _ <- fws
|
||||||
|
pure ' ')
|
||||||
|
<|> do _ <- fws
|
||||||
|
pure ""
|
||||||
|
|
||||||
|
-- | `obs_phrase`: obsolete "phrase".
|
||||||
|
-- |
|
||||||
|
-- | obs-phrase = word *(word / "." / CFWS)
|
||||||
|
obs_phrase :: forall e. Parser e String
|
||||||
|
obs_phrase = do w <- word
|
||||||
|
ws <- A.many (word <|> string "." <|> do _ <- cfws
|
||||||
|
pure "")
|
||||||
|
pure $ w <> A.fold ws
|
||||||
|
|
||||||
|
-- | `word`.
|
||||||
|
-- |
|
||||||
|
-- | word = atom / quoted-string
|
||||||
|
word :: forall e. Parser e String
|
||||||
|
word = atom <|> quoted_string
|
||||||
|
|
||||||
|
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
|
||||||
|
-- |
|
||||||
|
-- | phrase = 1*word / obs-phrase
|
||||||
|
phrase :: forall e. Parser e String
|
||||||
|
phrase = do ws <- many1 word
|
||||||
|
pure $ A.fold ws
|
||||||
|
<|> obs_phrase
|
||||||
|
|
||||||
|
--unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
|
||||||
|
|
||||||
--obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS])
|
--obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS])
|
||||||
|
|
||||||
|
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
|
||||||
|
--
|
||||||
|
-- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext
|
||||||
|
qtext :: forall e. Parser e Char
|
||||||
|
qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
|
||||||
|
|
||||||
|
-- | qcontent = qtext / quoted-pair
|
||||||
|
-- |
|
||||||
|
-- | qcontent = qtext / quoted-pair
|
||||||
|
qcontent :: forall e. Parser e String
|
||||||
|
qcontent = CU.singleton <$> qtext <|> quoted_pair
|
||||||
|
|
||||||
|
-- | `quoted_string`
|
||||||
|
-- DQUOTE *([FWS] qcontent) [FWS] DQUOTE
|
||||||
|
-- [CFWS]
|
||||||
|
-- | quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]
|
||||||
|
quoted_string :: forall e. Parser e String
|
||||||
|
quoted_string = do _ <- tryMaybe cfws
|
||||||
|
_ <- char '"'
|
||||||
|
s <- A.many $ do _ <- tryMaybe fws
|
||||||
|
qcontent
|
||||||
|
_ <- char '"'
|
||||||
|
_ <- tryMaybe cfws
|
||||||
|
pure $ "\"" <> A.fold s <> "\""
|
||||||
|
|
||||||
|
|
||||||
-- | TODO: For now, `local_part` only checks that
|
-- | TODO: For now, `local_part` only checks that
|
||||||
-- | (a) the first character is a letter,
|
-- | (a) the first character is a letter,
|
||||||
-- | (b) the last character is either a letter or a digit.
|
-- | (b) the last character is either a letter or a digit.
|
||||||
|
|
Loading…
Add table
Reference in a new issue