Even more RFC5322 rules! IT FUCKING NEVER STOPS GODDAMN IT.
parent
061aed023f
commit
9c7a534030
|
@ -2,7 +2,7 @@
|
|||
-- | This module is experimental and doesn't follow every rule for an email address, yet.
|
||||
module GenericParser.EmailAddress where
|
||||
|
||||
import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void)
|
||||
import Prelude (Unit, (<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, void, unit)
|
||||
|
||||
import Control.Alt ((<|>))
|
||||
import Data.Array as A
|
||||
|
@ -12,13 +12,13 @@ import Data.Maybe (Maybe(..))
|
|||
import Data.String.CodeUnits as CU
|
||||
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, sat, char, char_num, char_range, item, many1, tryMaybe
|
||||
, sat, char, char_num, char_range, string, item, many1, tryMaybe
|
||||
, current_input, failureError, parse, rollback, until)
|
||||
import GenericParser.DomainParser.Common (DomainError)
|
||||
import GenericParser.DomainParser (sub_eof)
|
||||
-- ABNF core rules.
|
||||
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
|
||||
import GenericParser.SomeParsers (letter)
|
||||
import GenericParser.SomeParsers (letter, alphanum)
|
||||
|
||||
data EmailError
|
||||
= InvalidCharacter
|
||||
|
@ -134,8 +134,12 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
|
|||
-- _ <- tryMaybe cfws
|
||||
-- pure $ maybe [] xs
|
||||
|
||||
--display-name = phrase
|
||||
--
|
||||
-- | `display_name`: displayed name, not the actual email address.
|
||||
-- |
|
||||
-- | display-name = phrase
|
||||
display_name :: forall e. Parser e String
|
||||
display_name = phrase
|
||||
|
||||
--mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
|
||||
--
|
||||
--address-list = (address *("," address)) / obs-addr-list
|
||||
|
@ -143,6 +147,63 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
|
|||
--group-list = mailbox-list / CFWS / obs-group-list
|
||||
|
||||
|
||||
-- | `atext`: atom accepted characters.
|
||||
-- |
|
||||
-- | atext = ALPHA / DIGIT / ; Printable US-ASCII
|
||||
-- | "!" / "#" / ; characters not including
|
||||
-- | "$" / "%" / ; specials. Used for atoms.
|
||||
-- | "&" / "'" /
|
||||
-- | "*" / "+" /
|
||||
-- | "-" / "/" /
|
||||
-- | "=" / "?" /
|
||||
-- | "^" / "_" /
|
||||
-- | "`" / "{" /
|
||||
-- | "|" / "}" /
|
||||
-- | "~"
|
||||
atext :: forall e. Parser e Char
|
||||
atext = alphanum
|
||||
<|> char '!' <|> char '#'
|
||||
<|> char '$' <|> char '%'
|
||||
<|> char '&' <|> char '\''
|
||||
<|> char '*' <|> char '+'
|
||||
<|> char '-' <|> char '/'
|
||||
<|> char '=' <|> char '?'
|
||||
<|> char '^' <|> char '_'
|
||||
<|> char '`' <|> char '{'
|
||||
<|> char '|' <|> char '}'
|
||||
<|> char '~'
|
||||
|
||||
|
||||
-- | `atom` = [CFWS] 1*atext [CFWS]
|
||||
-- |
|
||||
-- | atom = [CFWS] 1*atext [CFWS]
|
||||
atom :: forall e. Parser e String
|
||||
atom = CU.fromCharArray <$> do A.many atext
|
||||
|
||||
-- | dot-atom-text = 1*atext *("." 1*atext)
|
||||
|
||||
-- | dot-atom = [CFWS] dot-atom-text [CFWS]
|
||||
|
||||
-- | `specials`: special characters that do not appear in `atext`.
|
||||
-- |
|
||||
-- | specials = "(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /
|
||||
-- | "\" / "," / "." / DQUOTE
|
||||
specials :: forall e. Parser e Char
|
||||
specials = char '('
|
||||
<|> char ')'
|
||||
<|> char '<'
|
||||
<|> char '>'
|
||||
<|> char '['
|
||||
<|> char ']'
|
||||
<|> char ':'
|
||||
<|> char ';'
|
||||
<|> char '@'
|
||||
<|> char '\\'
|
||||
<|> char ','
|
||||
<|> char '.'
|
||||
<|> char '"'
|
||||
|
||||
|
||||
-- addr-spec = local-part "@" domain
|
||||
--
|
||||
-- local-part = dot-atom / quoted-string / obs-local-part
|
||||
|
@ -221,9 +282,11 @@ obs_ctext = obs_no_ws_ctl
|
|||
obs_qtext :: forall e. Parser e Char
|
||||
obs_qtext = obs_no_ws_ctl
|
||||
|
||||
-- | TODO: `obs_utext`: obsolete text.
|
||||
-- | `obs_utext`: obsolete text.
|
||||
-- |
|
||||
-- | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
|
||||
obs_utext :: forall e. Parser e Char
|
||||
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
|
||||
|
||||
-- | `obs_qp`: obsolete quoted-pair rule.
|
||||
-- |
|
||||
|
@ -233,14 +296,108 @@ obs_qp = do _ <- char '\\'
|
|||
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
|
||||
pure $ "\\" <> CU.singleton v
|
||||
|
||||
--obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
|
||||
-- | TODO: `obs_body`: obsolete body.
|
||||
-- |
|
||||
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
|
||||
-- |
|
||||
-- | Note: `text` is replaced by `vchar`.
|
||||
-- |
|
||||
-- | (RFC)
|
||||
-- | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
|
||||
-- |
|
||||
-- | (RFC Errata v1)
|
||||
-- | obs-body = *(%d0-127)
|
||||
-- |
|
||||
-- | (RFC Errata v2)
|
||||
-- | obs-body = *(d0 /text / LF / CR)
|
||||
-- Errata v1
|
||||
--obs_body :: forall e. Parser e String
|
||||
--obs_body = A.fold <$> A.many item
|
||||
--
|
||||
--obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
|
||||
--
|
||||
--obs-phrase = word *(word / "." / CFWS)
|
||||
-- Errata v2
|
||||
--obs_body :: forall e. Parser e String
|
||||
--obs_body = A.fold <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
|
||||
--
|
||||
--obs_body original
|
||||
--obs_body :: forall e. Parser e String
|
||||
--obs_body = do A.many $ do _ <- A.many lf
|
||||
-- _ <- A.many cr
|
||||
-- v <- A.many $ do x <- char_num 0 <|> text
|
||||
-- _ <- A.many lf
|
||||
-- _ <- A.many cr
|
||||
-- pure x
|
||||
-- pure $ A.fold v
|
||||
-- <|> _ <- crlf
|
||||
-- pure ""
|
||||
|
||||
-- | TODO: `obs_unstruct`
|
||||
-- |
|
||||
-- | Note: implement the version found in the Errata page.
|
||||
-- |
|
||||
-- | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)
|
||||
-- |
|
||||
-- | (RFC Errata)
|
||||
-- | obs-unstruct = *( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR
|
||||
obs_unstruct :: forall e. Parser e String
|
||||
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
|
||||
many1 (obs_utext <|> do _ <- fws
|
||||
pure ' ')
|
||||
<|> do _ <- fws
|
||||
pure ""
|
||||
|
||||
-- | `obs_phrase`: obsolete "phrase".
|
||||
-- |
|
||||
-- | obs-phrase = word *(word / "." / CFWS)
|
||||
obs_phrase :: forall e. Parser e String
|
||||
obs_phrase = do w <- word
|
||||
ws <- A.many (word <|> string "." <|> do _ <- cfws
|
||||
pure "")
|
||||
pure $ w <> A.fold ws
|
||||
|
||||
-- | `word`.
|
||||
-- |
|
||||
-- | word = atom / quoted-string
|
||||
word :: forall e. Parser e String
|
||||
word = atom <|> quoted_string
|
||||
|
||||
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
|
||||
-- |
|
||||
-- | phrase = 1*word / obs-phrase
|
||||
phrase :: forall e. Parser e String
|
||||
phrase = do ws <- many1 word
|
||||
pure $ A.fold ws
|
||||
<|> obs_phrase
|
||||
|
||||
--unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
|
||||
|
||||
--obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS])
|
||||
|
||||
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
|
||||
--
|
||||
-- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext
|
||||
qtext :: forall e. Parser e Char
|
||||
qtext = char_num 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
|
||||
|
||||
-- | qcontent = qtext / quoted-pair
|
||||
-- |
|
||||
-- | qcontent = qtext / quoted-pair
|
||||
qcontent :: forall e. Parser e String
|
||||
qcontent = CU.singleton <$> qtext <|> quoted_pair
|
||||
|
||||
-- | `quoted_string`
|
||||
-- DQUOTE *([FWS] qcontent) [FWS] DQUOTE
|
||||
-- [CFWS]
|
||||
-- | quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]
|
||||
quoted_string :: forall e. Parser e String
|
||||
quoted_string = do _ <- tryMaybe cfws
|
||||
_ <- char '"'
|
||||
s <- A.many $ do _ <- tryMaybe fws
|
||||
qcontent
|
||||
_ <- char '"'
|
||||
_ <- tryMaybe cfws
|
||||
pure $ "\"" <> A.fold s <> "\""
|
||||
|
||||
|
||||
-- | TODO: For now, `local_part` only checks that
|
||||
-- | (a) the first character is a letter,
|
||||
-- | (b) the last character is either a letter or a digit.
|
||||
|
|
Loading…
Reference in New Issue