From ef336bd2de2f97eb8b85a7ea6b134b50ef26b289 Mon Sep 17 00:00:00 2001 From: Philippe Pittoli Date: Sat, 27 Jan 2024 04:46:15 +0100 Subject: [PATCH] WIP RFC5234 (augmented bnf) and RFC5322 (email addresses). --- src/GenericParser/EmailAddress.purs | 118 +++++++++++++++++++++++----- 1 file changed, 98 insertions(+), 20 deletions(-) diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index 3f2832e..331f50d 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -2,7 +2,7 @@ -- | This module is experimental and doesn't follow every rule for an email address, yet. module GenericParser.EmailAddress where -import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between) +import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void) import Control.Alt ((<|>)) import Data.Array as A @@ -21,22 +21,83 @@ data EmailError = InvalidCharacter | InvalidDomain (Maybe DomainError) +-- | RFC 5234: + +--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z +-- +--BIT = "0" / "1" +-- +--CHAR = %x01-7F +-- ; any 7-bit US-ASCII character, +-- ; excluding NUL +-- +--CR = %x0D +-- ; carriage return +-- +--CRLF = CR LF +-- ; Internet standard newline +-- +--CTL = %x00-1F / %x7F +-- ; controls +-- +--DIGIT = %x30-39 +-- ; 0-9 +-- +--DQUOTE = %x22 +-- ; " (Double Quote) +-- +--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + +-- | HTAB: horizontal tab. +-- | HTAB = %x09 +htab :: forall e. Parser e Char +htab = char '\t' + +--LF = %x0A +-- ; linefeed +-- +--LWSP = *(WSP / CRLF WSP) +-- ; Use of this linear-white-space rule +-- ; permits lines containing only white +-- ; space that are no longer legal in +-- ; mail headers and have caused +-- ; interoperability problems in other +-- ; contexts. +-- ; Do not use when defining mail +-- ; headers and use with caution in +-- ; other contexts. +-- +--OCTET = %x00-FF +-- ; 8 bits of data +-- +-- | SP: space. +-- | +-- | SP = %x20 +sp :: forall e. Parser e Char +sp = char ' ' + +-- | VCHAR = %x21-7E +-- | +-- | Visible printing characters. +vchar :: forall e. Parser e Char +vchar = sat (\x -> between 33 126 $ C.toCharCode x) + +-- | WSP: white space. +-- | +-- | WSP = SP / HTAB +wsp :: forall e. Parser e Char +wsp = sp <|> htab + + crlf :: forall e. Parser e Unit crlf = do _ <- char '\r' _ <- char '\n' pure unit --- | WSP = a white space. --- | --- | TODO: I assumed it's just a space or tab. Verify and fix. -wsp :: Parser EmailError Char -wsp = char ' ' <|> char '\t' ---wsp = space -- in case we want any possible space value - -- | obs-FWS = 1*WSP *(CRLF 1*WSP) -- | -- | Obsolete FWS. -obs_fws :: Parser EmailError Unit +obs_fws :: forall e. Parser e Unit obs_fws = do _ <- A.many wsp _ <- A.many $ do _ <- crlf _ <- many1 wsp @@ -48,7 +109,7 @@ obs_fws = do _ <- A.many wsp -- In english: FWS is described as: -- 1. an OPTIONAL line with potential white spaces followed by at least one white space -- 2. or, by the obs-FWS rule (meaning: many empty lines) -fws :: Parser EmailError Unit +fws :: forall e. Parser e Unit fws = do _ <- tryMaybe do _ <- A.many wsp _ <- crlf pure unit @@ -60,27 +121,44 @@ fws = do _ <- tryMaybe do _ <- A.many wsp -- %d42-91 / ; characters not including -- %d93-126 / ; "(", ")", or "\" -- obs-ctext -ctext :: Parser EmailError Char +ctext :: forall e. Parser e Char ctext = sat cond <|> obs_ctext where cond x = let charcode = C.toCharCode x in between 33 39 charcode || between 42 91 charcode || between 93 126 charcode --- ccontent = ctext / quoted-pair / comment ---ccontent :: Parser EmailError String ---ccontent = do +-- | TODO +quoted_pair :: forall e. Parser e Char +quoted_pair = char ' ' --- comment = "(" *([FWS] ccontent) [FWS] ")" ---comment :: Parser EmailError String ---comment = do +-- | ccontent = ctext / quoted-pair / comment +-- | +-- | Comment content. TODO +ccontent :: forall e. Parser e Unit +ccontent = many_ctext <|> a_quoted_pair <|> comment + where many_ctext :: Parser e Unit + many_ctext = do void $ A.many ctext + a_quoted_pair :: Parser e Unit + a_quoted_pair = do void quoted_pair + +-- | comment = "(" *([FWS] ccontent) [FWS] ")" +-- | +-- | Comment. Nothing to return. +comment :: forall e. Parser e Unit +comment = do _ <- char '(' + _ <- A.many (do _ <- A.many fws + _ <- ccontent + pure unit) + _ <- char ')' + pure unit -- CFWS = (1*([FWS] comment) [FWS]) / FWS ---cfws :: Parser EmailError String +--cfws :: forall e. Parser e String --cfws = do -- address = mailbox / group ---address :: Parser EmailError String +--address :: forall e. Parser e String --address = do --mailbox = name-addr / addr-spec @@ -138,7 +216,7 @@ dtext = sat cond <|> obs_dtext -- | TODO: Obsolete domain text. -- | obs-dtext = obs-NO-WS-CTL / quoted-pair obs_dtext :: forall e. Parser e Char -obs_dtext = obs_no_ws_ctl --<|> quoted_pair +obs_dtext = obs_no_ws_ctl <|> quoted_pair --obs-NO-WS-CTL = %d1-8 / ; US-ASCII control -- %d11 / ; characters that do not