diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index eaa0711..e04ea6f 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -1,5 +1,9 @@ --- | `EmailAddress` is a parser for simple email addresses. --- | This module is experimental and doesn't follow every rule for an email address, yet. +-- | `EmailAddress` is a parser for email addresses, implementing the grammar found in RFC5322. +-- | +-- | STATUS: the parser mostly works, except for comments. +-- | Comments provoke a stack overflow, this must be investigated. +-- | +-- | Also, the parser needs a thorough review. module GenericParser.EmailAddress where import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit) @@ -28,7 +32,7 @@ data EmailError -- | obs-FWS: obsolete folding white space. -- | --- | obs-FWS = 1*WSP *(CRLF 1*WSP) +-- | obs-FWS = `1*WSP *(CRLF 1*WSP)` obs_fws :: forall e. Parser e String obs_fws = do x <- A.many wsp xs <- A.many $ do v <- crlf @@ -40,7 +44,7 @@ obs_fws = do x <- A.many wsp -- | 1. an OPTIONAL line with potential white spaces followed by at least one white space -- | 2. or, by the obs-FWS rule (meaning: many empty lines) -- | --- | FWS = ([*WSP CRLF] 1*WSP) / obs-FWS +-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS` fws :: forall e. Parser e String fws = do x <- tryMaybe do xs <- A.many wsp v <- crlf @@ -53,10 +57,7 @@ fws = do x <- tryMaybe do xs <- A.many wsp -- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'. -- | --- | ctext = %d33-39 / ; Printable US-ASCII --- | %d42-91 / ; characters not including --- | %d93-126 / ; "(", ")", or "\" --- | obs-ctext +-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext` ctext :: forall e. Parser e Char ctext = sat cond <|> obs_ctext where cond x = let charcode = C.toCharCode x @@ -66,7 +67,7 @@ ctext = sat cond <|> obs_ctext -- | `quoted_pair`: pair of characters. -- | --- | quoted-pair = ("\" (VCHAR / WSP)) / obs-qp +-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp` quoted_pair :: forall e. Parser e String quoted_pair = do _ <- char '\\' v <- vchar <|> wsp @@ -75,13 +76,13 @@ quoted_pair = do _ <- char '\\' -- | Comment content. -- | --- | ccontent = ctext / quoted-pair / comment +-- | ccontent = `ctext / quoted-pair / comment` ccontent :: forall e. Parser e String ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment -- | Comment. Nothing to return since comments aren't to be processed. -- | --- | comment = "(" *([FWS] ccontent) [FWS] ")" +-- | comment = `"(" *([FWS] ccontent) [FWS] ")"` comment :: forall e. Parser e String comment = do _ <- char '(' xs <- A.many do _ <- A.many fws @@ -91,7 +92,7 @@ comment = do _ <- char '(' -- | CFWS: comment folding white space. -- | --- | CFWS = (1*([FWS] comment) [FWS]) / FWS +-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS` cfws :: forall e. Parser e String cfws = do xs <- many1 $ do _ <- tryMaybe fws comment @@ -100,26 +101,26 @@ cfws = do xs <- many1 $ do _ <- tryMaybe fws -- | `address`: email address. -- | --- | address = mailbox / group +-- | address = `mailbox / group` address :: forall e. Parser e String address = mailbox <|> group -- | `mailbox`: mail address. -- | --- | mailbox = name-addr / addr-spec +-- | mailbox = `name-addr / addr-spec` mailbox :: forall e. Parser e String mailbox = name_addr <|> addr_spec -- | `name_addr`: address name. -- | --- | name-addr = [display-name] angle-addr +-- | name-addr = `[display-name] angle-addr` name_addr :: forall e. Parser e String name_addr = do _ <- tryMaybe display_name angle_addr -- | `angle_addr` address specification between '<' and '>' characters. -- | --- | angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr +-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr` angle_addr :: forall e. Parser e String angle_addr = do _ <- tryMaybe cfws _ <- char '<' @@ -131,7 +132,7 @@ angle_addr = do _ <- tryMaybe cfws -- | `group`: a list of email addresses. -- | --- | group = display-name ":" [group-list] ";" [CFWS] +-- | group = `display-name ":" [group-list] ";" [CFWS]` group :: forall e. Parser e String group = do _ <- display_name _ <- char ':' @@ -143,13 +144,13 @@ group = do _ <- display_name -- | `display_name`: displayed name, not the actual email address. -- | --- | display-name = phrase +-- | display-name = `phrase` display_name :: forall e. Parser e String display_name = phrase -- | `mailbox_list` -- | --- | mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list +-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list` mailbox_list :: forall e. Parser e String mailbox_list = do mb <- mailbox xs <- A.many $ do _ <- char ',' @@ -160,7 +161,7 @@ mailbox_list = do mb <- mailbox -- | `address_list` -- | --- | address-list = (address *("," address)) / obs-addr-list +-- | address-list = `(address *("," address)) / obs-addr-list` address_list :: forall e. Parser e String address_list = do a <- address xs <- A.many do _ <- char ',' @@ -170,24 +171,16 @@ address_list = do a <- address -- | `group_list` -- | --- | group-list = mailbox-list / CFWS / obs-group-list +-- | group-list = `mailbox-list / CFWS / obs-group-list` group_list :: forall e. Parser e String group_list = mailbox_list <|> cfws <|> obs_group_list -- | `atext`: atom accepted characters. +-- | Printable US-ASCII characters not including specials. -- | --- | atext = ALPHA / DIGIT / ; Printable US-ASCII --- | "!" / "#" / ; characters not including --- | "$" / "%" / ; specials. Used for atoms. --- | "&" / "'" / --- | "*" / "+" / --- | "-" / "/" / --- | "=" / "?" / --- | "^" / "_" / --- | "`" / "{" / --- | "|" / "}" / --- | "~" +-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /` +-- | `"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"` atext :: forall e. Parser e Char atext = alphanum <|> char '!' <|> char '#' @@ -204,7 +197,7 @@ atext = alphanum -- | `atom` -- | --- | atom = [CFWS] 1*atext [CFWS] +-- | atom = `[CFWS] 1*atext [CFWS]` atom :: forall e. Parser e String atom = CU.fromCharArray <$> do _ <- tryMaybe cfws a <- many1 atext @@ -213,7 +206,7 @@ atom = CU.fromCharArray <$> do _ <- tryMaybe cfws -- | `dot_atom_text` -- | --- | dot-atom-text = 1*atext *("." 1*atext) +-- | dot-atom-text = `1*atext *("." 1*atext)` dot_atom_text :: forall e. Parser e String dot_atom_text = do xs0 <- many1 atext xs1 <- A.many $ do _ <- char '.' @@ -225,7 +218,7 @@ dot_atom_text = do xs0 <- many1 atext -- | `dot_atom` -- | --- | dot-atom = [CFWS] dot-atom-text [CFWS] +-- | dot-atom = `[CFWS] dot-atom-text [CFWS]` dot_atom :: forall e. Parser e String dot_atom = do _ <- tryMaybe cfws x <- dot_atom_text @@ -234,8 +227,8 @@ dot_atom = do _ <- tryMaybe cfws -- | `specials`: special characters that do not appear in `atext`. -- | --- | specials = "(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" / --- | "\" / "," / "." / DQUOTE +-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /` +-- | `"\" / "," / "." / DQUOTE` specials :: forall e. Parser e Char specials = char '(' <|> char ')' @@ -251,10 +244,9 @@ specials = char '(' <|> char '.' <|> char '"' - -- | `addr_spec` -- | --- | addr-spec = local-part "@" domain +-- | addr-spec = `local-part "@" domain` addr_spec :: forall e. Parser e String addr_spec = do lpart <- local_part _ <- char '@' @@ -263,56 +255,58 @@ addr_spec = do lpart <- local_part -- | `local_part` -- | --- | local-part = dot-atom / quoted-string / obs-local-part +-- | local-part = `dot-atom / quoted-string / obs-local-part` local_part :: forall e. Parser e String local_part = dot_atom <|> quoted_string <|> obs_local_part - --- | `domain` +-- | `domain`: this is a parser for a domain. +-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different. +-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example. +-- | Therefore, errors are differents from a parser to another. -- | --- | domain = dot-atom / domain-literal / obs-domain +-- | domain = `dot-atom / domain-literal / obs-domain` domain :: forall e. Parser e String domain = dot_atom <|> domain_literal <|> obs_domain -- | `domain_literal` -- | --- | domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] +-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]` domain_literal :: forall e. Parser e String -domain_literal = do _ <- tryMaybe cfws +domain_literal = do s <- tryMaybe cfws _ <- char '[' xs <- A.many do _ <- tryMaybe fws dtext - _ <- tryMaybe fws + m <- tryMaybe fws _ <- char ']' - _ <- tryMaybe cfws - pure $ "[" <> A.fold xs <> "]" + e <- tryMaybe cfws + pure $ maybe "" id s + <> "[" <> A.fold xs <> maybe "" id m <> "]" + <> maybe "" id e -- | dtext: characters in domains. +-- | Printable US-ASCII characters not including "[", "]", or "\". -- | --- | dtext = %d33-90 / ; Printable US-ASCII --- | %d94-126 / ; characters not including --- | obs-dtext ; "[", "]", or "\" +-- | dtext = `%d33-90 / %d94-126 / obs-dtext` dtext :: forall e. Parser e String dtext = CU.singleton <$> sat cond <|> obs_dtext where cond x = let charcode = C.toCharCode x in between 33 90 charcode || between 94 126 charcode - -- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters. -- | --- | obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] +-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]` obs_angle_addr :: forall e. Parser e String -obs_angle_addr = do _ <- tryMaybe cfws +obs_angle_addr = do s <- tryMaybe cfws _ <- char '<' r <- obs_route a <- addr_spec _ <- char '>' - _ <- tryMaybe cfws - pure $ r <> a + e <- tryMaybe cfws + pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e -- | `obs_route` -- | --- | obs-route = obs-domain-list ":" +-- | obs-route = `obs-domain-list ":"` obs_route :: forall e. Parser e String obs_route = do l <- obs_domain_list _ <- char ':' @@ -320,16 +314,21 @@ obs_route = do l <- obs_domain_list -- | `obs_domain_list` -- | --- | obs-domain-list = *(CFWS / ",") "@" domain --- | *("," [CFWS] ["@" domain]) +-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])` obs_domain_list :: forall e. Parser e String -obs_domain_list = do _ <- A.many $ cfws <|> CU.singleton <$> char ',' +obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ',' _ <- char '@' - domain + d <- domain + xs <- A.many do _ <- char ',' + c <- tryMaybe cfws + _ <- char '@' + d2 <- domain + pure $ "," <> maybe "" id c <> d2 + pure $ A.fold s <> d <> A.fold xs -- | `obs_mbox_list` -- | --- | obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) +-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])` obs_mbox_list :: forall e. Parser e String obs_mbox_list = do _ <- A.many $ do _ <- cfws _ <- char ',' @@ -343,7 +342,7 @@ obs_mbox_list = do _ <- A.many $ do _ <- cfws -- | `obs_addr_list` -- | --- | obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) +-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])` obs_addr_list :: forall e. Parser e String obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws char ',' @@ -356,7 +355,7 @@ obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws -- | `obs_group_list` -- | --- | obs-group-list = 1*([CFWS] ",") [CFWS] +-- | obs-group-list = `1*([CFWS] ",") [CFWS]` obs_group_list :: forall e. Parser e String obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws _ <- char ',' @@ -366,7 +365,7 @@ obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws -- | `obs_local_part` -- | --- | obs-local-part = word *("." word) +-- | obs-local-part = `word *("." word)` obs_local_part :: forall e. Parser e String obs_local_part = do w <- word ws <- A.many $ do _ <- char '.' @@ -376,7 +375,7 @@ obs_local_part = do w <- word -- | `obs_domain` -- | --- | obs-domain = atom *("." atom) +-- | obs-domain = `atom *("." atom)` obs_domain :: forall e. Parser e String obs_domain = do a <- atom xs <- A.many $ do _ <- char '.' @@ -386,14 +385,14 @@ obs_domain = do a <- atom -- | `obs_dtext`: obsolete domain text. -- | --- | obs-dtext = obs-NO-WS-CTL / quoted-pair +-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair` obs_dtext :: forall e. Parser e String obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair -- | obs-NO-WS-CTL: US-ASCII control characters without carriage return, -- | line feed and white space characters. -- | --- | obs-NO-WS-CTL = %d1-8 / ; US-ASCII control +-- | obs-NO-WS-CTL = `%d1-8 / ; US-ASCII control` -- | %d11 / ; characters that do not -- | %d12 / ; include the carriage -- | %d14-31 / ; return, line feed, and @@ -408,25 +407,25 @@ obs_no_ws_ctl = sat cond -- | obs-ctext: obsolete comment text. -- | --- | obs-ctext = obs-NO-WS-CTL +-- | obs-ctext = `obs-NO-WS-CTL` obs_ctext :: forall e. Parser e Char obs_ctext = obs_no_ws_ctl -- | `obs_qtext`: obsolete accepted quoted text. -- | --- | obs-qtext = obs-NO-WS-CTL +-- | obs-qtext = `obs-NO-WS-CTL` obs_qtext :: forall e. Parser e Char obs_qtext = obs_no_ws_ctl -- | `obs_utext`: obsolete text. -- | --- | obs-utext = %d0 / obs-NO-WS-CTL / VCHAR +-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR` obs_utext :: forall e. Parser e Char obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar -- | `obs_qp`: obsolete quoted-pair rule. -- | --- | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) +-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)` obs_qp :: forall e. Parser e String obs_qp = do _ <- char '\\' v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr @@ -439,13 +438,13 @@ obs_qp = do _ <- char '\\' -- | Note: `text` is replaced by `vchar`. -- | -- | (RFC) --- | obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) +-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)` -- | -- | (RFC Errata v1) --- | obs-body = *(%d0-127) +-- | obs-body = `*(%d0-127)` -- | -- | (RFC Errata v2) --- | obs-body = *(d0 /text / LF / CR) +-- | obs-body = `*(d0 /text / LF / CR)` -- Errata v1 --obs_body :: forall e. Parser e String --obs_body = CU.fromCharArray <$> A.many item @@ -470,10 +469,10 @@ obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr) -- | -- | Note: implement the version found in the Errata page. -- | --- | obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) +-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)` -- | -- | (RFC Errata) --- | obs-unstruct = *( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR +-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR` obs_unstruct :: forall e. Parser e String obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr many1 (obs_utext <|> do _ <- fws @@ -483,7 +482,7 @@ obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr -- | `obs_phrase`: obsolete "phrase". -- | --- | obs-phrase = word *(word / "." / CFWS) +-- | obs-phrase = `word *(word / "." / CFWS)` obs_phrase :: forall e. Parser e String obs_phrase = do w <- word ws <- A.many (word <|> string "." <|> do _ <- cfws @@ -492,13 +491,13 @@ obs_phrase = do w <- word -- | `word`. -- | --- | word = atom / quoted-string +-- | word = `atom / quoted-string` word :: forall e. Parser e String word = atom <|> quoted_string -- | `phrase`: list of words (at least one) or the obsolete phrase rule. -- | --- | phrase = 1*word / obs-phrase +-- | phrase = `1*word / obs-phrase` phrase :: forall e. Parser e String phrase = do ws <- many1 word pure $ A.fold ws @@ -506,7 +505,7 @@ phrase = do ws <- many1 word -- | `unstructured` -- | --- | unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct +-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct` unstructured :: forall e. Parser e String unstructured = do v <- A.many $ do _ <- fws vchar @@ -517,7 +516,7 @@ unstructured = do v <- A.many $ do _ <- fws -- | `obs_phrase_list`: obsolete list of phrases. -- | --- | obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS]) +-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])` obs_phrase_list :: forall e. Parser e String obs_phrase_list = do first_phrase <- phrase_or_cfws xs <- A.many $ do _ <- char ',' @@ -530,29 +529,31 @@ obs_phrase_list = do first_phrase <- phrase_or_cfws Just x -> pure x -- | `qtext`: printable US-ASCII characters not including "\" or the quote character. --- --- | qtext = %d33 / %d35-91 / %d93-126 / obs-qtext +-- | +-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`). +-- | +-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext` qtext :: forall e. Parser e Char qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext --- | qcontent = qtext / quoted-pair +-- | `qcontent` -- | --- | qcontent = qtext / quoted-pair +-- | qcontent = `qtext / quoted-pair` qcontent :: forall e. Parser e String qcontent = CU.singleton <$> qtext <|> quoted_pair -- | `quoted_string` -- | --- | quoted-string = [CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS] +-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]` quoted_string :: forall e. Parser e String -quoted_string = do _ <- tryMaybe cfws +quoted_string = do s <- tryMaybe cfws _ <- char '"' - s <- A.many $ do xs <- tryMaybe fws + m <- A.many $ do xs <- tryMaybe fws c <- qcontent pure $ maybe "" id xs <> c _ <- char '"' - _ <- tryMaybe cfws - pure $ "\"" <> A.fold s <> "\"" + e <- tryMaybe cfws + pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e -- | TODO: For now, `local_part` only checks that diff --git a/test/Main.purs b/test/Main.purs index abf686d..ac67d66 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -83,3 +83,8 @@ main = do log "" test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short + + log "" + let spaces = [ """" """", """ " """" ] + test_series "E.quoted_string (short)" E.quoted_string id showerror_email spaces + test_series "E.qcontent (short)" E.qcontent id showerror_email spaces