diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index d35190a..3f2832e 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -2,29 +2,183 @@ -- | This module is experimental and doesn't follow every rule for an email address, yet. module GenericParser.EmailAddress where -import Prelude (bind, pure, ($), (<>)) +import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between) import Control.Alt ((<|>)) -import Data.Maybe (Maybe(..)) +import Data.Array as A +import Data.Char as C import Data.Either (Either(..)) +import Data.Maybe (Maybe(..)) import Data.String.CodeUnits as CU import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser (sub_eof) import GenericParser.Parser (Parser(..) - , char , digit , letter, item + , sat, char , digit , letter, item, many1, tryMaybe , current_input, failureError, parse, rollback, until) data EmailError = InvalidCharacter | InvalidDomain (Maybe DomainError) --- | TODO: For now, `login_part` only checks that +crlf :: forall e. Parser e Unit +crlf = do _ <- char '\r' + _ <- char '\n' + pure unit + +-- | WSP = a white space. +-- | +-- | TODO: I assumed it's just a space or tab. Verify and fix. +wsp :: Parser EmailError Char +wsp = char ' ' <|> char '\t' +--wsp = space -- in case we want any possible space value + +-- | obs-FWS = 1*WSP *(CRLF 1*WSP) +-- | +-- | Obsolete FWS. +obs_fws :: Parser EmailError Unit +obs_fws = do _ <- A.many wsp + _ <- A.many $ do _ <- crlf + _ <- many1 wsp + pure unit + pure unit + +-- FWS = ([*WSP CRLF] 1*WSP) / obs-FWS +-- ; Folding white space +-- In english: FWS is described as: +-- 1. an OPTIONAL line with potential white spaces followed by at least one white space +-- 2. or, by the obs-FWS rule (meaning: many empty lines) +fws :: Parser EmailError Unit +fws = do _ <- tryMaybe do _ <- A.many wsp + _ <- crlf + pure unit + _ <- many1 wsp + pure unit + <|> obs_fws + +-- ctext = %d33-39 / ; Printable US-ASCII +-- %d42-91 / ; characters not including +-- %d93-126 / ; "(", ")", or "\" +-- obs-ctext +ctext :: Parser EmailError Char +ctext = sat cond <|> obs_ctext + where cond x = let charcode = C.toCharCode x + in between 33 39 charcode + || between 42 91 charcode + || between 93 126 charcode + +-- ccontent = ctext / quoted-pair / comment +--ccontent :: Parser EmailError String +--ccontent = do + +-- comment = "(" *([FWS] ccontent) [FWS] ")" +--comment :: Parser EmailError String +--comment = do + +-- CFWS = (1*([FWS] comment) [FWS]) / FWS +--cfws :: Parser EmailError String +--cfws = do + +-- address = mailbox / group +--address :: Parser EmailError String +--address = do + +--mailbox = name-addr / addr-spec +-- +--name-addr = [display-name] angle-addr +-- +--angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / +-- obs-angle-addr +-- +--group = display-name ":" [group-list] ";" [CFWS] +-- +--display-name = phrase +-- +--mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list +-- +--address-list = (address *("," address)) / obs-addr-list +-- +--group-list = mailbox-list / CFWS / obs-group-list + + +-- addr-spec = local-part "@" domain +-- +-- local-part = dot-atom / quoted-string / obs-local-part +-- +-- domain = dot-atom / domain-literal / obs-domain +-- +-- domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] +-- +-- dtext = %d33-90 / ; Printable US-ASCII +-- %d94-126 / ; characters not including +-- obs-dtext ; "[", "]", or "\" +dtext :: forall e. Parser e Char +dtext = sat cond <|> obs_dtext + where cond x = let charcode = C.toCharCode x + in between 33 90 charcode || between 94 126 charcode + + +--obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] +-- +--obs-route = obs-domain-list ":" +-- +--obs-domain-list = *(CFWS / ",") "@" domain +-- *("," [CFWS] ["@" domain]) +-- +--obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) +-- +--obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) +-- +--obs-group-list = 1*([CFWS] ",") [CFWS] +-- +--obs-local-part = word *("." word) +-- +--obs-domain = atom *("." atom) + +-- | TODO: Obsolete domain text. +-- | obs-dtext = obs-NO-WS-CTL / quoted-pair +obs_dtext :: forall e. Parser e Char +obs_dtext = obs_no_ws_ctl --<|> quoted_pair + +--obs-NO-WS-CTL = %d1-8 / ; US-ASCII control +-- %d11 / ; characters that do not +-- %d12 / ; include the carriage +-- %d14-31 / ; return, line feed, and +-- %d127 ; white space characters +obs_no_ws_ctl :: forall e. Parser e Char +obs_no_ws_ctl = sat cond + where cond x = let charcode = C.toCharCode x + in between 1 8 charcode + || between 11 12 charcode + || between 14 31 charcode + || charcode == 127 + +-- | obs-ctext = obs-NO-WS-CTL +obs_ctext :: forall e. Parser e Char +obs_ctext = obs_no_ws_ctl + +-- | obs-qtext = obs-NO-WS-CTL +obs_qtext :: forall e. Parser e Char +obs_qtext = obs_no_ws_ctl + +--obs-utext = %d0 / obs-NO-WS-CTL / VCHAR +-- +--obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) +-- +--obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) +-- +--obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) +-- +--obs-phrase = word *(word / "." / CFWS) +-- +--obs-phrase-list = [phrase / CFWS] *("," [phrase / CFWS]) + +-- | TODO: For now, `local_part` only checks that -- | (a) the first character is a letter, -- | (b) the last character is either a letter or a digit. -- | The rest can be any letter, digit, '-' or '.'. -login_part :: Parser EmailError String -login_part = do firstchar <- letter +local_part :: Parser EmailError String +local_part = do firstchar <- letter rest <- until end (letter <|> digit <|> char '-' <|> char '.') lastchar <- letter <|> digit pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar] @@ -36,7 +190,7 @@ login_part = do firstchar <- letter -- | `email` is the parser for email addresses. email :: Parser EmailError String -email = do login <- login_part +email = do login <- local_part _ <- char '@' input <- current_input case parse sub_eof input of diff --git a/src/GenericParser/IPAddress.purs b/src/GenericParser/IPAddress.purs index b6ecfa7..11fa362 100644 --- a/src/GenericParser/IPAddress.purs +++ b/src/GenericParser/IPAddress.purs @@ -47,6 +47,7 @@ ipv6_chunk'' :: Parser IPv6Error String ipv6_chunk'' = do _ <- char ':' ipv6_chunk +-- | `ipv6_full''` parses a representation without shortcuts ("::"). ipv6_full :: Parser IPv6Error String ipv6_full = do chunks <- many1 ipv6_chunk' pos <- current_position diff --git a/test/Main.purs b/test/Main.purs index 9a0d8d2..5467b66 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -125,6 +125,8 @@ main = do log "" test_ipv6 "2001:0" + test_ipv6 "2001::x:0" + test_ipv6 "2001:x::0" test_ipv6 "2001::0" test_ipv6 "2001::1:" test_ipv6 "::"