Email: a few more rules.
This commit is contained in:
		
							parent
							
								
									82056ba5b9
								
							
						
					
					
						commit
						e2a919d78c
					
				
					 3 changed files with 164 additions and 7 deletions
				
			
		|  | @ -2,29 +2,183 @@ | |||
| -- | This module is experimental and doesn't follow every rule for an email address, yet. | ||||
| module GenericParser.EmailAddress where | ||||
| 
 | ||||
| import Prelude (bind, pure, ($), (<>)) | ||||
| import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between) | ||||
| 
 | ||||
| import Control.Alt ((<|>)) | ||||
| import Data.Maybe (Maybe(..)) | ||||
| import Data.Array as A | ||||
| import Data.Char as C | ||||
| import Data.Either (Either(..)) | ||||
| import Data.Maybe (Maybe(..)) | ||||
| import Data.String.CodeUnits as CU | ||||
| 
 | ||||
| import GenericParser.DomainParser.Common (DomainError) | ||||
| import GenericParser.DomainParser (sub_eof) | ||||
| import GenericParser.Parser (Parser(..) | ||||
|                             , char , digit , letter, item | ||||
|                             , sat, char , digit , letter, item, many1, tryMaybe | ||||
|                             , current_input, failureError, parse, rollback, until) | ||||
| 
 | ||||
| data EmailError | ||||
|   = InvalidCharacter | ||||
|   | InvalidDomain (Maybe DomainError) | ||||
| 
 | ||||
| -- | TODO: For now, `login_part` only checks that | ||||
| crlf :: forall e. Parser e Unit | ||||
| crlf = do _ <- char '\r' | ||||
|           _ <- char '\n' | ||||
|           pure unit | ||||
| 
 | ||||
| -- | WSP = a white space. | ||||
| -- | | ||||
| -- | TODO: I assumed it's just a space or tab. Verify and fix. | ||||
| wsp :: Parser EmailError Char | ||||
| wsp = char ' ' <|> char '\t' | ||||
| --wsp = space -- in case we want any possible space value | ||||
| 
 | ||||
| -- | obs-FWS         =   1*WSP *(CRLF 1*WSP) | ||||
| -- | | ||||
| -- | Obsolete FWS. | ||||
| obs_fws :: Parser EmailError Unit | ||||
| obs_fws = do _ <- A.many wsp | ||||
|              _ <- A.many $ do _ <- crlf | ||||
|                               _ <- many1 wsp | ||||
|                               pure unit | ||||
|              pure unit | ||||
| 
 | ||||
| -- FWS             =   ([*WSP CRLF] 1*WSP) /  obs-FWS | ||||
| --                     ; Folding white space | ||||
| -- In english: FWS is described as: | ||||
| --   1. an OPTIONAL line with potential white spaces followed by at least one white space | ||||
| --   2. or, by the obs-FWS rule (meaning: many empty lines) | ||||
| fws :: Parser EmailError Unit | ||||
| fws = do _ <- tryMaybe do _ <- A.many wsp | ||||
|                           _ <- crlf | ||||
|                           pure unit | ||||
|          _ <- many1 wsp | ||||
|          pure unit | ||||
|        <|> obs_fws | ||||
| 
 | ||||
| -- ctext           =   %d33-39 /          ; Printable US-ASCII | ||||
| --                     %d42-91 /          ;  characters not including | ||||
| --                     %d93-126 /         ;  "(", ")", or "\" | ||||
| --                     obs-ctext | ||||
| ctext :: Parser EmailError Char | ||||
| ctext = sat cond <|> obs_ctext | ||||
|   where cond x = let charcode = C.toCharCode x | ||||
|                  in  between 33 39 charcode | ||||
|                    || between 42 91 charcode | ||||
|                    || between 93 126 charcode | ||||
| 
 | ||||
| -- ccontent        =   ctext / quoted-pair / comment | ||||
| --ccontent :: Parser EmailError String | ||||
| --ccontent = do | ||||
| 
 | ||||
| -- comment         =   "(" *([FWS] ccontent) [FWS] ")" | ||||
| --comment :: Parser EmailError String | ||||
| --comment = do | ||||
| 
 | ||||
| -- CFWS            =   (1*([FWS] comment) [FWS]) / FWS | ||||
| --cfws :: Parser EmailError String | ||||
| --cfws = do | ||||
| 
 | ||||
| -- address         =   mailbox / group | ||||
| --address :: Parser EmailError String | ||||
| --address = do | ||||
| 
 | ||||
| --mailbox         =   name-addr / addr-spec | ||||
| -- | ||||
| --name-addr       =   [display-name] angle-addr | ||||
| -- | ||||
| --angle-addr      =   [CFWS] "<" addr-spec ">" [CFWS] / | ||||
| --                    obs-angle-addr | ||||
| -- | ||||
| --group           =   display-name ":" [group-list] ";" [CFWS] | ||||
| -- | ||||
| --display-name    =   phrase | ||||
| -- | ||||
| --mailbox-list    =   (mailbox *("," mailbox)) / obs-mbox-list | ||||
| -- | ||||
| --address-list    =   (address *("," address)) / obs-addr-list | ||||
| -- | ||||
| --group-list      =   mailbox-list / CFWS / obs-group-list | ||||
| 
 | ||||
| 
 | ||||
| -- addr-spec       =   local-part "@" domain | ||||
| -- | ||||
| -- local-part      =   dot-atom / quoted-string / obs-local-part | ||||
| -- | ||||
| -- domain          =   dot-atom / domain-literal / obs-domain | ||||
| -- | ||||
| -- domain-literal  =   [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] | ||||
| -- | ||||
| -- dtext           =   %d33-90 /          ; Printable US-ASCII | ||||
| --                     %d94-126 /         ;  characters not including | ||||
| --                     obs-dtext          ;  "[", "]", or "\" | ||||
| dtext :: forall e. Parser e Char | ||||
| dtext = sat cond <|> obs_dtext | ||||
|   where cond x = let charcode = C.toCharCode x | ||||
|                  in  between 33 90 charcode || between 94 126 charcode | ||||
| 
 | ||||
| 
 | ||||
| --obs-angle-addr  =   [CFWS] "<" obs-route addr-spec ">" [CFWS] | ||||
| -- | ||||
| --obs-route       =   obs-domain-list ":" | ||||
| -- | ||||
| --obs-domain-list =   *(CFWS / ",") "@" domain | ||||
| --                    *("," [CFWS] ["@" domain]) | ||||
| -- | ||||
| --obs-mbox-list   =   *([CFWS] ",") mailbox *("," [mailbox / CFWS]) | ||||
| -- | ||||
| --obs-addr-list   =   *([CFWS] ",") address *("," [address / CFWS]) | ||||
| -- | ||||
| --obs-group-list  =   1*([CFWS] ",") [CFWS] | ||||
| -- | ||||
| --obs-local-part  =   word *("." word) | ||||
| -- | ||||
| --obs-domain      =   atom *("." atom) | ||||
| 
 | ||||
| -- | TODO: Obsolete domain text. | ||||
| -- | obs-dtext       =   obs-NO-WS-CTL / quoted-pair | ||||
| obs_dtext :: forall e. Parser e Char | ||||
| obs_dtext = obs_no_ws_ctl --<|> quoted_pair | ||||
| 
 | ||||
| --obs-NO-WS-CTL   =   %d1-8 /            ; US-ASCII control | ||||
| --                    %d11 /             ;  characters that do not | ||||
| --                    %d12 /             ;  include the carriage | ||||
| --                    %d14-31 /          ;  return, line feed, and | ||||
| --                    %d127              ;  white space characters | ||||
| obs_no_ws_ctl :: forall e. Parser e Char | ||||
| obs_no_ws_ctl = sat cond | ||||
|   where cond x = let charcode = C.toCharCode x | ||||
|                  in  between 1 8 charcode | ||||
|                    || between 11 12 charcode | ||||
|                    || between 14 31 charcode | ||||
|                    || charcode == 127 | ||||
| 
 | ||||
| -- | obs-ctext       =   obs-NO-WS-CTL | ||||
| obs_ctext :: forall e. Parser e Char | ||||
| obs_ctext = obs_no_ws_ctl | ||||
| 
 | ||||
| -- | obs-qtext       =   obs-NO-WS-CTL | ||||
| obs_qtext :: forall e. Parser e Char | ||||
| obs_qtext = obs_no_ws_ctl | ||||
| 
 | ||||
| --obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR | ||||
| -- | ||||
| --obs-qp          =   "\" (%d0 / obs-NO-WS-CTL / LF / CR) | ||||
| -- | ||||
| --obs-body        =   *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) | ||||
| -- | ||||
| --obs-unstruct    =   *((*LF *CR *(obs-utext *LF *CR)) / FWS) | ||||
| -- | ||||
| --obs-phrase      =   word *(word / "." / CFWS) | ||||
| -- | ||||
| --obs-phrase-list =   [phrase / CFWS] *("," [phrase / CFWS]) | ||||
| 
 | ||||
| -- | TODO: For now, `local_part` only checks that | ||||
| -- | (a) the first character is a letter, | ||||
| -- | (b) the last character is either a letter or a digit. | ||||
| -- | The rest can be any letter, digit, '-' or '.'. | ||||
| login_part :: Parser EmailError String | ||||
| login_part = do firstchar <- letter | ||||
| local_part :: Parser EmailError String | ||||
| local_part = do firstchar <- letter | ||||
|                 rest <- until end (letter <|> digit <|> char '-' <|> char '.') | ||||
|                 lastchar <- letter <|> digit | ||||
|                 pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar] | ||||
|  | @ -36,7 +190,7 @@ login_part = do firstchar <- letter | |||
| 
 | ||||
| -- | `email` is the parser for email addresses. | ||||
| email :: Parser EmailError String | ||||
| email = do login <- login_part | ||||
| email = do login <- local_part | ||||
|            _ <- char '@' | ||||
|            input <- current_input | ||||
|            case parse sub_eof input of | ||||
|  |  | |||
|  | @ -47,6 +47,7 @@ ipv6_chunk'' :: Parser IPv6Error String | |||
| ipv6_chunk'' = do _ <- char ':' | ||||
|                   ipv6_chunk | ||||
| 
 | ||||
| -- | `ipv6_full''` parses a representation without shortcuts ("::"). | ||||
| ipv6_full :: Parser IPv6Error String | ||||
| ipv6_full = do chunks <- many1 ipv6_chunk' | ||||
|                pos <- current_position | ||||
|  |  | |||
|  | @ -125,6 +125,8 @@ main = do | |||
| 
 | ||||
|   log "" | ||||
|   test_ipv6 "2001:0" | ||||
|   test_ipv6 "2001::x:0" | ||||
|   test_ipv6 "2001:x::0" | ||||
|   test_ipv6 "2001::0" | ||||
|   test_ipv6 "2001::1:" | ||||
|   test_ipv6 "::" | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue