Compare commits

..

No commits in common. "068428680590026d8b2e0a56e9a41669cd645df8" and "d7f713839cd865c058d25240c66640baefe423c2" have entirely different histories.

7 changed files with 583 additions and 674 deletions

View file

@ -12,11 +12,8 @@ run:
t: t:
spago test spago test
docs-with-search:
spago docs
docs: docs:
spago docs --no-search spago docs
DOCS_HTTPD_ACCESS_LOGS ?= /tmp/parser-docs-access.log DOCS_HTTPD_ACCESS_LOGS ?= /tmp/parser-docs-access.log
DOCS_HTTPD_ADDR ?= 127.0.0.1 DOCS_HTTPD_ADDR ?= 127.0.0.1

View file

@ -2,13 +2,11 @@ module GenericParser
( module GenericParser.Parser ( module GenericParser.Parser
, module GenericParser.DomainParser.Common , module GenericParser.DomainParser.Common
, module GenericParser.DomainParser , module GenericParser.DomainParser
, module GenericParser.RFC5322
--, module GenericParser.RFC5234 --, module GenericParser.RFC5234
) where ) where
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size) import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
import GenericParser.DomainParser (domain, label, subdomain, sub_eof) import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
import GenericParser.Parser (char, current_input, current_position, Error, errorParser, error_functor, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value) import GenericParser.Parser (char, current_input, current_position, Error, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
import GenericParser.RFC5322 (obs_fws, fws, ctext, quoted_pair, ccontent, comment, cfws, address, mailbox, name_addr, angle_addr, group, display_name, mailbox_list, address_list, group_list, atext, atom, dot_atom_text, dot_atom, specials, addr_spec, local_part, {- domain ,-} domain_literal, dtext, obs_angle_addr, obs_route, obs_domain_list, obs_mbox_list, obs_addr_list, obs_group_list, obs_local_part, obs_domain, obs_dtext, obs_no_ws_ctl, obs_ctext, obs_qtext, obs_utext, obs_qp, obs_body, obs_unstruct, obs_phrase, word, phrase, unstructured, obs_phrase_list, qtext, qcontent, quoted_string)
-- import GenericParser.RFC5234
-- import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper) -- import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
-- import GenericParser.RFC5234 ()

View file

@ -1,34 +1,571 @@
-- | `EmailAddress` is a simplistic parser for email addresses. -- | `EmailAddress` is a parser for email addresses, implementing the grammar found in RFC5322.
-- | For a more serious parser, see the `RFC5322` module.
-- | -- |
-- | STATUS: the parser works for very simplistic email addresses. -- | STATUS: the parser mostly works, except for comments.
-- | This shouldn't be used in a serious environment. -- | Comments provoke a stack overflow, this must be investigated.
-- |
-- | Also, the parser needs a thorough review.
module GenericParser.EmailAddress where module GenericParser.EmailAddress where
import Prelude (bind, pure, ($), (<>)) import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit)
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array as A
import Data.Char as C
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..), maybe)
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
import GenericParser.Parser (Parser(..), char, item, current_input, failureError, parse, rollback, until) import GenericParser.BaseFunctions (id)
import GenericParser.Parser (Parser(..)
, sat, char, char_num, char_range, string, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof) import GenericParser.DomainParser (sub_eof)
-- ABNF core rules. -- ABNF core rules.
import GenericParser.RFC5234 (digit) import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
import GenericParser.SomeParsers (letter) import GenericParser.SomeParsers (letter, alphanum)
data EmailError data EmailError
= InvalidCharacter = InvalidCharacter
| InvalidDomain (Maybe DomainError) | InvalidDomain (Maybe DomainError)
-- | obs-FWS: obsolete folding white space.
-- |
-- | obs-FWS = `1*WSP *(CRLF 1*WSP)`
obs_fws :: forall e. Parser e String
obs_fws = do x <- A.many wsp
xs <- A.many $ do v <- crlf
w <- many1 wsp
pure $ v <> CU.fromCharArray w
pure $ A.fold $ [CU.fromCharArray x] <> xs
-- | FWS: folding white space. This can be described in plain english as:
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
-- |
-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS`
fws :: forall e. Parser e String
fws = do x <- tryMaybe do xs <- A.many wsp
v <- crlf
pure $ CU.fromCharArray xs <> v
w <- many1 wsp
let first_part = maybe "" id x
second_part = CU.fromCharArray w
pure $ first_part <> second_part
<|> obs_fws
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
-- |
-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext`
ctext :: forall e. Parser e Char
ctext = sat cond <|> obs_ctext
where cond x = let charcode = C.toCharCode x
in between 33 39 charcode
|| between 42 91 charcode
|| between 93 126 charcode
-- | `quoted_pair`: pair of characters.
-- |
-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp`
quoted_pair :: forall e. Parser e String
quoted_pair = do _ <- char '\\'
v <- vchar <|> wsp
pure $ "\\" <> CU.singleton v
<|> obs_qp
-- | Comment content.
-- |
-- | ccontent = `ctext / quoted-pair / comment`
ccontent :: forall e. Parser e String
ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
-- | Comment. Nothing to return since comments aren't to be processed.
-- |
-- | comment = `"(" *([FWS] ccontent) [FWS] ")"`
comment :: forall e. Parser e String
comment = do _ <- char '('
xs <- A.many do _ <- A.many fws
ccontent
_ <- char ')'
pure $ "(" <> A.fold xs <> ")"
-- | CFWS: comment folding white space.
-- |
-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS`
cfws :: forall e. Parser e String
cfws = do xs <- many1 $ do _ <- tryMaybe fws
comment
pure $ A.fold xs
<|> fws
-- | `address`: email address.
-- |
-- | address = `mailbox / group`
address :: forall e. Parser e String
address = mailbox <|> group
-- | `mailbox`: mail address.
-- |
-- | mailbox = `name-addr / addr-spec`
mailbox :: forall e. Parser e String
mailbox = name_addr <|> addr_spec
-- | `name_addr`: address name.
-- |
-- | name-addr = `[display-name] angle-addr`
name_addr :: forall e. Parser e String
name_addr = do _ <- tryMaybe display_name
angle_addr
-- | `angle_addr` address specification between '<' and '>' characters.
-- |
-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr`
angle_addr :: forall e. Parser e String
angle_addr = do _ <- tryMaybe cfws
_ <- char '<'
a <- addr_spec
_ <- char '>'
_ <- tryMaybe cfws
pure $ "<" <> a <> ">"
<|> obs_angle_addr
-- | `group`: a list of email addresses.
-- |
-- | group = `display-name ":" [group-list] ";" [CFWS]`
group :: forall e. Parser e String
group = do _ <- display_name
_ <- char ':'
xs <- tryMaybe group_list
_ <- char ';'
_ <- tryMaybe cfws
let res = ":" <> maybe "" id xs <> ";"
pure res
-- | `display_name`: displayed name, not the actual email address.
-- |
-- | display-name = `phrase`
display_name :: forall e. Parser e String
display_name = phrase
-- | `mailbox_list`
-- |
-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list`
mailbox_list :: forall e. Parser e String
mailbox_list = do mb <- mailbox
xs <- A.many $ do _ <- char ','
mx <- mailbox
pure $ "," <> mx
pure $ mb <> A.fold xs
<|> obs_mbox_list
-- | `address_list`
-- |
-- | address-list = `(address *("," address)) / obs-addr-list`
address_list :: forall e. Parser e String
address_list = do a <- address
xs <- A.many do _ <- char ','
address
pure $ A.fold $ A.intersperse "," $ [a] <> xs
<|> obs_addr_list
-- | `group_list`
-- |
-- | group-list = `mailbox-list / CFWS / obs-group-list`
group_list :: forall e. Parser e String
group_list = mailbox_list <|> cfws <|> obs_group_list
-- | `atext`: atom accepted characters.
-- | Printable US-ASCII characters not including specials.
-- |
-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /`
-- | `"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"`
atext :: forall e. Parser e Char
atext = alphanum
<|> char '!' <|> char '#'
<|> char '$' <|> char '%'
<|> char '&' <|> char '\''
<|> char '*' <|> char '+'
<|> char '-' <|> char '/'
<|> char '=' <|> char '?'
<|> char '^' <|> char '_'
<|> char '`' <|> char '{'
<|> char '|' <|> char '}'
<|> char '~'
-- | `atom`
-- |
-- | atom = `[CFWS] 1*atext [CFWS]`
atom :: forall e. Parser e String
atom = CU.fromCharArray <$> do _ <- tryMaybe cfws
a <- many1 atext
_ <- tryMaybe cfws
pure a
-- | `dot_atom_text`
-- |
-- | dot-atom-text = `1*atext *("." 1*atext)`
dot_atom_text :: forall e. Parser e String
dot_atom_text = do xs0 <- many1 atext
xs1 <- A.many $ do _ <- char '.'
xs <- many1 atext
pure $ "." <> CU.fromCharArray xs
let str0 = CU.fromCharArray xs0
str1 = A.fold xs1
pure $ str0 <> str1
-- | `dot_atom`
-- |
-- | dot-atom = `[CFWS] dot-atom-text [CFWS]`
dot_atom :: forall e. Parser e String
dot_atom = do _ <- tryMaybe cfws
x <- dot_atom_text
_ <- tryMaybe cfws
pure x
-- | `specials`: special characters that do not appear in `atext`.
-- |
-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /`
-- | `"\" / "," / "." / DQUOTE`
specials :: forall e. Parser e Char
specials = char '('
<|> char ')'
<|> char '<'
<|> char '>'
<|> char '['
<|> char ']'
<|> char ':'
<|> char ';'
<|> char '@'
<|> char '\\'
<|> char ','
<|> char '.'
<|> char '"'
-- | `addr_spec`
-- |
-- | addr-spec = `local-part "@" domain`
addr_spec :: forall e. Parser e String
addr_spec = do lpart <- local_part
_ <- char '@'
dom <- domain
pure $ lpart <> "@" <> dom
-- | `local_part`
-- |
-- | local-part = `dot-atom / quoted-string / obs-local-part`
local_part :: forall e. Parser e String
local_part = dot_atom <|> quoted_string <|> obs_local_part
-- | `domain`: this is a parser for a domain.
-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different.
-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example.
-- | Therefore, errors are differents from a parser to another.
-- |
-- | domain = `dot-atom / domain-literal / obs-domain`
domain :: forall e. Parser e String
domain = dot_atom <|> domain_literal <|> obs_domain
-- | `domain_literal`
-- |
-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]`
domain_literal :: forall e. Parser e String
domain_literal = do s <- tryMaybe cfws
_ <- char '['
xs <- A.many do _ <- tryMaybe fws
dtext
m <- tryMaybe fws
_ <- char ']'
e <- tryMaybe cfws
pure $ maybe "" id s
<> "[" <> A.fold xs <> maybe "" id m <> "]"
<> maybe "" id e
-- | dtext: characters in domains.
-- | Printable US-ASCII characters not including "[", "]", or "\".
-- |
-- | dtext = `%d33-90 / %d94-126 / obs-dtext`
dtext :: forall e. Parser e String
dtext = CU.singleton <$> sat cond <|> obs_dtext
where cond x = let charcode = C.toCharCode x
in between 33 90 charcode || between 94 126 charcode
-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters.
-- |
-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]`
obs_angle_addr :: forall e. Parser e String
obs_angle_addr = do s <- tryMaybe cfws
_ <- char '<'
r <- obs_route
a <- addr_spec
_ <- char '>'
e <- tryMaybe cfws
pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e
-- | `obs_route`
-- |
-- | obs-route = `obs-domain-list ":"`
obs_route :: forall e. Parser e String
obs_route = do l <- obs_domain_list
_ <- char ':'
pure $ l <> ":"
-- | `obs_domain_list`
-- |
-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])`
obs_domain_list :: forall e. Parser e String
obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ','
_ <- char '@'
d <- domain
xs <- A.many do _ <- char ','
c <- tryMaybe cfws
_ <- char '@'
d2 <- domain
pure $ "," <> maybe "" id c <> d2
pure $ A.fold s <> d <> A.fold xs
-- | `obs_mbox_list`
-- |
-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])`
obs_mbox_list :: forall e. Parser e String
obs_mbox_list = do _ <- A.many $ do _ <- cfws
_ <- char ','
pure unit
mb <- mailbox
xs <- A.many $ do _ <- char ','
x <- mailbox <|> do _ <- cfws
pure ""
pure x
pure $ mb <> A.fold xs
-- | `obs_addr_list`
-- |
-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])`
obs_addr_list :: forall e. Parser e String
obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
char ','
a <- address
xs <- A.many do _ <- char ','
address <|> do _ <- cfws
pure ""
let res = A.fold $ A.intersperse "," $ [a] <> xs
pure res
-- | `obs_group_list`
-- |
-- | obs-group-list = `1*([CFWS] ",") [CFWS]`
obs_group_list :: forall e. Parser e String
obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
_ <- char ','
pure $ maybe "" id x <> ","
c <- tryMaybe cfws
pure $ A.fold xs <> maybe "" id c
-- | `obs_local_part`
-- |
-- | obs-local-part = `word *("." word)`
obs_local_part :: forall e. Parser e String
obs_local_part = do w <- word
ws <- A.many $ do _ <- char '.'
w1 <- word
pure $ "." <> w1
pure $ w <> A.fold ws
-- | `obs_domain`
-- |
-- | obs-domain = `atom *("." atom)`
obs_domain :: forall e. Parser e String
obs_domain = do a <- atom
xs <- A.many $ do _ <- char '.'
x <- atom
pure $ "." <> x
pure $ a <> A.fold xs
-- | `obs_dtext`: obsolete domain text.
-- |
-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair`
obs_dtext :: forall e. Parser e String
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
-- | line feed and white space characters.
-- |
-- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127`
obs_no_ws_ctl :: forall e. Parser e Char
obs_no_ws_ctl = sat cond
where cond x = let charcode = C.toCharCode x
in between 1 8 charcode
|| between 11 12 charcode
|| between 14 31 charcode
|| charcode == 127
-- | obs-ctext: obsolete comment text.
-- |
-- | obs-ctext = `obs-NO-WS-CTL`
obs_ctext :: forall e. Parser e Char
obs_ctext = obs_no_ws_ctl
-- | `obs_qtext`: obsolete accepted quoted text.
-- |
-- | obs-qtext = `obs-NO-WS-CTL`
obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl
-- | `obs_utext`: obsolete text.
-- |
-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR`
obs_utext :: forall e. Parser e Char
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
-- | `obs_qp`: obsolete quoted-pair rule.
-- |
-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)`
obs_qp :: forall e. Parser e String
obs_qp = do _ <- char '\\'
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
pure $ "\\" <> CU.singleton v
-- | `obs_body`: obsolete body.
-- |
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
-- |
-- | Note: `text` is replaced by `vchar`.
-- |
-- | (RFC)
-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)`
-- |
-- | (RFC Errata v1)
-- | obs-body = `*(%d0-127)`
-- |
-- | (RFC Errata v2)
-- | obs-body = `*(d0 /text / LF / CR)`
-- Errata v1
--obs_body :: forall e. Parser e String
--obs_body = CU.fromCharArray <$> A.many item
-- Errata v2
obs_body :: forall e. Parser e String
obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
--obs_body original
--obs_body :: forall e. Parser e String
--obs_body = do A.many $ do _ <- A.many lf
-- _ <- A.many cr
-- v <- A.many $ do x <- char_num 0 <|> text
-- _ <- A.many lf
-- _ <- A.many cr
-- pure x
-- pure $ A.fold v
-- <|> _ <- crlf
-- pure ""
-- | `obs_unstruct`
-- |
-- | Note: implement the version found in the Errata page.
-- |
-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)`
-- |
-- | (RFC Errata)
-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR`
obs_unstruct :: forall e. Parser e String
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
many1 (obs_utext <|> do _ <- fws
pure ' ')
<|> do _ <- fws
pure ""
-- | `obs_phrase`: obsolete "phrase".
-- |
-- | obs-phrase = `word *(word / "." / CFWS)`
obs_phrase :: forall e. Parser e String
obs_phrase = do w <- word
ws <- A.many (word <|> string "." <|> do _ <- cfws
pure "")
pure $ w <> A.fold ws
-- | `word`.
-- |
-- | word = `atom / quoted-string`
word :: forall e. Parser e String
word = atom <|> quoted_string
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
-- |
-- | phrase = `1*word / obs-phrase`
phrase :: forall e. Parser e String
phrase = do ws <- many1 word
pure $ A.fold ws
<|> obs_phrase
-- | `unstructured`
-- |
-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct`
unstructured :: forall e. Parser e String
unstructured = do v <- A.many $ do _ <- fws
vchar
_ <- A.many wsp
pure $ CU.fromCharArray v
<|> obs_unstruct
-- | `obs_phrase_list`: obsolete list of phrases.
-- |
-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])`
obs_phrase_list :: forall e. Parser e String
obs_phrase_list = do first_phrase <- phrase_or_cfws
xs <- A.many $ do _ <- char ','
phrase_or_cfws
pure $ A.fold $ [first_phrase] <> xs
where phrase_or_cfws = do first_phrase <- tryMaybe phrase
case first_phrase of
Nothing -> do _ <- cfws
pure ""
Just x -> pure x
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
-- |
-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`).
-- |
-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext`
qtext :: forall e. Parser e Char
qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
-- | `qcontent`
-- |
-- | qcontent = `qtext / quoted-pair`
qcontent :: forall e. Parser e String
qcontent = CU.singleton <$> qtext <|> quoted_pair
-- | `quoted_string`
-- |
-- | WARNING: this rule was changed in order to take into account the new `qtext` rule,
-- | which now accepts the space character. This new rule, as implemented, still allows
-- | for multiple line returns in the quoted string.
-- |
-- |
-- | Original RFC5322 rule:
-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]`
-- |
-- | Implemented rule:
-- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]`
quoted_string :: forall e. Parser e String
quoted_string = do s <- tryMaybe cfws
_ <- char '"'
m <- A.many do l <- tryMaybe crlf
c <- qcontent
pure $ maybe "" id l <> c
_ <- char '"'
e <- tryMaybe cfws
pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e
-- | TODO: For now, `local_part` only checks that -- | TODO: For now, `local_part` only checks that
-- | (a) the first character is a letter, -- | (a) the first character is a letter,
-- | (b) the last character is either a letter or a digit. -- | (b) the last character is either a letter or a digit.
-- | The rest can be any letter, digit, '-' or '.'. -- | The rest can be any letter, digit, '-' or '.'.
local_part :: Parser EmailError String local_part' :: Parser EmailError String
local_part = do firstchar <- letter local_part' = do firstchar <- letter
rest <- until end (letter <|> digit <|> char '-' <|> char '.') rest <- until end (letter <|> digit <|> char '-' <|> char '.')
lastchar <- letter <|> digit lastchar <- letter <|> digit
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar] pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
@ -40,7 +577,7 @@ local_part = do firstchar <- letter
-- | `email` is the parser for email addresses. -- | `email` is the parser for email addresses.
email :: Parser EmailError String email :: Parser EmailError String
email = do login <- local_part email = do login <- local_part'
_ <- char '@' _ <- char '@'
input <- current_input input <- current_input
case parse sub_eof input of case parse sub_eof input of

View file

@ -1,7 +1,7 @@
-- | `IPAddress` is a parser for internet protocol addresses (both IPv4 and IPv6). -- | `IPAddress` is a parser for internet protocol addresses (both IPv4 and IPv6).
module GenericParser.IPAddress where module GenericParser.IPAddress where
import Prelude (Ordering(..), compare, (==), (<>), (<), (+), (-), between, bind, pure, ($), (<<<), (>), show, map, unit) import Prelude (Ordering(..), compare, (==), (<>), (<), (+), (-), bind, pure, ($), (<<<), (>), show, map, unit)
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Data.Array as A import Data.Array as A
@ -12,26 +12,26 @@ import GenericParser.Parser (Parser(..)
, failureError , failureError
, current_position , current_position
, string , string
, read_input, many1, lookahead , many1, lookahead
, char) , char)
import GenericParser.BaseFunctions (repeat) import GenericParser.BaseFunctions (repeat)
import GenericParser.SomeParsers (nat) import GenericParser.SomeParsers (nat)
import GenericParser.RFC5234 (hexdig) import GenericParser.RFC5234 (hexdig)
data IPv6Error data IPv6Error
= IP6TooManyHexaDecimalCharacters = InvalidCharacter
| IP6NotEnoughChunks | TooManyHexaDecimalCharacters
| IP6TooManyChunks | NotEnoughChunks
| IP6IrrelevantShortRepresentation | TooManyChunks
| IP6InvalidRange | IPv6UnrelevantShortRepresentation
-- | `ipv6_chunk` parses just a group of hexadecimal characters. -- | `ipv6_chunk` parses just a group of hexadecimal characters.
-- | Return an error (IP6TooManyHexaDecimalCharacters) in case the group has more than 4 characters. -- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
ipv6_chunk :: Parser IPv6Error String ipv6_chunk :: Parser IPv6Error String
ipv6_chunk = do pos <- current_position ipv6_chunk = do pos <- current_position
hexachars <- many1 hexdig hexachars <- many1 hexdig
if A.length hexachars > 4 if A.length hexachars > 4
then Parser \_ -> failureError pos (Just IP6TooManyHexaDecimalCharacters) then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
else pure $ CU.fromCharArray hexachars else pure $ CU.fromCharArray hexachars
-- | `ipv6_chunk'` is `ipv6_chunk` with a following ':' character. -- | `ipv6_chunk'` is `ipv6_chunk` with a following ':' character.
@ -52,9 +52,9 @@ ipv6_full = do chunks <- many1 ipv6_chunk'
pos <- current_position pos <- current_position
lastchunk <- ipv6_chunk lastchunk <- ipv6_chunk
case compare (A.length chunks) 7 of case compare (A.length chunks) 7 of
LT -> Parser \_ -> failureError pos (Just IP6NotEnoughChunks) LT -> Parser \_ -> failureError pos (Just NotEnoughChunks)
EQ -> pure $ A.fold (A.intersperse ":" (chunks <> [lastchunk])) EQ -> pure $ A.fold (A.intersperse ":" (chunks <> [lastchunk]))
GT -> Parser \_ -> failureError pos (Just IP6TooManyChunks) GT -> Parser \_ -> failureError pos (Just TooManyChunks)
-- | `ipv6_shortened` parses a shortened representation of an IPv6 address. -- | `ipv6_shortened` parses a shortened representation of an IPv6 address.
ipv6_shortened :: Parser IPv6Error String ipv6_shortened :: Parser IPv6Error String
@ -73,36 +73,23 @@ ipv6_shortened =
let nb_zero_filling = 8 - (A.length chunks_part1 + A.length chunks_part2) let nb_zero_filling = 8 - (A.length chunks_part1 + A.length chunks_part2)
filling = repeat nb_zero_filling "0000" filling = repeat nb_zero_filling "0000"
if nb_zero_filling < 1 if nb_zero_filling < 1
then Parser \_ -> failureError pos (Just IP6IrrelevantShortRepresentation) then Parser \_ -> failureError pos (Just IPv6UnrelevantShortRepresentation)
else pure $ A.fold (A.intersperse ":" $ A.concat [chunks_part1, filling, chunks_part2]) else pure $ A.fold (A.intersperse ":" $ A.concat [chunks_part1, filling, chunks_part2])
-- | TODO: accept IPv6 addresses between brackets ([ipv6]). -- | TODO: accept IPv6 addresses between brackets ([ipv6]).
ipv6 :: Parser IPv6Error String ipv6 :: Parser IPv6Error String
ipv6 = ipv6_shortened <|> ipv6_full ipv6 = ipv6_shortened <|> ipv6_full
-- | `ipv6_range` parses an ipv6 range, such as "2001::1/56".
-- | If the parsing succeed, the whole string is returned.
ipv6_range :: Parser IPv6Error String
ipv6_range =
read_input do _ <- ipv6
_ <- char '/'
pos <- current_position
n <- nat
if between 0 128 n
then pure ""
else Parser \_ -> failureError pos (Just IP6InvalidRange)
data IPv4Error data IPv4Error
= IP4NumberTooBig Int = NumberTooBig Int
| IP4IrrelevantShortRepresentation | IPv4UnrelevantShortRepresentation
| IP4InvalidRange
-- | `ipv4_byte` a parser for 0 to 255 natural integers, which is part of the representation of an IPv4 address. -- | `ipv4_byte` a parser for 0 to 255 natural integers, which is part of the representation of an IPv4 address.
ipv4_byte :: Parser IPv4Error Int ipv4_byte :: Parser IPv4Error Int
ipv4_byte = do pos <- current_position ipv4_byte = do pos <- current_position
number <- nat number <- nat
if number > 255 if number > 255
then Parser \_ -> failureError pos ((Just <<< IP4NumberTooBig) number) then Parser \_ -> failureError pos ((Just <<< NumberTooBig) number)
else pure number else pure number
-- | `ipv4_byte'` is `ipv4_byte` with a leading '.'. -- | `ipv4_byte'` is `ipv4_byte` with a leading '.'.
@ -136,20 +123,8 @@ ipv4_shortened =
nb_zero_filling = 4 - (A.length chunks_part1 + A.length chunks_part2) nb_zero_filling = 4 - (A.length chunks_part1 + A.length chunks_part2)
filling = A.fold (A.intersperse "." $ repeat nb_zero_filling "0") filling = A.fold (A.intersperse "." $ repeat nb_zero_filling "0")
if nb_zero_filling < 1 if nb_zero_filling < 1
then Parser \_ -> failureError pos (Just IP4IrrelevantShortRepresentation) then Parser \_ -> failureError pos (Just IPv4UnrelevantShortRepresentation)
else pure $ A.fold (A.intersperse "." [part1, filling, part2]) else pure $ A.fold (A.intersperse "." [part1, filling, part2])
ipv4 :: Parser IPv4Error String ipv4 :: Parser IPv4Error String
ipv4 = ipv4_shortened <|> ipv4_generic4bytes ipv4 = ipv4_shortened <|> ipv4_generic4bytes
-- | `ipv4_range` parses an ipv4 range, such as "192.0.2.0/24".
-- | If the parsing succeed, the whole string is returned.
ipv4_range :: Parser IPv4Error String
ipv4_range =
read_input do _ <- ipv4
_ <- char '/'
pos <- current_position
n <- nat
if between 0 32 n
then pure ""
else Parser \_ -> failureError pos (Just IP4InvalidRange)

View file

@ -223,37 +223,3 @@ read_input p = do input <- current_input
_ <- p _ <- p
endpos <- current_position endpos <- current_position
pure $ CU.take (endpos - input.position) input.string pure $ CU.take (endpos - input.position) input.string
-- | `errorParser` provides a parser that keeps the current input position but fails
-- | and gives the argument as the error.
errorParser :: forall e v. Maybe e -> Parser e v
errorParser e = Parser \input -> failureError input.position e
-- | `error_functor` or `(<:>)` is an `error functor`, allowing to encapsulate
-- | an error in another type.
-- |
-- |```
-- |data LoginParsingError -- Login parsing errors.
-- | = CannotParse
-- | | CannotEntirelyParse
-- | | Size Int Int Int
-- |
-- |data Error -- Login validation errors.
-- | = ParsingError (G.Error LoginParsingError)
-- |
-- |login_parser :: Parser LoginParsingError String
-- |login_parser = do
-- | input <- current_input
-- | _ <- many1 (alpha <|> digit) <:> \_ -> CannotParse
-- | _ <- SomeParsers.eof <:> \_ -> CannotEntirelyParse
-- | pos <- current_position
-- | if between min_login_size max_login_size pos
-- | then pure input.string
-- | else errorParser (Just $ Size min_login_size max_login_size pos)
-- |```
error_functor :: forall e v f. Parser e v -> (e -> f) -> Parser f v
error_functor (Parser p) f = Parser \i -> case p i of
Left e -> failureError e.position $ maybe Nothing (Just <<< f) e.error
Right v -> Right v
infixl 8 error_functor as <:>

View file

@ -1,554 +0,0 @@
-- | This module implements the email address grammar found in RFC5322.
-- |
-- | STATUS: the parser works except for comments.
-- | Comments provoke a stack overflow, this must be investigated.
-- |
-- | Also, the parser needs a thorough review.
module GenericParser.RFC5322 where
import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit)
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array as A
import Data.Char as C
import Data.Maybe (Maybe(..), maybe)
import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (id)
import GenericParser.Parser (Parser, sat, char, char_num, char_range, string, many1, tryMaybe)
-- ABNF core rules.
import GenericParser.RFC5234 (crlf, wsp, vchar, lf, cr)
import GenericParser.SomeParsers (alphanum)
-- | obs-FWS: obsolete folding white space.
-- |
-- | obs-FWS = `1*WSP *(CRLF 1*WSP)`
obs_fws :: forall e. Parser e String
obs_fws = do x <- A.many wsp
xs <- A.many $ do v <- crlf
w <- many1 wsp
pure $ v <> CU.fromCharArray w
pure $ A.fold $ [CU.fromCharArray x] <> xs
-- | FWS: folding white space. This can be described in plain english as:
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
-- |
-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS`
fws :: forall e. Parser e String
fws = do x <- tryMaybe do xs <- A.many wsp
v <- crlf
pure $ CU.fromCharArray xs <> v
w <- many1 wsp
let first_part = maybe "" id x
second_part = CU.fromCharArray w
pure $ first_part <> second_part
<|> obs_fws
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
-- |
-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext`
ctext :: forall e. Parser e Char
ctext = sat cond <|> obs_ctext
where cond x = let charcode = C.toCharCode x
in between 33 39 charcode
|| between 42 91 charcode
|| between 93 126 charcode
-- | `quoted_pair`: pair of characters.
-- |
-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp`
quoted_pair :: forall e. Parser e String
quoted_pair = do _ <- char '\\'
v <- vchar <|> wsp
pure $ "\\" <> CU.singleton v
<|> obs_qp
-- | Comment content.
-- |
-- | ccontent = `ctext / quoted-pair / comment`
ccontent :: forall e. Parser e String
ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
-- | Comment. Nothing to return since comments aren't to be processed.
-- |
-- | comment = `"(" *([FWS] ccontent) [FWS] ")"`
comment :: forall e. Parser e String
comment = do _ <- char '('
xs <- A.many do _ <- A.many fws
ccontent
_ <- char ')'
pure $ "(" <> A.fold xs <> ")"
-- | CFWS: comment folding white space.
-- |
-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS`
cfws :: forall e. Parser e String
cfws = do xs <- many1 $ do _ <- tryMaybe fws
comment
pure $ A.fold xs
<|> fws
-- | `address`: email address.
-- |
-- | address = `mailbox / group`
address :: forall e. Parser e String
address = mailbox <|> group
-- | `mailbox`: mail address.
-- |
-- | mailbox = `name-addr / addr-spec`
mailbox :: forall e. Parser e String
mailbox = name_addr <|> addr_spec
-- | `name_addr`: address name.
-- |
-- | name-addr = `[display-name] angle-addr`
name_addr :: forall e. Parser e String
name_addr = do _ <- tryMaybe display_name
angle_addr
-- | `angle_addr` address specification between '<' and '>' characters.
-- |
-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr`
angle_addr :: forall e. Parser e String
angle_addr = do _ <- tryMaybe cfws
_ <- char '<'
a <- addr_spec
_ <- char '>'
_ <- tryMaybe cfws
pure $ "<" <> a <> ">"
<|> obs_angle_addr
-- | `group`: a list of email addresses.
-- |
-- | group = `display-name ":" [group-list] ";" [CFWS]`
group :: forall e. Parser e String
group = do _ <- display_name
_ <- char ':'
xs <- tryMaybe group_list
_ <- char ';'
_ <- tryMaybe cfws
let res = ":" <> maybe "" id xs <> ";"
pure res
-- | `display_name`: displayed name, not the actual email address.
-- |
-- | display-name = `phrase`
display_name :: forall e. Parser e String
display_name = phrase
-- | `mailbox_list`
-- |
-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list`
mailbox_list :: forall e. Parser e String
mailbox_list = do mb <- mailbox
xs <- A.many $ do _ <- char ','
mx <- mailbox
pure $ "," <> mx
pure $ mb <> A.fold xs
<|> obs_mbox_list
-- | `address_list`
-- |
-- | address-list = `(address *("," address)) / obs-addr-list`
address_list :: forall e. Parser e String
address_list = do a <- address
xs <- A.many do _ <- char ','
address
pure $ A.fold $ A.intersperse "," $ [a] <> xs
<|> obs_addr_list
-- | `group_list`
-- |
-- | group-list = `mailbox-list / CFWS / obs-group-list`
group_list :: forall e. Parser e String
group_list = mailbox_list <|> cfws <|> obs_group_list
-- | `atext`: atom accepted characters.
-- | Printable US-ASCII characters not including specials.
-- |
-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /`
-- | `"-" / "/" / "=" / "?" / "^" / "_" / backquote / "{" / "|" / "}" / "~"`
atext :: forall e. Parser e Char
atext = alphanum
<|> char '!' <|> char '#'
<|> char '$' <|> char '%'
<|> char '&' <|> char '\''
<|> char '*' <|> char '+'
<|> char '-' <|> char '/'
<|> char '=' <|> char '?'
<|> char '^' <|> char '_'
<|> char '`' <|> char '{'
<|> char '|' <|> char '}'
<|> char '~'
-- | `atom`
-- |
-- | atom = `[CFWS] 1*atext [CFWS]`
atom :: forall e. Parser e String
atom = CU.fromCharArray <$> do _ <- tryMaybe cfws
a <- many1 atext
_ <- tryMaybe cfws
pure a
-- | `dot_atom_text`
-- |
-- | dot-atom-text = `1*atext *("." 1*atext)`
dot_atom_text :: forall e. Parser e String
dot_atom_text = do xs0 <- many1 atext
xs1 <- A.many $ do _ <- char '.'
xs <- many1 atext
pure $ "." <> CU.fromCharArray xs
let str0 = CU.fromCharArray xs0
str1 = A.fold xs1
pure $ str0 <> str1
-- | `dot_atom`
-- |
-- | dot-atom = `[CFWS] dot-atom-text [CFWS]`
dot_atom :: forall e. Parser e String
dot_atom = do _ <- tryMaybe cfws
x <- dot_atom_text
_ <- tryMaybe cfws
pure x
-- | `specials`: special characters that do not appear in `atext`.
-- |
-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /`
-- | `"\" / "," / "." / DQUOTE`
specials :: forall e. Parser e Char
specials = char '('
<|> char ')'
<|> char '<'
<|> char '>'
<|> char '['
<|> char ']'
<|> char ':'
<|> char ';'
<|> char '@'
<|> char '\\'
<|> char ','
<|> char '.'
<|> char '"'
-- | `addr_spec`
-- |
-- | addr-spec = `local-part "@" domain`
addr_spec :: forall e. Parser e String
addr_spec = do lpart <- local_part
_ <- char '@'
dom <- domain
pure $ lpart <> "@" <> dom
-- | `local_part`
-- |
-- | local-part = `dot-atom / quoted-string / obs-local-part`
local_part :: forall e. Parser e String
local_part = dot_atom <|> quoted_string <|> obs_local_part
-- | `domain`: this is a parser for a domain.
-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different.
-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example.
-- | Therefore, errors are differents from a parser to another.
-- |
-- | domain = `dot-atom / domain-literal / obs-domain`
domain :: forall e. Parser e String
domain = dot_atom <|> domain_literal <|> obs_domain
-- | `domain_literal`
-- |
-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]`
domain_literal :: forall e. Parser e String
domain_literal = do s <- tryMaybe cfws
_ <- char '['
xs <- A.many do _ <- tryMaybe fws
dtext
m <- tryMaybe fws
_ <- char ']'
e <- tryMaybe cfws
pure $ maybe "" id s
<> "[" <> A.fold xs <> maybe "" id m <> "]"
<> maybe "" id e
-- | dtext: characters in domains.
-- | Printable US-ASCII characters not including "[", "]", or "\".
-- |
-- | dtext = `%d33-90 / %d94-126 / obs-dtext`
dtext :: forall e. Parser e String
dtext = CU.singleton <$> sat cond <|> obs_dtext
where cond x = let charcode = C.toCharCode x
in between 33 90 charcode || between 94 126 charcode
-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters.
-- |
-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]`
obs_angle_addr :: forall e. Parser e String
obs_angle_addr = do s <- tryMaybe cfws
_ <- char '<'
r <- obs_route
a <- addr_spec
_ <- char '>'
e <- tryMaybe cfws
pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e
-- | `obs_route`
-- |
-- | obs-route = `obs-domain-list ":"`
obs_route :: forall e. Parser e String
obs_route = do l <- obs_domain_list
_ <- char ':'
pure $ l <> ":"
-- | `obs_domain_list`
-- |
-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])`
obs_domain_list :: forall e. Parser e String
obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ','
_ <- char '@'
d <- domain
xs <- A.many do _ <- char ','
c <- tryMaybe cfws
_ <- char '@'
d2 <- domain
pure $ "," <> maybe "" id c <> d2
pure $ A.fold s <> d <> A.fold xs
-- | `obs_mbox_list`
-- |
-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])`
obs_mbox_list :: forall e. Parser e String
obs_mbox_list = do _ <- A.many $ do _ <- cfws
_ <- char ','
pure unit
mb <- mailbox
xs <- A.many $ do _ <- char ','
x <- mailbox <|> do _ <- cfws
pure ""
pure x
pure $ mb <> A.fold xs
-- | `obs_addr_list`
-- |
-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])`
obs_addr_list :: forall e. Parser e String
obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
char ','
a <- address
xs <- A.many do _ <- char ','
address <|> do _ <- cfws
pure ""
let res = A.fold $ A.intersperse "," $ [a] <> xs
pure res
-- | `obs_group_list`
-- |
-- | obs-group-list = `1*([CFWS] ",") [CFWS]`
obs_group_list :: forall e. Parser e String
obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
_ <- char ','
pure $ maybe "" id x <> ","
c <- tryMaybe cfws
pure $ A.fold xs <> maybe "" id c
-- | `obs_local_part`
-- |
-- | obs-local-part = `word *("." word)`
obs_local_part :: forall e. Parser e String
obs_local_part = do w <- word
ws <- A.many $ do _ <- char '.'
w1 <- word
pure $ "." <> w1
pure $ w <> A.fold ws
-- | `obs_domain`
-- |
-- | obs-domain = `atom *("." atom)`
obs_domain :: forall e. Parser e String
obs_domain = do a <- atom
xs <- A.many $ do _ <- char '.'
x <- atom
pure $ "." <> x
pure $ a <> A.fold xs
-- | `obs_dtext`: obsolete domain text.
-- |
-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair`
obs_dtext :: forall e. Parser e String
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
-- | line feed and white space characters.
-- |
-- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127`
obs_no_ws_ctl :: forall e. Parser e Char
obs_no_ws_ctl = sat cond
where cond x = let charcode = C.toCharCode x
in between 1 8 charcode
|| between 11 12 charcode
|| between 14 31 charcode
|| charcode == 127
-- | obs-ctext: obsolete comment text.
-- |
-- | obs-ctext = `obs-NO-WS-CTL`
obs_ctext :: forall e. Parser e Char
obs_ctext = obs_no_ws_ctl
-- | `obs_qtext`: obsolete accepted quoted text.
-- |
-- | obs-qtext = `obs-NO-WS-CTL`
obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl
-- | `obs_utext`: obsolete text.
-- |
-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR`
obs_utext :: forall e. Parser e Char
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
-- | `obs_qp`: obsolete quoted-pair rule.
-- |
-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)`
obs_qp :: forall e. Parser e String
obs_qp = do _ <- char '\\'
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
pure $ "\\" <> CU.singleton v
-- | `obs_body`: obsolete body.
-- |
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
-- |
-- | Note: `text` is replaced by `vchar`.
-- |
-- | (RFC)
-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)`
-- |
-- | (RFC Errata v1)
-- | obs-body = `*(%d0-127)`
-- |
-- | (RFC Errata v2)
-- | obs-body = `*(d0 /text / LF / CR)`
-- Errata v1
--obs_body :: forall e. Parser e String
--obs_body = CU.fromCharArray <$> A.many item
-- Errata v2
obs_body :: forall e. Parser e String
obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
--obs_body original
--obs_body :: forall e. Parser e String
--obs_body = do A.many $ do _ <- A.many lf
-- _ <- A.many cr
-- v <- A.many $ do x <- char_num 0 <|> text
-- _ <- A.many lf
-- _ <- A.many cr
-- pure x
-- pure $ A.fold v
-- <|> _ <- crlf
-- pure ""
-- | `obs_unstruct`
-- |
-- | Note: implement the version found in the Errata page.
-- |
-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)`
-- |
-- | (RFC Errata)
-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR`
obs_unstruct :: forall e. Parser e String
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
many1 (obs_utext <|> do _ <- fws
pure ' ')
<|> do _ <- fws
pure ""
-- | `obs_phrase`: obsolete "phrase".
-- |
-- | obs-phrase = `word *(word / "." / CFWS)`
obs_phrase :: forall e. Parser e String
obs_phrase = do w <- word
ws <- A.many (word <|> string "." <|> do _ <- cfws
pure "")
pure $ w <> A.fold ws
-- | `word`.
-- |
-- | word = `atom / quoted-string`
word :: forall e. Parser e String
word = atom <|> quoted_string
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
-- |
-- | phrase = `1*word / obs-phrase`
phrase :: forall e. Parser e String
phrase = do ws <- many1 word
pure $ A.fold ws
<|> obs_phrase
-- | `unstructured`
-- |
-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct`
unstructured :: forall e. Parser e String
unstructured = do v <- A.many $ do _ <- fws
vchar
_ <- A.many wsp
pure $ CU.fromCharArray v
<|> obs_unstruct
-- | `obs_phrase_list`: obsolete list of phrases.
-- |
-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])`
obs_phrase_list :: forall e. Parser e String
obs_phrase_list = do first_phrase <- phrase_or_cfws
xs <- A.many $ do _ <- char ','
phrase_or_cfws
pure $ A.fold $ [first_phrase] <> xs
where phrase_or_cfws = do first_phrase <- tryMaybe phrase
case first_phrase of
Nothing -> do _ <- cfws
pure ""
Just x -> pure x
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
-- |
-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`).
-- |
-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext`
qtext :: forall e. Parser e Char
qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
-- | `qcontent`
-- |
-- | qcontent = `qtext / quoted-pair`
qcontent :: forall e. Parser e String
qcontent = CU.singleton <$> qtext <|> quoted_pair
-- | `quoted_string`
-- |
-- | WARNING: this rule was changed in order to take into account the new `qtext` rule,
-- | which now accepts the space character. This new rule, as implemented, still allows
-- | for multiple line returns in the quoted string.
-- |
-- |
-- | Original RFC5322 rule:
-- |
-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]`
-- |
-- | Implemented rule:
-- |
-- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]`
quoted_string :: forall e. Parser e String
quoted_string = do s <- tryMaybe cfws
_ <- char '"'
m <- A.many do l <- tryMaybe crlf
c <- qcontent
pure $ maybe "" id l <> c
_ <- char '"'
e <- tryMaybe cfws
pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e

View file

@ -14,8 +14,7 @@ import GenericParser.DomainParserRFC1035 as RFC1035
import GenericParser.DomainParser as ModernDomains import GenericParser.DomainParser as ModernDomains
import GenericParser.IPAddress (IPv4Error(..)) import GenericParser.IPAddress (IPv4Error(..))
import GenericParser.IPAddress as IP import GenericParser.IPAddress as IP
import GenericParser.EmailAddress as EA import GenericParser.EmailAddress as E
import GenericParser.RFC5322 as E
import Test.TestValues as T import Test.TestValues as T
run :: forall e v. Parser e v -> String -> P.Result e v run :: forall e v. Parser e v -> String -> P.Result e v
@ -69,20 +68,19 @@ test_series :: forall e v
test_series l p v e a = foreachE a (\s -> logtest l p s v e) test_series l p v e a = foreachE a (\s -> logtest l p s v e)
showerror_ipv6 :: IP.IPv6Error -> String showerror_ipv6 :: IP.IPv6Error -> String
showerror_ipv6 (IP.IP6TooManyHexaDecimalCharacters) = "TooManyHexaDecimalCharacters" showerror_ipv6 (IP.InvalidCharacter) = "InvalidCharacter"
showerror_ipv6 (IP.IP6NotEnoughChunks) = "NotEnoughChunks" showerror_ipv6 (IP.TooManyHexaDecimalCharacters) = "TooManyHexaDecimalCharacters"
showerror_ipv6 (IP.IP6TooManyChunks) = "TooManyChunks" showerror_ipv6 (IP.NotEnoughChunks) = "NotEnoughChunks"
showerror_ipv6 IP.IP6IrrelevantShortRepresentation = "useless double dots" showerror_ipv6 (IP.TooManyChunks) = "TooManyChunks"
showerror_ipv6 IP.IP6InvalidRange = "invalid IPv6 range" showerror_ipv6 IP.IPv6UnrelevantShortRepresentation = "useless double dots"
showerror_ipv4 :: IP.IPv4Error -> String showerror_ipv4 :: IP.IPv4Error -> String
showerror_ipv4 (IP4NumberTooBig x) = "value '" <> show x <> "' is > 255" showerror_ipv4 (NumberTooBig x) = "value '" <> show x <> "' is > 255"
showerror_ipv4 IP4IrrelevantShortRepresentation = "useless double dots" showerror_ipv4 IPv4UnrelevantShortRepresentation = "useless double dots"
showerror_ipv4 IP4InvalidRange = "invalid IPv4 range"
showerror_email :: EA.EmailError -> String showerror_email :: E.EmailError -> String
showerror_email EA.InvalidCharacter = "InvalidCharacter" showerror_email E.InvalidCharacter = "InvalidCharacter"
showerror_email (EA.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e showerror_email (E.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e
main :: Effect Unit main :: Effect Unit
main = do main = do
@ -105,7 +103,7 @@ main = do
test_series "IP.ipv6" IP.ipv6 id showerror_ipv6 T.ipv6_addresses test_series "IP.ipv6" IP.ipv6 id showerror_ipv6 T.ipv6_addresses
log "" log ""
test_series "EA.email" EA.email id showerror_email T.valid_email_addresses test_series "E.email" E.email id showerror_email T.valid_email_addresses
log "" log ""
test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short
@ -127,11 +125,3 @@ main = do
log "Does parsers behave correctly (give the exact same input)?" log "Does parsers behave correctly (give the exact same input)?"
foreachE T.valid_email_addresses_short (\s -> compare_parsers "E.address" (P.read_input E.address) E.address s) foreachE T.valid_email_addresses_short (\s -> compare_parsers "E.address" (P.read_input E.address) E.address s)
log ""
let ip4ranges = [ "10.0.0.1/24", "192.168.0.1/32", "1.2.3.4/0", "192.168.0.1/33" ]
test_series "IP.ipv4_range" IP.ipv4_range id showerror_ipv4 ip4ranges
log ""
let ip6ranges = [ "2001::1/56", "2001:1:1::1/0", "::1/128", "::1/129" ]
test_series "IP.ipv6_range" IP.ipv6_range id showerror_ipv6 ip6ranges