Compare commits
10 commits
d7f713839c
...
0684286805
| Author | SHA1 | Date | |
|---|---|---|---|
| 0684286805 | |||
| fe3996829b | |||
| d30d9d74ce | |||
| bbc20a0a12 | |||
| 991a4f36a3 | |||
| e290d1a73d | |||
| 49eb615b79 | |||
| 67e318b949 | |||
| 624bd549f3 | |||
| 51bdcc3ba4 |
7 changed files with 674 additions and 583 deletions
5
makefile
5
makefile
|
|
@ -12,9 +12,12 @@ run:
|
||||||
t:
|
t:
|
||||||
spago test
|
spago test
|
||||||
|
|
||||||
docs:
|
docs-with-search:
|
||||||
spago docs
|
spago docs
|
||||||
|
|
||||||
|
docs:
|
||||||
|
spago docs --no-search
|
||||||
|
|
||||||
DOCS_HTTPD_ACCESS_LOGS ?= /tmp/parser-docs-access.log
|
DOCS_HTTPD_ACCESS_LOGS ?= /tmp/parser-docs-access.log
|
||||||
DOCS_HTTPD_ADDR ?= 127.0.0.1
|
DOCS_HTTPD_ADDR ?= 127.0.0.1
|
||||||
DOCS_HTTPD_PORT ?= 30000
|
DOCS_HTTPD_PORT ?= 30000
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,13 @@ module GenericParser
|
||||||
( module GenericParser.Parser
|
( module GenericParser.Parser
|
||||||
, module GenericParser.DomainParser.Common
|
, module GenericParser.DomainParser.Common
|
||||||
, module GenericParser.DomainParser
|
, module GenericParser.DomainParser
|
||||||
|
, module GenericParser.RFC5322
|
||||||
--, module GenericParser.RFC5234
|
--, module GenericParser.RFC5234
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||||
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
||||||
import GenericParser.Parser (char, current_input, current_position, Error, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
|
import GenericParser.Parser (char, current_input, current_position, Error, errorParser, error_functor, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
|
||||||
|
import GenericParser.RFC5322 (obs_fws, fws, ctext, quoted_pair, ccontent, comment, cfws, address, mailbox, name_addr, angle_addr, group, display_name, mailbox_list, address_list, group_list, atext, atom, dot_atom_text, dot_atom, specials, addr_spec, local_part, {- domain ,-} domain_literal, dtext, obs_angle_addr, obs_route, obs_domain_list, obs_mbox_list, obs_addr_list, obs_group_list, obs_local_part, obs_domain, obs_dtext, obs_no_ws_ctl, obs_ctext, obs_qtext, obs_utext, obs_qp, obs_body, obs_unstruct, obs_phrase, word, phrase, unstructured, obs_phrase_list, qtext, qcontent, quoted_string)
|
||||||
|
-- import GenericParser.RFC5234
|
||||||
-- import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
|
-- import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
|
||||||
-- import GenericParser.RFC5234 ()
|
|
||||||
|
|
|
||||||
|
|
@ -1,574 +1,37 @@
|
||||||
-- | `EmailAddress` is a parser for email addresses, implementing the grammar found in RFC5322.
|
-- | `EmailAddress` is a simplistic parser for email addresses.
|
||||||
|
-- | For a more serious parser, see the `RFC5322` module.
|
||||||
-- |
|
-- |
|
||||||
-- | STATUS: the parser mostly works, except for comments.
|
-- | STATUS: the parser works for very simplistic email addresses.
|
||||||
-- | Comments provoke a stack overflow, this must be investigated.
|
-- | This shouldn't be used in a serious environment.
|
||||||
-- |
|
|
||||||
-- | Also, the parser needs a thorough review.
|
|
||||||
module GenericParser.EmailAddress where
|
module GenericParser.EmailAddress where
|
||||||
|
|
||||||
import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit)
|
import Prelude (bind, pure, ($), (<>))
|
||||||
|
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Control.Lazy (defer)
|
|
||||||
import Data.Array as A
|
|
||||||
import Data.Char as C
|
|
||||||
import Data.Either (Either(..))
|
import Data.Either (Either(..))
|
||||||
import Data.Maybe (Maybe(..), maybe)
|
import Data.Maybe (Maybe(..))
|
||||||
import Data.String.CodeUnits as CU
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
import GenericParser.BaseFunctions (id)
|
import GenericParser.Parser (Parser(..), char, item, current_input, failureError, parse, rollback, until)
|
||||||
import GenericParser.Parser (Parser(..)
|
|
||||||
, sat, char, char_num, char_range, string, item, many1, tryMaybe
|
|
||||||
, current_input, failureError, parse, rollback, until)
|
|
||||||
import GenericParser.DomainParser.Common (DomainError)
|
import GenericParser.DomainParser.Common (DomainError)
|
||||||
import GenericParser.DomainParser (sub_eof)
|
import GenericParser.DomainParser (sub_eof)
|
||||||
-- ABNF core rules.
|
-- ABNF core rules.
|
||||||
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
|
import GenericParser.RFC5234 (digit)
|
||||||
import GenericParser.SomeParsers (letter, alphanum)
|
import GenericParser.SomeParsers (letter)
|
||||||
|
|
||||||
data EmailError
|
data EmailError
|
||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
| InvalidDomain (Maybe DomainError)
|
| InvalidDomain (Maybe DomainError)
|
||||||
|
|
||||||
-- | obs-FWS: obsolete folding white space.
|
|
||||||
-- |
|
|
||||||
-- | obs-FWS = `1*WSP *(CRLF 1*WSP)`
|
|
||||||
obs_fws :: forall e. Parser e String
|
|
||||||
obs_fws = do x <- A.many wsp
|
|
||||||
xs <- A.many $ do v <- crlf
|
|
||||||
w <- many1 wsp
|
|
||||||
pure $ v <> CU.fromCharArray w
|
|
||||||
pure $ A.fold $ [CU.fromCharArray x] <> xs
|
|
||||||
|
|
||||||
-- | FWS: folding white space. This can be described in plain english as:
|
|
||||||
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
|
|
||||||
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
|
|
||||||
-- |
|
|
||||||
-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS`
|
|
||||||
fws :: forall e. Parser e String
|
|
||||||
fws = do x <- tryMaybe do xs <- A.many wsp
|
|
||||||
v <- crlf
|
|
||||||
pure $ CU.fromCharArray xs <> v
|
|
||||||
w <- many1 wsp
|
|
||||||
let first_part = maybe "" id x
|
|
||||||
second_part = CU.fromCharArray w
|
|
||||||
pure $ first_part <> second_part
|
|
||||||
<|> obs_fws
|
|
||||||
|
|
||||||
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
|
|
||||||
-- |
|
|
||||||
-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext`
|
|
||||||
ctext :: forall e. Parser e Char
|
|
||||||
ctext = sat cond <|> obs_ctext
|
|
||||||
where cond x = let charcode = C.toCharCode x
|
|
||||||
in between 33 39 charcode
|
|
||||||
|| between 42 91 charcode
|
|
||||||
|| between 93 126 charcode
|
|
||||||
|
|
||||||
-- | `quoted_pair`: pair of characters.
|
|
||||||
-- |
|
|
||||||
-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp`
|
|
||||||
quoted_pair :: forall e. Parser e String
|
|
||||||
quoted_pair = do _ <- char '\\'
|
|
||||||
v <- vchar <|> wsp
|
|
||||||
pure $ "\\" <> CU.singleton v
|
|
||||||
<|> obs_qp
|
|
||||||
|
|
||||||
-- | Comment content.
|
|
||||||
-- |
|
|
||||||
-- | ccontent = `ctext / quoted-pair / comment`
|
|
||||||
ccontent :: forall e. Parser e String
|
|
||||||
ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
|
|
||||||
|
|
||||||
-- | Comment. Nothing to return since comments aren't to be processed.
|
|
||||||
-- |
|
|
||||||
-- | comment = `"(" *([FWS] ccontent) [FWS] ")"`
|
|
||||||
comment :: forall e. Parser e String
|
|
||||||
comment = do _ <- char '('
|
|
||||||
xs <- A.many do _ <- A.many fws
|
|
||||||
ccontent
|
|
||||||
_ <- char ')'
|
|
||||||
pure $ "(" <> A.fold xs <> ")"
|
|
||||||
|
|
||||||
-- | CFWS: comment folding white space.
|
|
||||||
-- |
|
|
||||||
-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS`
|
|
||||||
cfws :: forall e. Parser e String
|
|
||||||
cfws = do xs <- many1 $ do _ <- tryMaybe fws
|
|
||||||
comment
|
|
||||||
pure $ A.fold xs
|
|
||||||
<|> fws
|
|
||||||
|
|
||||||
-- | `address`: email address.
|
|
||||||
-- |
|
|
||||||
-- | address = `mailbox / group`
|
|
||||||
address :: forall e. Parser e String
|
|
||||||
address = mailbox <|> group
|
|
||||||
|
|
||||||
-- | `mailbox`: mail address.
|
|
||||||
-- |
|
|
||||||
-- | mailbox = `name-addr / addr-spec`
|
|
||||||
mailbox :: forall e. Parser e String
|
|
||||||
mailbox = name_addr <|> addr_spec
|
|
||||||
|
|
||||||
-- | `name_addr`: address name.
|
|
||||||
-- |
|
|
||||||
-- | name-addr = `[display-name] angle-addr`
|
|
||||||
name_addr :: forall e. Parser e String
|
|
||||||
name_addr = do _ <- tryMaybe display_name
|
|
||||||
angle_addr
|
|
||||||
|
|
||||||
-- | `angle_addr` address specification between '<' and '>' characters.
|
|
||||||
-- |
|
|
||||||
-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr`
|
|
||||||
angle_addr :: forall e. Parser e String
|
|
||||||
angle_addr = do _ <- tryMaybe cfws
|
|
||||||
_ <- char '<'
|
|
||||||
a <- addr_spec
|
|
||||||
_ <- char '>'
|
|
||||||
_ <- tryMaybe cfws
|
|
||||||
pure $ "<" <> a <> ">"
|
|
||||||
<|> obs_angle_addr
|
|
||||||
|
|
||||||
-- | `group`: a list of email addresses.
|
|
||||||
-- |
|
|
||||||
-- | group = `display-name ":" [group-list] ";" [CFWS]`
|
|
||||||
group :: forall e. Parser e String
|
|
||||||
group = do _ <- display_name
|
|
||||||
_ <- char ':'
|
|
||||||
xs <- tryMaybe group_list
|
|
||||||
_ <- char ';'
|
|
||||||
_ <- tryMaybe cfws
|
|
||||||
let res = ":" <> maybe "" id xs <> ";"
|
|
||||||
pure res
|
|
||||||
|
|
||||||
-- | `display_name`: displayed name, not the actual email address.
|
|
||||||
-- |
|
|
||||||
-- | display-name = `phrase`
|
|
||||||
display_name :: forall e. Parser e String
|
|
||||||
display_name = phrase
|
|
||||||
|
|
||||||
-- | `mailbox_list`
|
|
||||||
-- |
|
|
||||||
-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list`
|
|
||||||
mailbox_list :: forall e. Parser e String
|
|
||||||
mailbox_list = do mb <- mailbox
|
|
||||||
xs <- A.many $ do _ <- char ','
|
|
||||||
mx <- mailbox
|
|
||||||
pure $ "," <> mx
|
|
||||||
pure $ mb <> A.fold xs
|
|
||||||
<|> obs_mbox_list
|
|
||||||
|
|
||||||
-- | `address_list`
|
|
||||||
-- |
|
|
||||||
-- | address-list = `(address *("," address)) / obs-addr-list`
|
|
||||||
address_list :: forall e. Parser e String
|
|
||||||
address_list = do a <- address
|
|
||||||
xs <- A.many do _ <- char ','
|
|
||||||
address
|
|
||||||
pure $ A.fold $ A.intersperse "," $ [a] <> xs
|
|
||||||
<|> obs_addr_list
|
|
||||||
|
|
||||||
-- | `group_list`
|
|
||||||
-- |
|
|
||||||
-- | group-list = `mailbox-list / CFWS / obs-group-list`
|
|
||||||
group_list :: forall e. Parser e String
|
|
||||||
group_list = mailbox_list <|> cfws <|> obs_group_list
|
|
||||||
|
|
||||||
|
|
||||||
-- | `atext`: atom accepted characters.
|
|
||||||
-- | Printable US-ASCII characters not including specials.
|
|
||||||
-- |
|
|
||||||
-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /`
|
|
||||||
-- | `"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"`
|
|
||||||
atext :: forall e. Parser e Char
|
|
||||||
atext = alphanum
|
|
||||||
<|> char '!' <|> char '#'
|
|
||||||
<|> char '$' <|> char '%'
|
|
||||||
<|> char '&' <|> char '\''
|
|
||||||
<|> char '*' <|> char '+'
|
|
||||||
<|> char '-' <|> char '/'
|
|
||||||
<|> char '=' <|> char '?'
|
|
||||||
<|> char '^' <|> char '_'
|
|
||||||
<|> char '`' <|> char '{'
|
|
||||||
<|> char '|' <|> char '}'
|
|
||||||
<|> char '~'
|
|
||||||
|
|
||||||
|
|
||||||
-- | `atom`
|
|
||||||
-- |
|
|
||||||
-- | atom = `[CFWS] 1*atext [CFWS]`
|
|
||||||
atom :: forall e. Parser e String
|
|
||||||
atom = CU.fromCharArray <$> do _ <- tryMaybe cfws
|
|
||||||
a <- many1 atext
|
|
||||||
_ <- tryMaybe cfws
|
|
||||||
pure a
|
|
||||||
|
|
||||||
-- | `dot_atom_text`
|
|
||||||
-- |
|
|
||||||
-- | dot-atom-text = `1*atext *("." 1*atext)`
|
|
||||||
dot_atom_text :: forall e. Parser e String
|
|
||||||
dot_atom_text = do xs0 <- many1 atext
|
|
||||||
xs1 <- A.many $ do _ <- char '.'
|
|
||||||
xs <- many1 atext
|
|
||||||
pure $ "." <> CU.fromCharArray xs
|
|
||||||
let str0 = CU.fromCharArray xs0
|
|
||||||
str1 = A.fold xs1
|
|
||||||
pure $ str0 <> str1
|
|
||||||
|
|
||||||
-- | `dot_atom`
|
|
||||||
-- |
|
|
||||||
-- | dot-atom = `[CFWS] dot-atom-text [CFWS]`
|
|
||||||
dot_atom :: forall e. Parser e String
|
|
||||||
dot_atom = do _ <- tryMaybe cfws
|
|
||||||
x <- dot_atom_text
|
|
||||||
_ <- tryMaybe cfws
|
|
||||||
pure x
|
|
||||||
|
|
||||||
-- | `specials`: special characters that do not appear in `atext`.
|
|
||||||
-- |
|
|
||||||
-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /`
|
|
||||||
-- | `"\" / "," / "." / DQUOTE`
|
|
||||||
specials :: forall e. Parser e Char
|
|
||||||
specials = char '('
|
|
||||||
<|> char ')'
|
|
||||||
<|> char '<'
|
|
||||||
<|> char '>'
|
|
||||||
<|> char '['
|
|
||||||
<|> char ']'
|
|
||||||
<|> char ':'
|
|
||||||
<|> char ';'
|
|
||||||
<|> char '@'
|
|
||||||
<|> char '\\'
|
|
||||||
<|> char ','
|
|
||||||
<|> char '.'
|
|
||||||
<|> char '"'
|
|
||||||
|
|
||||||
-- | `addr_spec`
|
|
||||||
-- |
|
|
||||||
-- | addr-spec = `local-part "@" domain`
|
|
||||||
addr_spec :: forall e. Parser e String
|
|
||||||
addr_spec = do lpart <- local_part
|
|
||||||
_ <- char '@'
|
|
||||||
dom <- domain
|
|
||||||
pure $ lpart <> "@" <> dom
|
|
||||||
|
|
||||||
-- | `local_part`
|
|
||||||
-- |
|
|
||||||
-- | local-part = `dot-atom / quoted-string / obs-local-part`
|
|
||||||
local_part :: forall e. Parser e String
|
|
||||||
local_part = dot_atom <|> quoted_string <|> obs_local_part
|
|
||||||
|
|
||||||
-- | `domain`: this is a parser for a domain.
|
|
||||||
-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different.
|
|
||||||
-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example.
|
|
||||||
-- | Therefore, errors are differents from a parser to another.
|
|
||||||
-- |
|
|
||||||
-- | domain = `dot-atom / domain-literal / obs-domain`
|
|
||||||
domain :: forall e. Parser e String
|
|
||||||
domain = dot_atom <|> domain_literal <|> obs_domain
|
|
||||||
|
|
||||||
-- | `domain_literal`
|
|
||||||
-- |
|
|
||||||
-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]`
|
|
||||||
domain_literal :: forall e. Parser e String
|
|
||||||
domain_literal = do s <- tryMaybe cfws
|
|
||||||
_ <- char '['
|
|
||||||
xs <- A.many do _ <- tryMaybe fws
|
|
||||||
dtext
|
|
||||||
m <- tryMaybe fws
|
|
||||||
_ <- char ']'
|
|
||||||
e <- tryMaybe cfws
|
|
||||||
pure $ maybe "" id s
|
|
||||||
<> "[" <> A.fold xs <> maybe "" id m <> "]"
|
|
||||||
<> maybe "" id e
|
|
||||||
|
|
||||||
-- | dtext: characters in domains.
|
|
||||||
-- | Printable US-ASCII characters not including "[", "]", or "\".
|
|
||||||
-- |
|
|
||||||
-- | dtext = `%d33-90 / %d94-126 / obs-dtext`
|
|
||||||
dtext :: forall e. Parser e String
|
|
||||||
dtext = CU.singleton <$> sat cond <|> obs_dtext
|
|
||||||
where cond x = let charcode = C.toCharCode x
|
|
||||||
in between 33 90 charcode || between 94 126 charcode
|
|
||||||
|
|
||||||
-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters.
|
|
||||||
-- |
|
|
||||||
-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]`
|
|
||||||
obs_angle_addr :: forall e. Parser e String
|
|
||||||
obs_angle_addr = do s <- tryMaybe cfws
|
|
||||||
_ <- char '<'
|
|
||||||
r <- obs_route
|
|
||||||
a <- addr_spec
|
|
||||||
_ <- char '>'
|
|
||||||
e <- tryMaybe cfws
|
|
||||||
pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e
|
|
||||||
|
|
||||||
-- | `obs_route`
|
|
||||||
-- |
|
|
||||||
-- | obs-route = `obs-domain-list ":"`
|
|
||||||
obs_route :: forall e. Parser e String
|
|
||||||
obs_route = do l <- obs_domain_list
|
|
||||||
_ <- char ':'
|
|
||||||
pure $ l <> ":"
|
|
||||||
|
|
||||||
-- | `obs_domain_list`
|
|
||||||
-- |
|
|
||||||
-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])`
|
|
||||||
obs_domain_list :: forall e. Parser e String
|
|
||||||
obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ','
|
|
||||||
_ <- char '@'
|
|
||||||
d <- domain
|
|
||||||
xs <- A.many do _ <- char ','
|
|
||||||
c <- tryMaybe cfws
|
|
||||||
_ <- char '@'
|
|
||||||
d2 <- domain
|
|
||||||
pure $ "," <> maybe "" id c <> d2
|
|
||||||
pure $ A.fold s <> d <> A.fold xs
|
|
||||||
|
|
||||||
-- | `obs_mbox_list`
|
|
||||||
-- |
|
|
||||||
-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])`
|
|
||||||
obs_mbox_list :: forall e. Parser e String
|
|
||||||
obs_mbox_list = do _ <- A.many $ do _ <- cfws
|
|
||||||
_ <- char ','
|
|
||||||
pure unit
|
|
||||||
mb <- mailbox
|
|
||||||
xs <- A.many $ do _ <- char ','
|
|
||||||
x <- mailbox <|> do _ <- cfws
|
|
||||||
pure ""
|
|
||||||
pure x
|
|
||||||
pure $ mb <> A.fold xs
|
|
||||||
|
|
||||||
-- | `obs_addr_list`
|
|
||||||
-- |
|
|
||||||
-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])`
|
|
||||||
obs_addr_list :: forall e. Parser e String
|
|
||||||
obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
|
|
||||||
char ','
|
|
||||||
a <- address
|
|
||||||
xs <- A.many do _ <- char ','
|
|
||||||
address <|> do _ <- cfws
|
|
||||||
pure ""
|
|
||||||
let res = A.fold $ A.intersperse "," $ [a] <> xs
|
|
||||||
pure res
|
|
||||||
|
|
||||||
-- | `obs_group_list`
|
|
||||||
-- |
|
|
||||||
-- | obs-group-list = `1*([CFWS] ",") [CFWS]`
|
|
||||||
obs_group_list :: forall e. Parser e String
|
|
||||||
obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
|
|
||||||
_ <- char ','
|
|
||||||
pure $ maybe "" id x <> ","
|
|
||||||
c <- tryMaybe cfws
|
|
||||||
pure $ A.fold xs <> maybe "" id c
|
|
||||||
|
|
||||||
-- | `obs_local_part`
|
|
||||||
-- |
|
|
||||||
-- | obs-local-part = `word *("." word)`
|
|
||||||
obs_local_part :: forall e. Parser e String
|
|
||||||
obs_local_part = do w <- word
|
|
||||||
ws <- A.many $ do _ <- char '.'
|
|
||||||
w1 <- word
|
|
||||||
pure $ "." <> w1
|
|
||||||
pure $ w <> A.fold ws
|
|
||||||
|
|
||||||
-- | `obs_domain`
|
|
||||||
-- |
|
|
||||||
-- | obs-domain = `atom *("." atom)`
|
|
||||||
obs_domain :: forall e. Parser e String
|
|
||||||
obs_domain = do a <- atom
|
|
||||||
xs <- A.many $ do _ <- char '.'
|
|
||||||
x <- atom
|
|
||||||
pure $ "." <> x
|
|
||||||
pure $ a <> A.fold xs
|
|
||||||
|
|
||||||
-- | `obs_dtext`: obsolete domain text.
|
|
||||||
-- |
|
|
||||||
-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair`
|
|
||||||
obs_dtext :: forall e. Parser e String
|
|
||||||
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
|
|
||||||
|
|
||||||
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
|
|
||||||
-- | line feed and white space characters.
|
|
||||||
-- |
|
|
||||||
-- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127`
|
|
||||||
obs_no_ws_ctl :: forall e. Parser e Char
|
|
||||||
obs_no_ws_ctl = sat cond
|
|
||||||
where cond x = let charcode = C.toCharCode x
|
|
||||||
in between 1 8 charcode
|
|
||||||
|| between 11 12 charcode
|
|
||||||
|| between 14 31 charcode
|
|
||||||
|| charcode == 127
|
|
||||||
|
|
||||||
-- | obs-ctext: obsolete comment text.
|
|
||||||
-- |
|
|
||||||
-- | obs-ctext = `obs-NO-WS-CTL`
|
|
||||||
obs_ctext :: forall e. Parser e Char
|
|
||||||
obs_ctext = obs_no_ws_ctl
|
|
||||||
|
|
||||||
-- | `obs_qtext`: obsolete accepted quoted text.
|
|
||||||
-- |
|
|
||||||
-- | obs-qtext = `obs-NO-WS-CTL`
|
|
||||||
obs_qtext :: forall e. Parser e Char
|
|
||||||
obs_qtext = obs_no_ws_ctl
|
|
||||||
|
|
||||||
-- | `obs_utext`: obsolete text.
|
|
||||||
-- |
|
|
||||||
-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR`
|
|
||||||
obs_utext :: forall e. Parser e Char
|
|
||||||
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
|
|
||||||
|
|
||||||
-- | `obs_qp`: obsolete quoted-pair rule.
|
|
||||||
-- |
|
|
||||||
-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)`
|
|
||||||
obs_qp :: forall e. Parser e String
|
|
||||||
obs_qp = do _ <- char '\\'
|
|
||||||
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
|
|
||||||
pure $ "\\" <> CU.singleton v
|
|
||||||
|
|
||||||
-- | `obs_body`: obsolete body.
|
|
||||||
-- |
|
|
||||||
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
|
|
||||||
-- |
|
|
||||||
-- | Note: `text` is replaced by `vchar`.
|
|
||||||
-- |
|
|
||||||
-- | (RFC)
|
|
||||||
-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)`
|
|
||||||
-- |
|
|
||||||
-- | (RFC Errata v1)
|
|
||||||
-- | obs-body = `*(%d0-127)`
|
|
||||||
-- |
|
|
||||||
-- | (RFC Errata v2)
|
|
||||||
-- | obs-body = `*(d0 /text / LF / CR)`
|
|
||||||
-- Errata v1
|
|
||||||
--obs_body :: forall e. Parser e String
|
|
||||||
--obs_body = CU.fromCharArray <$> A.many item
|
|
||||||
|
|
||||||
-- Errata v2
|
|
||||||
obs_body :: forall e. Parser e String
|
|
||||||
obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
|
|
||||||
|
|
||||||
--obs_body original
|
|
||||||
--obs_body :: forall e. Parser e String
|
|
||||||
--obs_body = do A.many $ do _ <- A.many lf
|
|
||||||
-- _ <- A.many cr
|
|
||||||
-- v <- A.many $ do x <- char_num 0 <|> text
|
|
||||||
-- _ <- A.many lf
|
|
||||||
-- _ <- A.many cr
|
|
||||||
-- pure x
|
|
||||||
-- pure $ A.fold v
|
|
||||||
-- <|> _ <- crlf
|
|
||||||
-- pure ""
|
|
||||||
|
|
||||||
-- | `obs_unstruct`
|
|
||||||
-- |
|
|
||||||
-- | Note: implement the version found in the Errata page.
|
|
||||||
-- |
|
|
||||||
-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)`
|
|
||||||
-- |
|
|
||||||
-- | (RFC Errata)
|
|
||||||
-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR`
|
|
||||||
obs_unstruct :: forall e. Parser e String
|
|
||||||
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
|
|
||||||
many1 (obs_utext <|> do _ <- fws
|
|
||||||
pure ' ')
|
|
||||||
<|> do _ <- fws
|
|
||||||
pure ""
|
|
||||||
|
|
||||||
-- | `obs_phrase`: obsolete "phrase".
|
|
||||||
-- |
|
|
||||||
-- | obs-phrase = `word *(word / "." / CFWS)`
|
|
||||||
obs_phrase :: forall e. Parser e String
|
|
||||||
obs_phrase = do w <- word
|
|
||||||
ws <- A.many (word <|> string "." <|> do _ <- cfws
|
|
||||||
pure "")
|
|
||||||
pure $ w <> A.fold ws
|
|
||||||
|
|
||||||
-- | `word`.
|
|
||||||
-- |
|
|
||||||
-- | word = `atom / quoted-string`
|
|
||||||
word :: forall e. Parser e String
|
|
||||||
word = atom <|> quoted_string
|
|
||||||
|
|
||||||
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
|
|
||||||
-- |
|
|
||||||
-- | phrase = `1*word / obs-phrase`
|
|
||||||
phrase :: forall e. Parser e String
|
|
||||||
phrase = do ws <- many1 word
|
|
||||||
pure $ A.fold ws
|
|
||||||
<|> obs_phrase
|
|
||||||
|
|
||||||
-- | `unstructured`
|
|
||||||
-- |
|
|
||||||
-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct`
|
|
||||||
unstructured :: forall e. Parser e String
|
|
||||||
unstructured = do v <- A.many $ do _ <- fws
|
|
||||||
vchar
|
|
||||||
_ <- A.many wsp
|
|
||||||
pure $ CU.fromCharArray v
|
|
||||||
<|> obs_unstruct
|
|
||||||
|
|
||||||
|
|
||||||
-- | `obs_phrase_list`: obsolete list of phrases.
|
|
||||||
-- |
|
|
||||||
-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])`
|
|
||||||
obs_phrase_list :: forall e. Parser e String
|
|
||||||
obs_phrase_list = do first_phrase <- phrase_or_cfws
|
|
||||||
xs <- A.many $ do _ <- char ','
|
|
||||||
phrase_or_cfws
|
|
||||||
pure $ A.fold $ [first_phrase] <> xs
|
|
||||||
where phrase_or_cfws = do first_phrase <- tryMaybe phrase
|
|
||||||
case first_phrase of
|
|
||||||
Nothing -> do _ <- cfws
|
|
||||||
pure ""
|
|
||||||
Just x -> pure x
|
|
||||||
|
|
||||||
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
|
|
||||||
-- |
|
|
||||||
-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`).
|
|
||||||
-- |
|
|
||||||
-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext`
|
|
||||||
qtext :: forall e. Parser e Char
|
|
||||||
qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
|
|
||||||
|
|
||||||
-- | `qcontent`
|
|
||||||
-- |
|
|
||||||
-- | qcontent = `qtext / quoted-pair`
|
|
||||||
qcontent :: forall e. Parser e String
|
|
||||||
qcontent = CU.singleton <$> qtext <|> quoted_pair
|
|
||||||
|
|
||||||
-- | `quoted_string`
|
|
||||||
-- |
|
|
||||||
-- | WARNING: this rule was changed in order to take into account the new `qtext` rule,
|
|
||||||
-- | which now accepts the space character. This new rule, as implemented, still allows
|
|
||||||
-- | for multiple line returns in the quoted string.
|
|
||||||
-- |
|
|
||||||
-- |
|
|
||||||
-- | Original RFC5322 rule:
|
|
||||||
-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]`
|
|
||||||
-- |
|
|
||||||
-- | Implemented rule:
|
|
||||||
-- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]`
|
|
||||||
quoted_string :: forall e. Parser e String
|
|
||||||
quoted_string = do s <- tryMaybe cfws
|
|
||||||
_ <- char '"'
|
|
||||||
m <- A.many do l <- tryMaybe crlf
|
|
||||||
c <- qcontent
|
|
||||||
pure $ maybe "" id l <> c
|
|
||||||
_ <- char '"'
|
|
||||||
e <- tryMaybe cfws
|
|
||||||
pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e
|
|
||||||
|
|
||||||
-- | TODO: For now, `local_part` only checks that
|
-- | TODO: For now, `local_part` only checks that
|
||||||
-- | (a) the first character is a letter,
|
-- | (a) the first character is a letter,
|
||||||
-- | (b) the last character is either a letter or a digit.
|
-- | (b) the last character is either a letter or a digit.
|
||||||
-- | The rest can be any letter, digit, '-' or '.'.
|
-- | The rest can be any letter, digit, '-' or '.'.
|
||||||
local_part' :: Parser EmailError String
|
local_part :: Parser EmailError String
|
||||||
local_part' = do firstchar <- letter
|
local_part = do firstchar <- letter
|
||||||
rest <- until end (letter <|> digit <|> char '-' <|> char '.')
|
rest <- until end (letter <|> digit <|> char '-' <|> char '.')
|
||||||
lastchar <- letter <|> digit
|
lastchar <- letter <|> digit
|
||||||
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
|
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
|
||||||
where
|
where
|
||||||
end :: forall e. Parser e Char
|
end :: forall e. Parser e Char
|
||||||
end = do c <- item
|
end = do c <- item
|
||||||
|
|
@ -577,7 +40,7 @@ local_part' = do firstchar <- letter
|
||||||
|
|
||||||
-- | `email` is the parser for email addresses.
|
-- | `email` is the parser for email addresses.
|
||||||
email :: Parser EmailError String
|
email :: Parser EmailError String
|
||||||
email = do login <- local_part'
|
email = do login <- local_part
|
||||||
_ <- char '@'
|
_ <- char '@'
|
||||||
input <- current_input
|
input <- current_input
|
||||||
case parse sub_eof input of
|
case parse sub_eof input of
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
-- | `IPAddress` is a parser for internet protocol addresses (both IPv4 and IPv6).
|
-- | `IPAddress` is a parser for internet protocol addresses (both IPv4 and IPv6).
|
||||||
module GenericParser.IPAddress where
|
module GenericParser.IPAddress where
|
||||||
|
|
||||||
import Prelude (Ordering(..), compare, (==), (<>), (<), (+), (-), bind, pure, ($), (<<<), (>), show, map, unit)
|
import Prelude (Ordering(..), compare, (==), (<>), (<), (+), (-), between, bind, pure, ($), (<<<), (>), show, map, unit)
|
||||||
|
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Data.Array as A
|
import Data.Array as A
|
||||||
|
|
@ -12,26 +12,26 @@ import GenericParser.Parser (Parser(..)
|
||||||
, failureError
|
, failureError
|
||||||
, current_position
|
, current_position
|
||||||
, string
|
, string
|
||||||
, many1, lookahead
|
, read_input, many1, lookahead
|
||||||
, char)
|
, char)
|
||||||
import GenericParser.BaseFunctions (repeat)
|
import GenericParser.BaseFunctions (repeat)
|
||||||
import GenericParser.SomeParsers (nat)
|
import GenericParser.SomeParsers (nat)
|
||||||
import GenericParser.RFC5234 (hexdig)
|
import GenericParser.RFC5234 (hexdig)
|
||||||
|
|
||||||
data IPv6Error
|
data IPv6Error
|
||||||
= InvalidCharacter
|
= IP6TooManyHexaDecimalCharacters
|
||||||
| TooManyHexaDecimalCharacters
|
| IP6NotEnoughChunks
|
||||||
| NotEnoughChunks
|
| IP6TooManyChunks
|
||||||
| TooManyChunks
|
| IP6IrrelevantShortRepresentation
|
||||||
| IPv6UnrelevantShortRepresentation
|
| IP6InvalidRange
|
||||||
|
|
||||||
-- | `ipv6_chunk` parses just a group of hexadecimal characters.
|
-- | `ipv6_chunk` parses just a group of hexadecimal characters.
|
||||||
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
-- | Return an error (IP6TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
||||||
ipv6_chunk :: Parser IPv6Error String
|
ipv6_chunk :: Parser IPv6Error String
|
||||||
ipv6_chunk = do pos <- current_position
|
ipv6_chunk = do pos <- current_position
|
||||||
hexachars <- many1 hexdig
|
hexachars <- many1 hexdig
|
||||||
if A.length hexachars > 4
|
if A.length hexachars > 4
|
||||||
then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
|
then Parser \_ -> failureError pos (Just IP6TooManyHexaDecimalCharacters)
|
||||||
else pure $ CU.fromCharArray hexachars
|
else pure $ CU.fromCharArray hexachars
|
||||||
|
|
||||||
-- | `ipv6_chunk'` is `ipv6_chunk` with a following ':' character.
|
-- | `ipv6_chunk'` is `ipv6_chunk` with a following ':' character.
|
||||||
|
|
@ -52,9 +52,9 @@ ipv6_full = do chunks <- many1 ipv6_chunk'
|
||||||
pos <- current_position
|
pos <- current_position
|
||||||
lastchunk <- ipv6_chunk
|
lastchunk <- ipv6_chunk
|
||||||
case compare (A.length chunks) 7 of
|
case compare (A.length chunks) 7 of
|
||||||
LT -> Parser \_ -> failureError pos (Just NotEnoughChunks)
|
LT -> Parser \_ -> failureError pos (Just IP6NotEnoughChunks)
|
||||||
EQ -> pure $ A.fold (A.intersperse ":" (chunks <> [lastchunk]))
|
EQ -> pure $ A.fold (A.intersperse ":" (chunks <> [lastchunk]))
|
||||||
GT -> Parser \_ -> failureError pos (Just TooManyChunks)
|
GT -> Parser \_ -> failureError pos (Just IP6TooManyChunks)
|
||||||
|
|
||||||
-- | `ipv6_shortened` parses a shortened representation of an IPv6 address.
|
-- | `ipv6_shortened` parses a shortened representation of an IPv6 address.
|
||||||
ipv6_shortened :: Parser IPv6Error String
|
ipv6_shortened :: Parser IPv6Error String
|
||||||
|
|
@ -73,23 +73,36 @@ ipv6_shortened =
|
||||||
let nb_zero_filling = 8 - (A.length chunks_part1 + A.length chunks_part2)
|
let nb_zero_filling = 8 - (A.length chunks_part1 + A.length chunks_part2)
|
||||||
filling = repeat nb_zero_filling "0000"
|
filling = repeat nb_zero_filling "0000"
|
||||||
if nb_zero_filling < 1
|
if nb_zero_filling < 1
|
||||||
then Parser \_ -> failureError pos (Just IPv6UnrelevantShortRepresentation)
|
then Parser \_ -> failureError pos (Just IP6IrrelevantShortRepresentation)
|
||||||
else pure $ A.fold (A.intersperse ":" $ A.concat [chunks_part1, filling, chunks_part2])
|
else pure $ A.fold (A.intersperse ":" $ A.concat [chunks_part1, filling, chunks_part2])
|
||||||
|
|
||||||
-- | TODO: accept IPv6 addresses between brackets ([ipv6]).
|
-- | TODO: accept IPv6 addresses between brackets ([ipv6]).
|
||||||
ipv6 :: Parser IPv6Error String
|
ipv6 :: Parser IPv6Error String
|
||||||
ipv6 = ipv6_shortened <|> ipv6_full
|
ipv6 = ipv6_shortened <|> ipv6_full
|
||||||
|
|
||||||
|
-- | `ipv6_range` parses an ipv6 range, such as "2001::1/56".
|
||||||
|
-- | If the parsing succeed, the whole string is returned.
|
||||||
|
ipv6_range :: Parser IPv6Error String
|
||||||
|
ipv6_range =
|
||||||
|
read_input do _ <- ipv6
|
||||||
|
_ <- char '/'
|
||||||
|
pos <- current_position
|
||||||
|
n <- nat
|
||||||
|
if between 0 128 n
|
||||||
|
then pure ""
|
||||||
|
else Parser \_ -> failureError pos (Just IP6InvalidRange)
|
||||||
|
|
||||||
data IPv4Error
|
data IPv4Error
|
||||||
= NumberTooBig Int
|
= IP4NumberTooBig Int
|
||||||
| IPv4UnrelevantShortRepresentation
|
| IP4IrrelevantShortRepresentation
|
||||||
|
| IP4InvalidRange
|
||||||
|
|
||||||
-- | `ipv4_byte` a parser for 0 to 255 natural integers, which is part of the representation of an IPv4 address.
|
-- | `ipv4_byte` a parser for 0 to 255 natural integers, which is part of the representation of an IPv4 address.
|
||||||
ipv4_byte :: Parser IPv4Error Int
|
ipv4_byte :: Parser IPv4Error Int
|
||||||
ipv4_byte = do pos <- current_position
|
ipv4_byte = do pos <- current_position
|
||||||
number <- nat
|
number <- nat
|
||||||
if number > 255
|
if number > 255
|
||||||
then Parser \_ -> failureError pos ((Just <<< NumberTooBig) number)
|
then Parser \_ -> failureError pos ((Just <<< IP4NumberTooBig) number)
|
||||||
else pure number
|
else pure number
|
||||||
|
|
||||||
-- | `ipv4_byte'` is `ipv4_byte` with a leading '.'.
|
-- | `ipv4_byte'` is `ipv4_byte` with a leading '.'.
|
||||||
|
|
@ -123,8 +136,20 @@ ipv4_shortened =
|
||||||
nb_zero_filling = 4 - (A.length chunks_part1 + A.length chunks_part2)
|
nb_zero_filling = 4 - (A.length chunks_part1 + A.length chunks_part2)
|
||||||
filling = A.fold (A.intersperse "." $ repeat nb_zero_filling "0")
|
filling = A.fold (A.intersperse "." $ repeat nb_zero_filling "0")
|
||||||
if nb_zero_filling < 1
|
if nb_zero_filling < 1
|
||||||
then Parser \_ -> failureError pos (Just IPv4UnrelevantShortRepresentation)
|
then Parser \_ -> failureError pos (Just IP4IrrelevantShortRepresentation)
|
||||||
else pure $ A.fold (A.intersperse "." [part1, filling, part2])
|
else pure $ A.fold (A.intersperse "." [part1, filling, part2])
|
||||||
|
|
||||||
ipv4 :: Parser IPv4Error String
|
ipv4 :: Parser IPv4Error String
|
||||||
ipv4 = ipv4_shortened <|> ipv4_generic4bytes
|
ipv4 = ipv4_shortened <|> ipv4_generic4bytes
|
||||||
|
|
||||||
|
-- | `ipv4_range` parses an ipv4 range, such as "192.0.2.0/24".
|
||||||
|
-- | If the parsing succeed, the whole string is returned.
|
||||||
|
ipv4_range :: Parser IPv4Error String
|
||||||
|
ipv4_range =
|
||||||
|
read_input do _ <- ipv4
|
||||||
|
_ <- char '/'
|
||||||
|
pos <- current_position
|
||||||
|
n <- nat
|
||||||
|
if between 0 32 n
|
||||||
|
then pure ""
|
||||||
|
else Parser \_ -> failureError pos (Just IP4InvalidRange)
|
||||||
|
|
|
||||||
|
|
@ -223,3 +223,37 @@ read_input p = do input <- current_input
|
||||||
_ <- p
|
_ <- p
|
||||||
endpos <- current_position
|
endpos <- current_position
|
||||||
pure $ CU.take (endpos - input.position) input.string
|
pure $ CU.take (endpos - input.position) input.string
|
||||||
|
|
||||||
|
-- | `errorParser` provides a parser that keeps the current input position but fails
|
||||||
|
-- | and gives the argument as the error.
|
||||||
|
errorParser :: forall e v. Maybe e -> Parser e v
|
||||||
|
errorParser e = Parser \input -> failureError input.position e
|
||||||
|
|
||||||
|
-- | `error_functor` or `(<:>)` is an `error functor`, allowing to encapsulate
|
||||||
|
-- | an error in another type.
|
||||||
|
-- |
|
||||||
|
-- |```
|
||||||
|
-- |data LoginParsingError -- Login parsing errors.
|
||||||
|
-- | = CannotParse
|
||||||
|
-- | | CannotEntirelyParse
|
||||||
|
-- | | Size Int Int Int
|
||||||
|
-- |
|
||||||
|
-- |data Error -- Login validation errors.
|
||||||
|
-- | = ParsingError (G.Error LoginParsingError)
|
||||||
|
-- |
|
||||||
|
-- |login_parser :: Parser LoginParsingError String
|
||||||
|
-- |login_parser = do
|
||||||
|
-- | input <- current_input
|
||||||
|
-- | _ <- many1 (alpha <|> digit) <:> \_ -> CannotParse
|
||||||
|
-- | _ <- SomeParsers.eof <:> \_ -> CannotEntirelyParse
|
||||||
|
-- | pos <- current_position
|
||||||
|
-- | if between min_login_size max_login_size pos
|
||||||
|
-- | then pure input.string
|
||||||
|
-- | else errorParser (Just $ Size min_login_size max_login_size pos)
|
||||||
|
-- |```
|
||||||
|
error_functor :: forall e v f. Parser e v -> (e -> f) -> Parser f v
|
||||||
|
error_functor (Parser p) f = Parser \i -> case p i of
|
||||||
|
Left e -> failureError e.position $ maybe Nothing (Just <<< f) e.error
|
||||||
|
Right v -> Right v
|
||||||
|
|
||||||
|
infixl 8 error_functor as <:>
|
||||||
|
|
|
||||||
554
src/GenericParser/RFC5322.purs
Normal file
554
src/GenericParser/RFC5322.purs
Normal file
|
|
@ -0,0 +1,554 @@
|
||||||
|
-- | This module implements the email address grammar found in RFC5322.
|
||||||
|
-- |
|
||||||
|
-- | STATUS: the parser works except for comments.
|
||||||
|
-- | Comments provoke a stack overflow, this must be investigated.
|
||||||
|
-- |
|
||||||
|
-- | Also, the parser needs a thorough review.
|
||||||
|
module GenericParser.RFC5322 where
|
||||||
|
|
||||||
|
import Prelude ((<$>), (<<<), bind, pure, ($), (<>), (==), (||), between, unit)
|
||||||
|
|
||||||
|
import Control.Alt ((<|>))
|
||||||
|
import Control.Lazy (defer)
|
||||||
|
import Data.Array as A
|
||||||
|
import Data.Char as C
|
||||||
|
import Data.Maybe (Maybe(..), maybe)
|
||||||
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
|
import GenericParser.BaseFunctions (id)
|
||||||
|
import GenericParser.Parser (Parser, sat, char, char_num, char_range, string, many1, tryMaybe)
|
||||||
|
-- ABNF core rules.
|
||||||
|
import GenericParser.RFC5234 (crlf, wsp, vchar, lf, cr)
|
||||||
|
import GenericParser.SomeParsers (alphanum)
|
||||||
|
|
||||||
|
-- | obs-FWS: obsolete folding white space.
|
||||||
|
-- |
|
||||||
|
-- | obs-FWS = `1*WSP *(CRLF 1*WSP)`
|
||||||
|
obs_fws :: forall e. Parser e String
|
||||||
|
obs_fws = do x <- A.many wsp
|
||||||
|
xs <- A.many $ do v <- crlf
|
||||||
|
w <- many1 wsp
|
||||||
|
pure $ v <> CU.fromCharArray w
|
||||||
|
pure $ A.fold $ [CU.fromCharArray x] <> xs
|
||||||
|
|
||||||
|
-- | FWS: folding white space. This can be described in plain english as:
|
||||||
|
-- | 1. an OPTIONAL line with potential white spaces followed by at least one white space
|
||||||
|
-- | 2. or, by the obs-FWS rule (meaning: many empty lines)
|
||||||
|
-- |
|
||||||
|
-- | FWS = `([*WSP CRLF] 1*WSP) / obs-FWS`
|
||||||
|
fws :: forall e. Parser e String
|
||||||
|
fws = do x <- tryMaybe do xs <- A.many wsp
|
||||||
|
v <- crlf
|
||||||
|
pure $ CU.fromCharArray xs <> v
|
||||||
|
w <- many1 wsp
|
||||||
|
let first_part = maybe "" id x
|
||||||
|
second_part = CU.fromCharArray w
|
||||||
|
pure $ first_part <> second_part
|
||||||
|
<|> obs_fws
|
||||||
|
|
||||||
|
-- | ctext: comment text, meaning printable US-ASCII characters excluding '(', ')' and '\'.
|
||||||
|
-- |
|
||||||
|
-- | ctext = `%d33-39 / %d42-91 / %d93-126 / obs-ctext`
|
||||||
|
ctext :: forall e. Parser e Char
|
||||||
|
ctext = sat cond <|> obs_ctext
|
||||||
|
where cond x = let charcode = C.toCharCode x
|
||||||
|
in between 33 39 charcode
|
||||||
|
|| between 42 91 charcode
|
||||||
|
|| between 93 126 charcode
|
||||||
|
|
||||||
|
-- | `quoted_pair`: pair of characters.
|
||||||
|
-- |
|
||||||
|
-- | quoted-pair = `("\" (VCHAR / WSP)) / obs-qp`
|
||||||
|
quoted_pair :: forall e. Parser e String
|
||||||
|
quoted_pair = do _ <- char '\\'
|
||||||
|
v <- vchar <|> wsp
|
||||||
|
pure $ "\\" <> CU.singleton v
|
||||||
|
<|> obs_qp
|
||||||
|
|
||||||
|
-- | Comment content.
|
||||||
|
-- |
|
||||||
|
-- | ccontent = `ctext / quoted-pair / comment`
|
||||||
|
ccontent :: forall e. Parser e String
|
||||||
|
ccontent = CU.singleton <$> ctext <|> quoted_pair <|> defer \_ -> comment
|
||||||
|
|
||||||
|
-- | Comment. Nothing to return since comments aren't to be processed.
|
||||||
|
-- |
|
||||||
|
-- | comment = `"(" *([FWS] ccontent) [FWS] ")"`
|
||||||
|
comment :: forall e. Parser e String
|
||||||
|
comment = do _ <- char '('
|
||||||
|
xs <- A.many do _ <- A.many fws
|
||||||
|
ccontent
|
||||||
|
_ <- char ')'
|
||||||
|
pure $ "(" <> A.fold xs <> ")"
|
||||||
|
|
||||||
|
-- | CFWS: comment folding white space.
|
||||||
|
-- |
|
||||||
|
-- | CFWS = `(1*([FWS] comment) [FWS]) / FWS`
|
||||||
|
cfws :: forall e. Parser e String
|
||||||
|
cfws = do xs <- many1 $ do _ <- tryMaybe fws
|
||||||
|
comment
|
||||||
|
pure $ A.fold xs
|
||||||
|
<|> fws
|
||||||
|
|
||||||
|
-- | `address`: email address.
|
||||||
|
-- |
|
||||||
|
-- | address = `mailbox / group`
|
||||||
|
address :: forall e. Parser e String
|
||||||
|
address = mailbox <|> group
|
||||||
|
|
||||||
|
-- | `mailbox`: mail address.
|
||||||
|
-- |
|
||||||
|
-- | mailbox = `name-addr / addr-spec`
|
||||||
|
mailbox :: forall e. Parser e String
|
||||||
|
mailbox = name_addr <|> addr_spec
|
||||||
|
|
||||||
|
-- | `name_addr`: address name.
|
||||||
|
-- |
|
||||||
|
-- | name-addr = `[display-name] angle-addr`
|
||||||
|
name_addr :: forall e. Parser e String
|
||||||
|
name_addr = do _ <- tryMaybe display_name
|
||||||
|
angle_addr
|
||||||
|
|
||||||
|
-- | `angle_addr` address specification between '<' and '>' characters.
|
||||||
|
-- |
|
||||||
|
-- | angle-addr = `[CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr`
|
||||||
|
angle_addr :: forall e. Parser e String
|
||||||
|
angle_addr = do _ <- tryMaybe cfws
|
||||||
|
_ <- char '<'
|
||||||
|
a <- addr_spec
|
||||||
|
_ <- char '>'
|
||||||
|
_ <- tryMaybe cfws
|
||||||
|
pure $ "<" <> a <> ">"
|
||||||
|
<|> obs_angle_addr
|
||||||
|
|
||||||
|
-- | `group`: a list of email addresses.
|
||||||
|
-- |
|
||||||
|
-- | group = `display-name ":" [group-list] ";" [CFWS]`
|
||||||
|
group :: forall e. Parser e String
|
||||||
|
group = do _ <- display_name
|
||||||
|
_ <- char ':'
|
||||||
|
xs <- tryMaybe group_list
|
||||||
|
_ <- char ';'
|
||||||
|
_ <- tryMaybe cfws
|
||||||
|
let res = ":" <> maybe "" id xs <> ";"
|
||||||
|
pure res
|
||||||
|
|
||||||
|
-- | `display_name`: displayed name, not the actual email address.
|
||||||
|
-- |
|
||||||
|
-- | display-name = `phrase`
|
||||||
|
display_name :: forall e. Parser e String
|
||||||
|
display_name = phrase
|
||||||
|
|
||||||
|
-- | `mailbox_list`
|
||||||
|
-- |
|
||||||
|
-- | mailbox-list = `(mailbox *("," mailbox)) / obs-mbox-list`
|
||||||
|
mailbox_list :: forall e. Parser e String
|
||||||
|
mailbox_list = do mb <- mailbox
|
||||||
|
xs <- A.many $ do _ <- char ','
|
||||||
|
mx <- mailbox
|
||||||
|
pure $ "," <> mx
|
||||||
|
pure $ mb <> A.fold xs
|
||||||
|
<|> obs_mbox_list
|
||||||
|
|
||||||
|
-- | `address_list`
|
||||||
|
-- |
|
||||||
|
-- | address-list = `(address *("," address)) / obs-addr-list`
|
||||||
|
address_list :: forall e. Parser e String
|
||||||
|
address_list = do a <- address
|
||||||
|
xs <- A.many do _ <- char ','
|
||||||
|
address
|
||||||
|
pure $ A.fold $ A.intersperse "," $ [a] <> xs
|
||||||
|
<|> obs_addr_list
|
||||||
|
|
||||||
|
-- | `group_list`
|
||||||
|
-- |
|
||||||
|
-- | group-list = `mailbox-list / CFWS / obs-group-list`
|
||||||
|
group_list :: forall e. Parser e String
|
||||||
|
group_list = mailbox_list <|> cfws <|> obs_group_list
|
||||||
|
|
||||||
|
|
||||||
|
-- | `atext`: atom accepted characters.
|
||||||
|
-- | Printable US-ASCII characters not including specials.
|
||||||
|
-- |
|
||||||
|
-- | atext = `ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /`
|
||||||
|
-- | `"-" / "/" / "=" / "?" / "^" / "_" / backquote / "{" / "|" / "}" / "~"`
|
||||||
|
atext :: forall e. Parser e Char
|
||||||
|
atext = alphanum
|
||||||
|
<|> char '!' <|> char '#'
|
||||||
|
<|> char '$' <|> char '%'
|
||||||
|
<|> char '&' <|> char '\''
|
||||||
|
<|> char '*' <|> char '+'
|
||||||
|
<|> char '-' <|> char '/'
|
||||||
|
<|> char '=' <|> char '?'
|
||||||
|
<|> char '^' <|> char '_'
|
||||||
|
<|> char '`' <|> char '{'
|
||||||
|
<|> char '|' <|> char '}'
|
||||||
|
<|> char '~'
|
||||||
|
|
||||||
|
|
||||||
|
-- | `atom`
|
||||||
|
-- |
|
||||||
|
-- | atom = `[CFWS] 1*atext [CFWS]`
|
||||||
|
atom :: forall e. Parser e String
|
||||||
|
atom = CU.fromCharArray <$> do _ <- tryMaybe cfws
|
||||||
|
a <- many1 atext
|
||||||
|
_ <- tryMaybe cfws
|
||||||
|
pure a
|
||||||
|
|
||||||
|
-- | `dot_atom_text`
|
||||||
|
-- |
|
||||||
|
-- | dot-atom-text = `1*atext *("." 1*atext)`
|
||||||
|
dot_atom_text :: forall e. Parser e String
|
||||||
|
dot_atom_text = do xs0 <- many1 atext
|
||||||
|
xs1 <- A.many $ do _ <- char '.'
|
||||||
|
xs <- many1 atext
|
||||||
|
pure $ "." <> CU.fromCharArray xs
|
||||||
|
let str0 = CU.fromCharArray xs0
|
||||||
|
str1 = A.fold xs1
|
||||||
|
pure $ str0 <> str1
|
||||||
|
|
||||||
|
-- | `dot_atom`
|
||||||
|
-- |
|
||||||
|
-- | dot-atom = `[CFWS] dot-atom-text [CFWS]`
|
||||||
|
dot_atom :: forall e. Parser e String
|
||||||
|
dot_atom = do _ <- tryMaybe cfws
|
||||||
|
x <- dot_atom_text
|
||||||
|
_ <- tryMaybe cfws
|
||||||
|
pure x
|
||||||
|
|
||||||
|
-- | `specials`: special characters that do not appear in `atext`.
|
||||||
|
-- |
|
||||||
|
-- | specials = `"(" / ")" / "<" / ">" / "[" / "]" / ":" / ";" / "@" /`
|
||||||
|
-- | `"\" / "," / "." / DQUOTE`
|
||||||
|
specials :: forall e. Parser e Char
|
||||||
|
specials = char '('
|
||||||
|
<|> char ')'
|
||||||
|
<|> char '<'
|
||||||
|
<|> char '>'
|
||||||
|
<|> char '['
|
||||||
|
<|> char ']'
|
||||||
|
<|> char ':'
|
||||||
|
<|> char ';'
|
||||||
|
<|> char '@'
|
||||||
|
<|> char '\\'
|
||||||
|
<|> char ','
|
||||||
|
<|> char '.'
|
||||||
|
<|> char '"'
|
||||||
|
|
||||||
|
-- | `addr_spec`
|
||||||
|
-- |
|
||||||
|
-- | addr-spec = `local-part "@" domain`
|
||||||
|
addr_spec :: forall e. Parser e String
|
||||||
|
addr_spec = do lpart <- local_part
|
||||||
|
_ <- char '@'
|
||||||
|
dom <- domain
|
||||||
|
pure $ lpart <> "@" <> dom
|
||||||
|
|
||||||
|
-- | `local_part`
|
||||||
|
-- |
|
||||||
|
-- | local-part = `dot-atom / quoted-string / obs-local-part`
|
||||||
|
local_part :: forall e. Parser e String
|
||||||
|
local_part = dot_atom <|> quoted_string <|> obs_local_part
|
||||||
|
|
||||||
|
-- | `domain`: this is a parser for a domain.
|
||||||
|
-- | The parser from `GenericParser.DomainParser` cannot be used, these parsers are different.
|
||||||
|
-- | `DomainParser` is more strict, it doesn't allow comments or IP addresses for example.
|
||||||
|
-- | Therefore, errors are differents from a parser to another.
|
||||||
|
-- |
|
||||||
|
-- | domain = `dot-atom / domain-literal / obs-domain`
|
||||||
|
domain :: forall e. Parser e String
|
||||||
|
domain = dot_atom <|> domain_literal <|> obs_domain
|
||||||
|
|
||||||
|
-- | `domain_literal`
|
||||||
|
-- |
|
||||||
|
-- | domain-literal = `[CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]`
|
||||||
|
domain_literal :: forall e. Parser e String
|
||||||
|
domain_literal = do s <- tryMaybe cfws
|
||||||
|
_ <- char '['
|
||||||
|
xs <- A.many do _ <- tryMaybe fws
|
||||||
|
dtext
|
||||||
|
m <- tryMaybe fws
|
||||||
|
_ <- char ']'
|
||||||
|
e <- tryMaybe cfws
|
||||||
|
pure $ maybe "" id s
|
||||||
|
<> "[" <> A.fold xs <> maybe "" id m <> "]"
|
||||||
|
<> maybe "" id e
|
||||||
|
|
||||||
|
-- | dtext: characters in domains.
|
||||||
|
-- | Printable US-ASCII characters not including "[", "]", or "\".
|
||||||
|
-- |
|
||||||
|
-- | dtext = `%d33-90 / %d94-126 / obs-dtext`
|
||||||
|
dtext :: forall e. Parser e String
|
||||||
|
dtext = CU.singleton <$> sat cond <|> obs_dtext
|
||||||
|
where cond x = let charcode = C.toCharCode x
|
||||||
|
in between 33 90 charcode || between 94 126 charcode
|
||||||
|
|
||||||
|
-- | `obs_angle_addr`: obsolete address specification between '<' and '>' characters.
|
||||||
|
-- |
|
||||||
|
-- | obs-angle-addr = `[CFWS] "<" obs-route addr-spec ">" [CFWS]`
|
||||||
|
obs_angle_addr :: forall e. Parser e String
|
||||||
|
obs_angle_addr = do s <- tryMaybe cfws
|
||||||
|
_ <- char '<'
|
||||||
|
r <- obs_route
|
||||||
|
a <- addr_spec
|
||||||
|
_ <- char '>'
|
||||||
|
e <- tryMaybe cfws
|
||||||
|
pure $ maybe "" id s <> "<" <> r <> a <> ">" <> maybe "" id e
|
||||||
|
|
||||||
|
-- | `obs_route`
|
||||||
|
-- |
|
||||||
|
-- | obs-route = `obs-domain-list ":"`
|
||||||
|
obs_route :: forall e. Parser e String
|
||||||
|
obs_route = do l <- obs_domain_list
|
||||||
|
_ <- char ':'
|
||||||
|
pure $ l <> ":"
|
||||||
|
|
||||||
|
-- | `obs_domain_list`
|
||||||
|
-- |
|
||||||
|
-- | obs-domain-list = `*(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])`
|
||||||
|
obs_domain_list :: forall e. Parser e String
|
||||||
|
obs_domain_list = do s <- A.many $ cfws <|> CU.singleton <$> char ','
|
||||||
|
_ <- char '@'
|
||||||
|
d <- domain
|
||||||
|
xs <- A.many do _ <- char ','
|
||||||
|
c <- tryMaybe cfws
|
||||||
|
_ <- char '@'
|
||||||
|
d2 <- domain
|
||||||
|
pure $ "," <> maybe "" id c <> d2
|
||||||
|
pure $ A.fold s <> d <> A.fold xs
|
||||||
|
|
||||||
|
-- | `obs_mbox_list`
|
||||||
|
-- |
|
||||||
|
-- | obs-mbox-list = `*([CFWS] ",") mailbox *("," [mailbox / CFWS])`
|
||||||
|
obs_mbox_list :: forall e. Parser e String
|
||||||
|
obs_mbox_list = do _ <- A.many $ do _ <- cfws
|
||||||
|
_ <- char ','
|
||||||
|
pure unit
|
||||||
|
mb <- mailbox
|
||||||
|
xs <- A.many $ do _ <- char ','
|
||||||
|
x <- mailbox <|> do _ <- cfws
|
||||||
|
pure ""
|
||||||
|
pure x
|
||||||
|
pure $ mb <> A.fold xs
|
||||||
|
|
||||||
|
-- | `obs_addr_list`
|
||||||
|
-- |
|
||||||
|
-- | obs-addr-list = `*([CFWS] ",") address *("," [address / CFWS])`
|
||||||
|
obs_addr_list :: forall e. Parser e String
|
||||||
|
obs_addr_list = do _ <- A.many do _ <- tryMaybe cfws
|
||||||
|
char ','
|
||||||
|
a <- address
|
||||||
|
xs <- A.many do _ <- char ','
|
||||||
|
address <|> do _ <- cfws
|
||||||
|
pure ""
|
||||||
|
let res = A.fold $ A.intersperse "," $ [a] <> xs
|
||||||
|
pure res
|
||||||
|
|
||||||
|
-- | `obs_group_list`
|
||||||
|
-- |
|
||||||
|
-- | obs-group-list = `1*([CFWS] ",") [CFWS]`
|
||||||
|
obs_group_list :: forall e. Parser e String
|
||||||
|
obs_group_list = do xs <- many1 $ do x <- tryMaybe cfws
|
||||||
|
_ <- char ','
|
||||||
|
pure $ maybe "" id x <> ","
|
||||||
|
c <- tryMaybe cfws
|
||||||
|
pure $ A.fold xs <> maybe "" id c
|
||||||
|
|
||||||
|
-- | `obs_local_part`
|
||||||
|
-- |
|
||||||
|
-- | obs-local-part = `word *("." word)`
|
||||||
|
obs_local_part :: forall e. Parser e String
|
||||||
|
obs_local_part = do w <- word
|
||||||
|
ws <- A.many $ do _ <- char '.'
|
||||||
|
w1 <- word
|
||||||
|
pure $ "." <> w1
|
||||||
|
pure $ w <> A.fold ws
|
||||||
|
|
||||||
|
-- | `obs_domain`
|
||||||
|
-- |
|
||||||
|
-- | obs-domain = `atom *("." atom)`
|
||||||
|
obs_domain :: forall e. Parser e String
|
||||||
|
obs_domain = do a <- atom
|
||||||
|
xs <- A.many $ do _ <- char '.'
|
||||||
|
x <- atom
|
||||||
|
pure $ "." <> x
|
||||||
|
pure $ a <> A.fold xs
|
||||||
|
|
||||||
|
-- | `obs_dtext`: obsolete domain text.
|
||||||
|
-- |
|
||||||
|
-- | obs-dtext = `obs-NO-WS-CTL / quoted-pair`
|
||||||
|
obs_dtext :: forall e. Parser e String
|
||||||
|
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
|
||||||
|
|
||||||
|
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
|
||||||
|
-- | line feed and white space characters.
|
||||||
|
-- |
|
||||||
|
-- | obs-NO-WS-CTL = `%d1-8 / %d11 / %d12 / %d14-31 / %d127`
|
||||||
|
obs_no_ws_ctl :: forall e. Parser e Char
|
||||||
|
obs_no_ws_ctl = sat cond
|
||||||
|
where cond x = let charcode = C.toCharCode x
|
||||||
|
in between 1 8 charcode
|
||||||
|
|| between 11 12 charcode
|
||||||
|
|| between 14 31 charcode
|
||||||
|
|| charcode == 127
|
||||||
|
|
||||||
|
-- | obs-ctext: obsolete comment text.
|
||||||
|
-- |
|
||||||
|
-- | obs-ctext = `obs-NO-WS-CTL`
|
||||||
|
obs_ctext :: forall e. Parser e Char
|
||||||
|
obs_ctext = obs_no_ws_ctl
|
||||||
|
|
||||||
|
-- | `obs_qtext`: obsolete accepted quoted text.
|
||||||
|
-- |
|
||||||
|
-- | obs-qtext = `obs-NO-WS-CTL`
|
||||||
|
obs_qtext :: forall e. Parser e Char
|
||||||
|
obs_qtext = obs_no_ws_ctl
|
||||||
|
|
||||||
|
-- | `obs_utext`: obsolete text.
|
||||||
|
-- |
|
||||||
|
-- | obs-utext = `%d0 / obs-NO-WS-CTL / VCHAR`
|
||||||
|
obs_utext :: forall e. Parser e Char
|
||||||
|
obs_utext = char_num 0 <|> obs_no_ws_ctl <|> vchar
|
||||||
|
|
||||||
|
-- | `obs_qp`: obsolete quoted-pair rule.
|
||||||
|
-- |
|
||||||
|
-- | obs-qp = `"\" (%d0 / obs-NO-WS-CTL / LF / CR)`
|
||||||
|
obs_qp :: forall e. Parser e String
|
||||||
|
obs_qp = do _ <- char '\\'
|
||||||
|
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
|
||||||
|
pure $ "\\" <> CU.singleton v
|
||||||
|
|
||||||
|
-- | `obs_body`: obsolete body.
|
||||||
|
-- |
|
||||||
|
-- | Note: the simpler version found in the errata is implemented, which basically accept everything.
|
||||||
|
-- |
|
||||||
|
-- | Note: `text` is replaced by `vchar`.
|
||||||
|
-- |
|
||||||
|
-- | (RFC)
|
||||||
|
-- | obs-body = `*((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)`
|
||||||
|
-- |
|
||||||
|
-- | (RFC Errata v1)
|
||||||
|
-- | obs-body = `*(%d0-127)`
|
||||||
|
-- |
|
||||||
|
-- | (RFC Errata v2)
|
||||||
|
-- | obs-body = `*(d0 /text / LF / CR)`
|
||||||
|
-- Errata v1
|
||||||
|
--obs_body :: forall e. Parser e String
|
||||||
|
--obs_body = CU.fromCharArray <$> A.many item
|
||||||
|
|
||||||
|
-- Errata v2
|
||||||
|
obs_body :: forall e. Parser e String
|
||||||
|
obs_body = CU.fromCharArray <$> do A.many (char_num 0 <|> vchar <|> lf <|> cr)
|
||||||
|
|
||||||
|
--obs_body original
|
||||||
|
--obs_body :: forall e. Parser e String
|
||||||
|
--obs_body = do A.many $ do _ <- A.many lf
|
||||||
|
-- _ <- A.many cr
|
||||||
|
-- v <- A.many $ do x <- char_num 0 <|> text
|
||||||
|
-- _ <- A.many lf
|
||||||
|
-- _ <- A.many cr
|
||||||
|
-- pure x
|
||||||
|
-- pure $ A.fold v
|
||||||
|
-- <|> _ <- crlf
|
||||||
|
-- pure ""
|
||||||
|
|
||||||
|
-- | `obs_unstruct`
|
||||||
|
-- |
|
||||||
|
-- | Note: implement the version found in the Errata page.
|
||||||
|
-- |
|
||||||
|
-- | obs-unstruct = `*((*LF *CR *(obs-utext *LF *CR)) / FWS)`
|
||||||
|
-- |
|
||||||
|
-- | (RFC Errata)
|
||||||
|
-- | obs-unstruct = `*( (*CR 1*(obs-utext / FWS)) / 1*LF ) *CR`
|
||||||
|
obs_unstruct :: forall e. Parser e String
|
||||||
|
obs_unstruct = (CU.fromCharArray <<< A.fold) <$> A.many do _ <- A.many cr
|
||||||
|
many1 (obs_utext <|> do _ <- fws
|
||||||
|
pure ' ')
|
||||||
|
<|> do _ <- fws
|
||||||
|
pure ""
|
||||||
|
|
||||||
|
-- | `obs_phrase`: obsolete "phrase".
|
||||||
|
-- |
|
||||||
|
-- | obs-phrase = `word *(word / "." / CFWS)`
|
||||||
|
obs_phrase :: forall e. Parser e String
|
||||||
|
obs_phrase = do w <- word
|
||||||
|
ws <- A.many (word <|> string "." <|> do _ <- cfws
|
||||||
|
pure "")
|
||||||
|
pure $ w <> A.fold ws
|
||||||
|
|
||||||
|
-- | `word`.
|
||||||
|
-- |
|
||||||
|
-- | word = `atom / quoted-string`
|
||||||
|
word :: forall e. Parser e String
|
||||||
|
word = atom <|> quoted_string
|
||||||
|
|
||||||
|
-- | `phrase`: list of words (at least one) or the obsolete phrase rule.
|
||||||
|
-- |
|
||||||
|
-- | phrase = `1*word / obs-phrase`
|
||||||
|
phrase :: forall e. Parser e String
|
||||||
|
phrase = do ws <- many1 word
|
||||||
|
pure $ A.fold ws
|
||||||
|
<|> obs_phrase
|
||||||
|
|
||||||
|
-- | `unstructured`
|
||||||
|
-- |
|
||||||
|
-- | unstructured = `(*([FWS] VCHAR) *WSP) / obs-unstruct`
|
||||||
|
unstructured :: forall e. Parser e String
|
||||||
|
unstructured = do v <- A.many $ do _ <- fws
|
||||||
|
vchar
|
||||||
|
_ <- A.many wsp
|
||||||
|
pure $ CU.fromCharArray v
|
||||||
|
<|> obs_unstruct
|
||||||
|
|
||||||
|
|
||||||
|
-- | `obs_phrase_list`: obsolete list of phrases.
|
||||||
|
-- |
|
||||||
|
-- | obs-phrase-list = `[phrase / CFWS] *("," [phrase / CFWS])`
|
||||||
|
obs_phrase_list :: forall e. Parser e String
|
||||||
|
obs_phrase_list = do first_phrase <- phrase_or_cfws
|
||||||
|
xs <- A.many $ do _ <- char ','
|
||||||
|
phrase_or_cfws
|
||||||
|
pure $ A.fold $ [first_phrase] <> xs
|
||||||
|
where phrase_or_cfws = do first_phrase <- tryMaybe phrase
|
||||||
|
case first_phrase of
|
||||||
|
Nothing -> do _ <- cfws
|
||||||
|
pure ""
|
||||||
|
Just x -> pure x
|
||||||
|
|
||||||
|
-- | `qtext`: printable US-ASCII characters not including "\" or the quote character.
|
||||||
|
-- |
|
||||||
|
-- | WARNING: contrary to the RFC, the `qtext` rule accepts a space character (`%d32`).
|
||||||
|
-- |
|
||||||
|
-- | qtext = `%d33 / %d35-91 / %d93-126 / obs-qtext`
|
||||||
|
qtext :: forall e. Parser e Char
|
||||||
|
qtext = char_range 32 33 <|> char_range 35 91 <|> char_range 93 126 <|> obs_qtext
|
||||||
|
|
||||||
|
-- | `qcontent`
|
||||||
|
-- |
|
||||||
|
-- | qcontent = `qtext / quoted-pair`
|
||||||
|
qcontent :: forall e. Parser e String
|
||||||
|
qcontent = CU.singleton <$> qtext <|> quoted_pair
|
||||||
|
|
||||||
|
-- | `quoted_string`
|
||||||
|
-- |
|
||||||
|
-- | WARNING: this rule was changed in order to take into account the new `qtext` rule,
|
||||||
|
-- | which now accepts the space character. This new rule, as implemented, still allows
|
||||||
|
-- | for multiple line returns in the quoted string.
|
||||||
|
-- |
|
||||||
|
-- |
|
||||||
|
-- | Original RFC5322 rule:
|
||||||
|
-- |
|
||||||
|
-- | quoted-string = `[CFWS] DQUOTE *([FWS] qcontent) [FWS] DQUOTE [CFWS]`
|
||||||
|
-- |
|
||||||
|
-- | Implemented rule:
|
||||||
|
-- |
|
||||||
|
-- | quoted-string = `[CFWS] DQUOTE *([CRLF] qcontent) [FWS] DQUOTE [CFWS]`
|
||||||
|
quoted_string :: forall e. Parser e String
|
||||||
|
quoted_string = do s <- tryMaybe cfws
|
||||||
|
_ <- char '"'
|
||||||
|
m <- A.many do l <- tryMaybe crlf
|
||||||
|
c <- qcontent
|
||||||
|
pure $ maybe "" id l <> c
|
||||||
|
_ <- char '"'
|
||||||
|
e <- tryMaybe cfws
|
||||||
|
pure $ maybe "" id s <> "\"" <> A.fold m <> "\"" <> maybe "" id e
|
||||||
|
|
@ -14,7 +14,8 @@ import GenericParser.DomainParserRFC1035 as RFC1035
|
||||||
import GenericParser.DomainParser as ModernDomains
|
import GenericParser.DomainParser as ModernDomains
|
||||||
import GenericParser.IPAddress (IPv4Error(..))
|
import GenericParser.IPAddress (IPv4Error(..))
|
||||||
import GenericParser.IPAddress as IP
|
import GenericParser.IPAddress as IP
|
||||||
import GenericParser.EmailAddress as E
|
import GenericParser.EmailAddress as EA
|
||||||
|
import GenericParser.RFC5322 as E
|
||||||
import Test.TestValues as T
|
import Test.TestValues as T
|
||||||
|
|
||||||
run :: forall e v. Parser e v -> String -> P.Result e v
|
run :: forall e v. Parser e v -> String -> P.Result e v
|
||||||
|
|
@ -68,19 +69,20 @@ test_series :: forall e v
|
||||||
test_series l p v e a = foreachE a (\s -> logtest l p s v e)
|
test_series l p v e a = foreachE a (\s -> logtest l p s v e)
|
||||||
|
|
||||||
showerror_ipv6 :: IP.IPv6Error -> String
|
showerror_ipv6 :: IP.IPv6Error -> String
|
||||||
showerror_ipv6 (IP.InvalidCharacter) = "InvalidCharacter"
|
showerror_ipv6 (IP.IP6TooManyHexaDecimalCharacters) = "TooManyHexaDecimalCharacters"
|
||||||
showerror_ipv6 (IP.TooManyHexaDecimalCharacters) = "TooManyHexaDecimalCharacters"
|
showerror_ipv6 (IP.IP6NotEnoughChunks) = "NotEnoughChunks"
|
||||||
showerror_ipv6 (IP.NotEnoughChunks) = "NotEnoughChunks"
|
showerror_ipv6 (IP.IP6TooManyChunks) = "TooManyChunks"
|
||||||
showerror_ipv6 (IP.TooManyChunks) = "TooManyChunks"
|
showerror_ipv6 IP.IP6IrrelevantShortRepresentation = "useless double dots"
|
||||||
showerror_ipv6 IP.IPv6UnrelevantShortRepresentation = "useless double dots"
|
showerror_ipv6 IP.IP6InvalidRange = "invalid IPv6 range"
|
||||||
|
|
||||||
showerror_ipv4 :: IP.IPv4Error -> String
|
showerror_ipv4 :: IP.IPv4Error -> String
|
||||||
showerror_ipv4 (NumberTooBig x) = "value '" <> show x <> "' is > 255"
|
showerror_ipv4 (IP4NumberTooBig x) = "value '" <> show x <> "' is > 255"
|
||||||
showerror_ipv4 IPv4UnrelevantShortRepresentation = "useless double dots"
|
showerror_ipv4 IP4IrrelevantShortRepresentation = "useless double dots"
|
||||||
|
showerror_ipv4 IP4InvalidRange = "invalid IPv4 range"
|
||||||
|
|
||||||
showerror_email :: E.EmailError -> String
|
showerror_email :: EA.EmailError -> String
|
||||||
showerror_email E.InvalidCharacter = "InvalidCharacter"
|
showerror_email EA.InvalidCharacter = "InvalidCharacter"
|
||||||
showerror_email (E.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e
|
showerror_email (EA.InvalidDomain e) = "invalid domain: " <> maybe "no domain error provided, weird" showerror e
|
||||||
|
|
||||||
main :: Effect Unit
|
main :: Effect Unit
|
||||||
main = do
|
main = do
|
||||||
|
|
@ -103,7 +105,7 @@ main = do
|
||||||
test_series "IP.ipv6" IP.ipv6 id showerror_ipv6 T.ipv6_addresses
|
test_series "IP.ipv6" IP.ipv6 id showerror_ipv6 T.ipv6_addresses
|
||||||
|
|
||||||
log ""
|
log ""
|
||||||
test_series "E.email" E.email id showerror_email T.valid_email_addresses
|
test_series "EA.email" EA.email id showerror_email T.valid_email_addresses
|
||||||
|
|
||||||
log ""
|
log ""
|
||||||
test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short
|
test_series "E.address (short)" E.address id showerror_email T.valid_email_addresses_short
|
||||||
|
|
@ -125,3 +127,11 @@ main = do
|
||||||
|
|
||||||
log "Does parsers behave correctly (give the exact same input)?"
|
log "Does parsers behave correctly (give the exact same input)?"
|
||||||
foreachE T.valid_email_addresses_short (\s -> compare_parsers "E.address" (P.read_input E.address) E.address s)
|
foreachE T.valid_email_addresses_short (\s -> compare_parsers "E.address" (P.read_input E.address) E.address s)
|
||||||
|
|
||||||
|
log ""
|
||||||
|
let ip4ranges = [ "10.0.0.1/24", "192.168.0.1/32", "1.2.3.4/0", "192.168.0.1/33" ]
|
||||||
|
test_series "IP.ipv4_range" IP.ipv4_range id showerror_ipv4 ip4ranges
|
||||||
|
|
||||||
|
log ""
|
||||||
|
let ip6ranges = [ "2001::1/56", "2001:1:1::1/0", "::1/128", "::1/129" ]
|
||||||
|
test_series "IP.ipv6_range" IP.ipv6_range id showerror_ipv6 ip6ranges
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue