diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index 05d9230..66ce10c 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -2,7 +2,7 @@ -- | This module is experimental and doesn't follow every rule for an email address, yet. module GenericParser.EmailAddress where -import Prelude (Unit, bind, pure, ($), (<>), (==), (||), between, void) +import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void) import Control.Alt ((<|>)) import Data.Array as A @@ -12,12 +12,12 @@ import Data.Maybe (Maybe(..)) import Data.String.CodeUnits as CU import GenericParser.Parser (Parser(..) - , sat, char, item, many1, tryMaybe + , sat, char, char_num, char_range, item, many1, tryMaybe , current_input, failureError, parse, rollback, until) import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser (sub_eof) -- ABNF core rules. -import GenericParser.RFC5234 (crlf, digit, wsp) +import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr) import GenericParser.SomeParsers (letter) data EmailError @@ -56,9 +56,14 @@ ctext = sat cond <|> obs_ctext || between 42 91 charcode || between 93 126 charcode --- | TODO: `quoted_pair` -quoted_pair :: forall e. Parser e Char -quoted_pair = char ' ' +-- | `quoted_pair`: pair of characters. +-- | +-- | quoted-pair = ("\" (VCHAR / WSP)) / obs-qp +quoted_pair :: forall e. Parser e String +quoted_pair = do _ <- char '\\' + v <- vchar <|> wsp + pure $ "\\" <> CU.singleton v + <|> obs_qp -- | ccontent = ctext / quoted-pair / comment -- | @@ -122,8 +127,8 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws -- | dtext = %d33-90 / ; Printable US-ASCII -- | %d94-126 / ; characters not including -- | obs-dtext ; "[", "]", or "\" -dtext :: forall e. Parser e Char -dtext = sat cond <|> obs_dtext +dtext :: forall e. Parser e String +dtext = CU.singleton <$> sat cond <|> obs_dtext where cond x = let charcode = C.toCharCode x in between 33 90 charcode || between 94 126 charcode @@ -147,8 +152,8 @@ dtext = sat cond <|> obs_dtext -- | TODO: Obsolete domain text. -- | obs-dtext = obs-NO-WS-CTL / quoted-pair -obs_dtext :: forall e. Parser e Char -obs_dtext = obs_no_ws_ctl <|> quoted_pair +obs_dtext :: forall e. Parser e String +obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair -- | obs-NO-WS-CTL: US-ASCII control characters without carriage return, -- | line feed and white space characters. @@ -177,9 +182,16 @@ obs_qtext :: forall e. Parser e Char obs_qtext = obs_no_ws_ctl --obs-utext = %d0 / obs-NO-WS-CTL / VCHAR --- ---obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) --- + + +-- | `obs_qp`: obsolete quoted-pair rule. +-- | +-- | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) +obs_qp :: forall e. Parser e String +obs_qp = do _ <- char '\\' + v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr + pure $ "\\" <> CU.singleton v + --obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) -- --obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) diff --git a/src/GenericParser/Parser.purs b/src/GenericParser/Parser.purs index 626e8d4..8a99ac2 100644 --- a/src/GenericParser/Parser.purs +++ b/src/GenericParser/Parser.purs @@ -1,11 +1,12 @@ module GenericParser.Parser where -import Prelude +import Prelude (between, (<<<), bind, (==), ($), pure, class Bind, unit, (+), (<>), class Applicative, class Functor, Unit, class Apply) import Control.Alt (class Alt, (<|>)) import Control.Alternative (class Alternative) import Control.Lazy (class Lazy, defer) import Control.Plus (class Plus, empty) import Data.Array as A +import Data.Char as C import Data.Either (Either(..)) import Data.Maybe (Maybe(..), maybe) import Data.String as S @@ -146,6 +147,19 @@ sat p = do pos <- current_position char :: forall e. Char -> Parser e Char char x = sat (_ == x) +-- | `char_num` checks a character based on its decimal code number. +-- | Useful mainly to parse unprintable characters. +-- | +-- | Example: `char_num 32` checks for a space, such as `char ' '`. +char_num :: forall e. Int -> Parser e Char +char_num n = sat (\c -> n == C.toCharCode c) + +-- | `char_range` checks a character based on a range of possible decimal code numbers. +-- | +-- | Example, checking all visible ASCII characters: `char_range 33 126`. +char_range :: forall e. Int -> Int -> Parser e Char +char_range n1 n2 = sat (\c -> between n1 n2 $ C.toCharCode c) + string :: forall e. String -> Parser e String string str = case A.uncons (CU.toCharArray str) of Nothing -> Parser \input -> success input ""