Parser: char_num & char_range, Email: quoted-pair

2024-01-28 01:38:17 +01:00 · 2024-01-28 01:38:17 +01:00 · c88cb8215b
commit c88cb8215b
parent ca52e27594
2 changed files with 40 additions and 14 deletions
--- a/src/GenericParser/EmailAddress.purs
+++ b/src/GenericParser/EmailAddress.purs
@ -2,7 +2,7 @@
 -- | This module is experimental and doesn't follow every rule for an email address, yet.
 module GenericParser.EmailAddress where
-import Prelude (Unit, bind, pure, ($), (<>), (==), (||), between, void)
+import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void)
 import Control.Alt ((<|>))
 import Data.Array as A
@ -12,12 +12,12 @@ import Data.Maybe (Maybe(..))
 import Data.String.CodeUnits as CU
 import GenericParser.Parser (Parser(..)
-                            , sat, char, item, many1, tryMaybe
+                            , sat, char, char_num, char_range, item, many1, tryMaybe
                            , current_input, failureError, parse, rollback, until)
 import GenericParser.DomainParser.Common (DomainError)
 import GenericParser.DomainParser (sub_eof)
 -- ABNF core rules.
-import GenericParser.RFC5234 (crlf, digit, wsp)
+import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
 import GenericParser.SomeParsers (letter)
 data EmailError
@ -56,9 +56,14 @@ ctext = sat cond <|> obs_ctext
                   || between 42 91 charcode
                   || between 93 126 charcode
-- | TODO: `quoted_pair`
+-- | `quoted_pair`: pair of characters.
-quoted_pair :: forall e. Parser e Char
+-- |
-quoted_pair = char ' '
+-- | quoted-pair     =   ("\" (VCHAR / WSP)) / obs-qp
 quoted_pair :: forall e. Parser e String
 quoted_pair = do _ <- char '\\'
                 v <- vchar <|> wsp
                 pure $ "\\" <> CU.singleton v
               <|> obs_qp
 -- | ccontent        =   ctext / quoted-pair / comment
 -- |
@ -122,8 +127,8 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
 -- | dtext           =   %d33-90 /          ; Printable US-ASCII
 -- |                     %d94-126 /         ;  characters not including
 -- |                     obs-dtext          ;  "[", "]", or "\"
-dtext :: forall e. Parser e Char
+dtext :: forall e. Parser e String
-dtext = sat cond <|> obs_dtext
+dtext = CU.singleton <$> sat cond <|> obs_dtext
  where cond x = let charcode = C.toCharCode x
                 in  between 33 90 charcode || between 94 126 charcode
@ -147,8 +152,8 @@ dtext = sat cond <|> obs_dtext
 -- | TODO: Obsolete domain text.
 -- | obs-dtext       =   obs-NO-WS-CTL / quoted-pair
-obs_dtext :: forall e. Parser e Char
+obs_dtext :: forall e. Parser e String
-obs_dtext = obs_no_ws_ctl <|> quoted_pair
+obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
 -- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
 -- | line feed and white space characters.
@ -177,9 +182,16 @@ obs_qtext :: forall e. Parser e Char
 obs_qtext = obs_no_ws_ctl
 --obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
--
+
--obs-qp          =   "\" (%d0 / obs-NO-WS-CTL / LF / CR)
+
--
+-- | `obs_qp`: obsolete quoted-pair rule.
 -- |
 -- | obs-qp         =   "\" (%d0 / obs-NO-WS-CTL / LF / CR)
 obs_qp :: forall e. Parser e String
 obs_qp = do _ <- char '\\'
            v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
            pure $ "\\" <> CU.singleton v
 --obs-body        =   *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
 --
 --obs-unstruct    =   *((*LF *CR *(obs-utext *LF *CR)) / FWS)
--- a/src/GenericParser/Parser.purs
+++ b/src/GenericParser/Parser.purs
@ -1,11 +1,12 @@
 module GenericParser.Parser where
-import Prelude
+import Prelude (between, (<<<), bind, (==), ($), pure, class Bind, unit, (+), (<>), class Applicative, class Functor, Unit, class Apply)
 import Control.Alt (class Alt, (<|>))
 import Control.Alternative (class Alternative)
 import Control.Lazy (class Lazy, defer)
 import Control.Plus (class Plus, empty)
 import Data.Array as A
 import Data.Char as C
 import Data.Either (Either(..))
 import Data.Maybe (Maybe(..), maybe)
 import Data.String as S
@ -146,6 +147,19 @@ sat p = do pos <- current_position
 char :: forall e. Char -> Parser e Char
 char x = sat (_ == x)
 -- | `char_num` checks a character based on its decimal code number.
 -- | Useful mainly to parse unprintable characters.
 -- |
 -- | Example: `char_num 32` checks for a space, such as `char ' '`.
 char_num :: forall e. Int -> Parser e Char
 char_num n = sat (\c -> n == C.toCharCode c)
 -- | `char_range` checks a character based on a range of possible decimal code numbers.
 -- |
 -- | Example, checking all visible ASCII characters: `char_range 33 126`.
 char_range :: forall e. Int -> Int -> Parser e Char
 char_range n1 n2 = sat (\c -> between n1 n2 $ C.toCharCode c)
 string :: forall e. String -> Parser e String
 string str = case A.uncons (CU.toCharArray str) of
  Nothing -> Parser \input -> success input ""