Parser: char_num & char_range, Email: quoted-pair

This commit is contained in:
Philippe Pittoli 2024-01-28 01:38:17 +01:00
parent ca52e27594
commit c88cb8215b
2 changed files with 40 additions and 14 deletions

View File

@ -2,7 +2,7 @@
-- | This module is experimental and doesn't follow every rule for an email address, yet.
module GenericParser.EmailAddress where
import Prelude (Unit, bind, pure, ($), (<>), (==), (||), between, void)
import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void)
import Control.Alt ((<|>))
import Data.Array as A
@ -12,12 +12,12 @@ import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU
import GenericParser.Parser (Parser(..)
, sat, char, item, many1, tryMaybe
, sat, char, char_num, char_range, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof)
-- ABNF core rules.
import GenericParser.RFC5234 (crlf, digit, wsp)
import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
import GenericParser.SomeParsers (letter)
data EmailError
@ -56,9 +56,14 @@ ctext = sat cond <|> obs_ctext
|| between 42 91 charcode
|| between 93 126 charcode
-- | TODO: `quoted_pair`
quoted_pair :: forall e. Parser e Char
quoted_pair = char ' '
-- | `quoted_pair`: pair of characters.
-- |
-- | quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
quoted_pair :: forall e. Parser e String
quoted_pair = do _ <- char '\\'
v <- vchar <|> wsp
pure $ "\\" <> CU.singleton v
<|> obs_qp
-- | ccontent = ctext / quoted-pair / comment
-- |
@ -122,8 +127,8 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
-- | dtext = %d33-90 / ; Printable US-ASCII
-- | %d94-126 / ; characters not including
-- | obs-dtext ; "[", "]", or "\"
dtext :: forall e. Parser e Char
dtext = sat cond <|> obs_dtext
dtext :: forall e. Parser e String
dtext = CU.singleton <$> sat cond <|> obs_dtext
where cond x = let charcode = C.toCharCode x
in between 33 90 charcode || between 94 126 charcode
@ -147,8 +152,8 @@ dtext = sat cond <|> obs_dtext
-- | TODO: Obsolete domain text.
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair
obs_dtext :: forall e. Parser e Char
obs_dtext = obs_no_ws_ctl <|> quoted_pair
obs_dtext :: forall e. Parser e String
obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
-- | line feed and white space characters.
@ -177,9 +182,16 @@ obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl
--obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
--
--obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
--
-- | `obs_qp`: obsolete quoted-pair rule.
-- |
-- | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
obs_qp :: forall e. Parser e String
obs_qp = do _ <- char '\\'
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
pure $ "\\" <> CU.singleton v
--obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
--
--obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)

View File

@ -1,11 +1,12 @@
module GenericParser.Parser where
import Prelude
import Prelude (between, (<<<), bind, (==), ($), pure, class Bind, unit, (+), (<>), class Applicative, class Functor, Unit, class Apply)
import Control.Alt (class Alt, (<|>))
import Control.Alternative (class Alternative)
import Control.Lazy (class Lazy, defer)
import Control.Plus (class Plus, empty)
import Data.Array as A
import Data.Char as C
import Data.Either (Either(..))
import Data.Maybe (Maybe(..), maybe)
import Data.String as S
@ -146,6 +147,19 @@ sat p = do pos <- current_position
char :: forall e. Char -> Parser e Char
char x = sat (_ == x)
-- | `char_num` checks a character based on its decimal code number.
-- | Useful mainly to parse unprintable characters.
-- |
-- | Example: `char_num 32` checks for a space, such as `char ' '`.
char_num :: forall e. Int -> Parser e Char
char_num n = sat (\c -> n == C.toCharCode c)
-- | `char_range` checks a character based on a range of possible decimal code numbers.
-- |
-- | Example, checking all visible ASCII characters: `char_range 33 126`.
char_range :: forall e. Int -> Int -> Parser e Char
char_range n1 n2 = sat (\c -> between n1 n2 $ C.toCharCode c)
string :: forall e. String -> Parser e String
string str = case A.uncons (CU.toCharArray str) of
Nothing -> Parser \input -> success input ""