Parser: char_num & char_range, Email: quoted-pair

This commit is contained in:
Philippe Pittoli 2024-01-28 01:38:17 +01:00
parent ca52e27594
commit c88cb8215b
2 changed files with 40 additions and 14 deletions

View file

@ -2,7 +2,7 @@
-- | This module is experimental and doesn't follow every rule for an email address, yet. -- | This module is experimental and doesn't follow every rule for an email address, yet.
module GenericParser.EmailAddress where module GenericParser.EmailAddress where
import Prelude (Unit, bind, pure, ($), (<>), (==), (||), between, void) import Prelude (Unit, (<$>), bind, pure, ($), (<>), (==), (||), between, void)
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Data.Array as A import Data.Array as A
@ -12,12 +12,12 @@ import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
import GenericParser.Parser (Parser(..) import GenericParser.Parser (Parser(..)
, sat, char, item, many1, tryMaybe , sat, char, char_num, char_range, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until) , current_input, failureError, parse, rollback, until)
import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof) import GenericParser.DomainParser (sub_eof)
-- ABNF core rules. -- ABNF core rules.
import GenericParser.RFC5234 (crlf, digit, wsp) import GenericParser.RFC5234 (crlf, digit, wsp, vchar, lf, cr)
import GenericParser.SomeParsers (letter) import GenericParser.SomeParsers (letter)
data EmailError data EmailError
@ -56,9 +56,14 @@ ctext = sat cond <|> obs_ctext
|| between 42 91 charcode || between 42 91 charcode
|| between 93 126 charcode || between 93 126 charcode
-- | TODO: `quoted_pair` -- | `quoted_pair`: pair of characters.
quoted_pair :: forall e. Parser e Char -- |
quoted_pair = char ' ' -- | quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
quoted_pair :: forall e. Parser e String
quoted_pair = do _ <- char '\\'
v <- vchar <|> wsp
pure $ "\\" <> CU.singleton v
<|> obs_qp
-- | ccontent = ctext / quoted-pair / comment -- | ccontent = ctext / quoted-pair / comment
-- | -- |
@ -122,8 +127,8 @@ cfws = do void $ many1 $ do _ <- tryMaybe fws
-- | dtext = %d33-90 / ; Printable US-ASCII -- | dtext = %d33-90 / ; Printable US-ASCII
-- | %d94-126 / ; characters not including -- | %d94-126 / ; characters not including
-- | obs-dtext ; "[", "]", or "\" -- | obs-dtext ; "[", "]", or "\"
dtext :: forall e. Parser e Char dtext :: forall e. Parser e String
dtext = sat cond <|> obs_dtext dtext = CU.singleton <$> sat cond <|> obs_dtext
where cond x = let charcode = C.toCharCode x where cond x = let charcode = C.toCharCode x
in between 33 90 charcode || between 94 126 charcode in between 33 90 charcode || between 94 126 charcode
@ -147,8 +152,8 @@ dtext = sat cond <|> obs_dtext
-- | TODO: Obsolete domain text. -- | TODO: Obsolete domain text.
-- | obs-dtext = obs-NO-WS-CTL / quoted-pair -- | obs-dtext = obs-NO-WS-CTL / quoted-pair
obs_dtext :: forall e. Parser e Char obs_dtext :: forall e. Parser e String
obs_dtext = obs_no_ws_ctl <|> quoted_pair obs_dtext = CU.singleton <$> obs_no_ws_ctl <|> quoted_pair
-- | obs-NO-WS-CTL: US-ASCII control characters without carriage return, -- | obs-NO-WS-CTL: US-ASCII control characters without carriage return,
-- | line feed and white space characters. -- | line feed and white space characters.
@ -177,9 +182,16 @@ obs_qtext :: forall e. Parser e Char
obs_qtext = obs_no_ws_ctl obs_qtext = obs_no_ws_ctl
--obs-utext = %d0 / obs-NO-WS-CTL / VCHAR --obs-utext = %d0 / obs-NO-WS-CTL / VCHAR
--
--obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
-- -- | `obs_qp`: obsolete quoted-pair rule.
-- |
-- | obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
obs_qp :: forall e. Parser e String
obs_qp = do _ <- char '\\'
v <- char_num 0 <|> obs_no_ws_ctl <|> lf <|> cr
pure $ "\\" <> CU.singleton v
--obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF) --obs-body = *((*LF *CR *((%d0 / text) *LF *CR)) / CRLF)
-- --
--obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS) --obs-unstruct = *((*LF *CR *(obs-utext *LF *CR)) / FWS)

View file

@ -1,11 +1,12 @@
module GenericParser.Parser where module GenericParser.Parser where
import Prelude import Prelude (between, (<<<), bind, (==), ($), pure, class Bind, unit, (+), (<>), class Applicative, class Functor, Unit, class Apply)
import Control.Alt (class Alt, (<|>)) import Control.Alt (class Alt, (<|>))
import Control.Alternative (class Alternative) import Control.Alternative (class Alternative)
import Control.Lazy (class Lazy, defer) import Control.Lazy (class Lazy, defer)
import Control.Plus (class Plus, empty) import Control.Plus (class Plus, empty)
import Data.Array as A import Data.Array as A
import Data.Char as C
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe (Maybe(..), maybe) import Data.Maybe (Maybe(..), maybe)
import Data.String as S import Data.String as S
@ -146,6 +147,19 @@ sat p = do pos <- current_position
char :: forall e. Char -> Parser e Char char :: forall e. Char -> Parser e Char
char x = sat (_ == x) char x = sat (_ == x)
-- | `char_num` checks a character based on its decimal code number.
-- | Useful mainly to parse unprintable characters.
-- |
-- | Example: `char_num 32` checks for a space, such as `char ' '`.
char_num :: forall e. Int -> Parser e Char
char_num n = sat (\c -> n == C.toCharCode c)
-- | `char_range` checks a character based on a range of possible decimal code numbers.
-- |
-- | Example, checking all visible ASCII characters: `char_range 33 126`.
char_range :: forall e. Int -> Int -> Parser e Char
char_range n1 n2 = sat (\c -> between n1 n2 $ C.toCharCode c)
string :: forall e. String -> Parser e String string :: forall e. String -> Parser e String
string str = case A.uncons (CU.toCharArray str) of string str = case A.uncons (CU.toCharArray str) of
Nothing -> Parser \input -> success input "" Nothing -> Parser \input -> success input ""