Split most parsers from the GenericParser.Parser module.

master
Philippe Pittoli 2024-01-27 08:03:16 +01:00
parent 1951d893a9
commit 8e68099e52
9 changed files with 123 additions and 95 deletions

View File

@ -2,8 +2,11 @@ module GenericParser
( module GenericParser.Parser
, module GenericParser.DomainParser.Common
, module GenericParser.DomainParser
--, module GenericParser.RFC5234
) where
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
import GenericParser.Parser (char, current_input, current_position, Error, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
-- import GenericParser.RFC5234 ()

View File

@ -16,9 +16,10 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
import GenericParser.Parser (Parser(..)
, failureError
, current_position
, char, letter, string
, char, string
, parse_last_char
, tryMaybe)
import GenericParser.SomeParsers (letter)
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
-- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).

View File

@ -7,7 +7,8 @@ import Data.String as S
import GenericParser.Parser (Parser(..)
, success, failureError
, alphanum, char, many1)
, char, many1)
import GenericParser.SomeParsers (alphanum)
type Size = Int
-- | `DomainError` expresses all possible errors that can occur while parsing a domain.

View File

@ -18,8 +18,9 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
import GenericParser.Parser (Parser(..)
, failureError
, current_position
, char, letter, parse, string
, char, parse, string
, tryMaybe)
import GenericParser.SomeParsers (letter)
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser DomainError String

View File

@ -11,14 +11,14 @@ import Data.Either (Either(..))
import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU
-- ABNF core rules.
import GenericParser.RFC5234
import GenericParser.Parser (Parser(..)
, sat, char, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof)
import GenericParser.Parser (Parser(..)
, sat, char , digit , letter, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
-- ABNF core rules.
import GenericParser.RFC5234 (crlf, digit, wsp)
import GenericParser.SomeParsers (letter)
data EmailError
= InvalidCharacter

View File

@ -13,8 +13,10 @@ import GenericParser.Parser (Parser(..)
, current_position
, string
, many1, lookahead
, char, nat, hex)
, char)
import GenericParser.BaseFunctions (repeat)
import GenericParser.SomeParsers (nat)
import GenericParser.RFC5234 (hexdig)
data IPv6Error
= InvalidCharacter
@ -27,7 +29,7 @@ data IPv6Error
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
ipv6_chunk :: Parser IPv6Error String
ipv6_chunk = do pos <- current_position
hexachars <- many1 hex
hexachars <- many1 hexdig
if A.length hexachars > 4
then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
else pure $ CU.fromCharArray hexachars

View File

@ -7,12 +7,11 @@ import Control.Lazy (class Lazy, defer)
import Control.Plus (class Plus, empty)
import Data.Array as A
import Data.Either (Either(..))
import Data.Int as Int
import Data.Maybe (Maybe(..), maybe)
import Data.String as S
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
import GenericParser.BaseFunctions (concat)
type Position = Int
type PositionString = { string :: String, position :: Position }
@ -70,9 +69,9 @@ success suffix result = Right { suffix, result }
item :: forall e. Parser e Char
item = Parser p
where
p input = case A.uncons (toCharArray input.string) of
p input = case A.uncons (CU.toCharArray input.string) of
Nothing -> failure input.position
Just { head: x, tail: xs } -> success { string: (fromCharArray xs), position: input.position+1 } x
Just { head: x, tail: xs } -> success { string: (CU.fromCharArray xs), position: input.position+1 } x
instance functorParser :: Functor (Parser e) where
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
@ -144,78 +143,16 @@ sat p = do pos <- current_position
x <- item
if p x then pure x else Parser \_ -> failure pos
digit :: forall e. Parser e Char
digit = sat isDigit
lower :: forall e. Parser e Char
lower = sat isLower
upper :: forall e. Parser e Char
upper = sat isUpper
letter :: forall e. Parser e Char
letter = sat isAlpha
alphanum :: forall e. Parser e Char
alphanum = sat isAlphaNum
char :: forall e. Char -> Parser e Char
char x = sat (_ == x)
string :: forall e. String -> Parser e String
string str = case A.uncons (toCharArray str) of
string str = case A.uncons (CU.toCharArray str) of
Nothing -> Parser \input -> success input ""
Just { head: x, tail: xs } -> do c <- char x
rest <- string (fromCharArray xs)
rest <- string (CU.fromCharArray xs)
pure (concat c rest)
ident :: forall e. Parser e String
ident = do x <- lower
xs <- A.many alphanum
pure (fromCharArray $ A.cons x xs)
nat :: forall e. Parser e Int
nat = do xs <- A.some digit
case Int.fromString (fromCharArray xs) of
Nothing -> empty
Just x -> pure x
int :: forall e. Parser e Int
int = do _ <- char '-'
n <- nat
pure (-n)
<|> nat
space :: forall e. Parser e Unit
space = do _ <- A.many (sat isSpace)
pure unit
token :: forall e a. Parser e a -> Parser e a
token p = do space
v <- p
_ <- space
pure v
identifier :: forall e. Parser e String
identifier = token ident
natural :: forall e. Parser e Int
natural = token nat
integer :: forall e. Parser e Int
integer = token int
symbol :: forall e. String -> Parser e String
symbol xs = token (string xs)
hex :: forall e. Parser e Char
hex = sat isHexaDecimal
eof :: forall e. Parser e Unit
eof = Parser \input -> case S.length input.string of
0 -> success input unit
_ -> failure input.position
many1 :: forall e v. Parser e v -> Parser e (Array v)
many1 p = do first <- p
rest <- A.many p
@ -245,10 +182,10 @@ until parser_end p = do
parse_last_char :: forall e. String -> Parser e Char -> Boolean
parse_last_char s p = case last_char s of
Nothing -> false
Just c -> case parse p { string: singleton c, position: 0 } of
Just c -> case parse p { string: CU.singleton c, position: 0 } of
Left _ -> false
_ -> true
where
-- Get the last character of a String.
last_char :: String -> Maybe Char
last_char = A.last <<< toCharArray
last_char = A.last <<< CU.toCharArray

View File

@ -2,27 +2,33 @@
-- | This module implements core rules found in appendix B.1.
module GenericParser.RFC5234 where
import Prelude (Unit, between, bind, void, ($))
import Prelude (Unit, between, (<<<), (||), (==), bind, void, ($))
import Control.Alt ((<|>))
import Data.Char as C
import GenericParser.BaseFunctions (isHexaDecimal)
import GenericParser.BaseFunctions (isAlpha, isDigit, isHexaDecimal)
import GenericParser.Parser (Parser, char, sat)
-- | RFC 5234:
-- | ALPHA: any letter, upper or lower case.
-- |
-- | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
alpha :: forall e. Parser e Char
alpha = sat isAlpha
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
--BIT = "0" / "1"
-- | BIT: either character '0' or '1'.
-- |
-- | BIT = "0" / "1"
bit :: forall e. Parser e Char
bit = char '0' <|> char '1'
-- | CHAR (renamed `asciichar` to fix naming conflict with `GenericParser.char`):
-- | any 7-bit US-ASCII character, excluding NUL.
-- |
-- | CHAR = %x01-7F
asciichar :: forall e. Parser e Char
asciichar = sat (\x -> between 1 127 $ C.toCharCode x)
asciichar = sat (between 1 127 <<< C.toCharCode)
-- | CR: carriage return.
-- |
@ -37,12 +43,19 @@ crlf :: forall e. Parser e Unit
crlf = do _ <- char '\r'
void $ char '\n'
--CTL = %x00-1F / %x7F
-- ; controls
--
-- | CTL: control characters.
-- |
-- | CTL = %x00-1F / %x7F
ctl :: forall e. Parser e Char
ctl = sat cond
where cond x = (between 0 31 $ C.toCharCode x)
|| C.toCharCode x == 127
--DIGIT = %x30-39
-- ; 0-9
--
digit :: forall e. Parser e Char
digit = sat isDigit
--DQUOTE = %x22
-- ; " (Double Quote)
@ -84,7 +97,7 @@ sp = char ' '
-- |
-- | Visible printing characters.
vchar :: forall e. Parser e Char
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
vchar = sat (between 33 126 <<< C.toCharCode)
-- | WSP: white space.
-- |

View File

@ -0,0 +1,70 @@
module GenericParser.SomeParsers where
import Prelude (Unit, bind, discard, negate, pure, unit, ($))
import Control.Alt ((<|>))
import Control.Plus (empty)
import Data.Array as A
import Data.Maybe (Maybe(..))
import Data.Int as Int
import Data.String as S
import Data.String.CodeUnits as CU
import GenericParser.Parser
import GenericParser.BaseFunctions (isAlpha, isAlphaNum, isLower, isSpace, isUpper)
import GenericParser.RFC5234 (digit)
lower :: forall e. Parser e Char
lower = sat isLower
upper :: forall e. Parser e Char
upper = sat isUpper
letter :: forall e. Parser e Char
letter = sat isAlpha
alphanum :: forall e. Parser e Char
alphanum = sat isAlphaNum
ident :: forall e. Parser e String
ident = do x <- lower
xs <- A.many alphanum
pure (CU.fromCharArray $ A.cons x xs)
nat :: forall e. Parser e Int
nat = do xs <- A.some digit
case Int.fromString (CU.fromCharArray xs) of
Nothing -> empty
Just x -> pure x
int :: forall e. Parser e Int
int = do _ <- char '-'
n <- nat
pure (-n)
<|> nat
space :: forall e. Parser e Unit
space = do _ <- A.many (sat isSpace)
pure unit
token :: forall e a. Parser e a -> Parser e a
token p = do space
v <- p
_ <- space
pure v
identifier :: forall e. Parser e String
identifier = token ident
natural :: forall e. Parser e Int
natural = token nat
integer :: forall e. Parser e Int
integer = token int
symbol :: forall e. String -> Parser e String
symbol xs = token (string xs)
eof :: forall e. Parser e Unit
eof = Parser \input -> case S.length input.string of
0 -> success input unit
_ -> failure input.position