Split most parsers from the GenericParser.Parser module.

This commit is contained in:
Philippe Pittoli 2024-01-27 08:03:16 +01:00
parent 1951d893a9
commit 8e68099e52
9 changed files with 123 additions and 95 deletions

View File

@ -2,8 +2,11 @@ module GenericParser
( module GenericParser.Parser ( module GenericParser.Parser
, module GenericParser.DomainParser.Common , module GenericParser.DomainParser.Common
, module GenericParser.DomainParser , module GenericParser.DomainParser
--, module GenericParser.RFC5234
) where ) where
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size) import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
import GenericParser.DomainParser (domain, label, subdomain, sub_eof) import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value) import GenericParser.Parser (char, current_input, current_position, Error, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
-- import GenericParser.RFC5234 ()

View File

@ -16,9 +16,10 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
import GenericParser.Parser (Parser(..) import GenericParser.Parser (Parser(..)
, failureError , failureError
, current_position , current_position
, char, letter, string , char, string
, parse_last_char , parse_last_char
, tryMaybe) , tryMaybe)
import GenericParser.SomeParsers (letter)
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ] -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
-- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`). -- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).

View File

@ -7,7 +7,8 @@ import Data.String as S
import GenericParser.Parser (Parser(..) import GenericParser.Parser (Parser(..)
, success, failureError , success, failureError
, alphanum, char, many1) , char, many1)
import GenericParser.SomeParsers (alphanum)
type Size = Int type Size = Int
-- | `DomainError` expresses all possible errors that can occur while parsing a domain. -- | `DomainError` expresses all possible errors that can occur while parsing a domain.

View File

@ -18,8 +18,9 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
import GenericParser.Parser (Parser(..) import GenericParser.Parser (Parser(..)
, failureError , failureError
, current_position , current_position
, char, letter, parse, string , char, parse, string
, tryMaybe) , tryMaybe)
import GenericParser.SomeParsers (letter)
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ] -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser DomainError String label :: Parser DomainError String

View File

@ -11,14 +11,14 @@ import Data.Either (Either(..))
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
-- ABNF core rules. import GenericParser.Parser (Parser(..)
import GenericParser.RFC5234 , sat, char, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof) import GenericParser.DomainParser (sub_eof)
import GenericParser.Parser (Parser(..) -- ABNF core rules.
, sat, char , digit , letter, item, many1, tryMaybe import GenericParser.RFC5234 (crlf, digit, wsp)
, current_input, failureError, parse, rollback, until) import GenericParser.SomeParsers (letter)
data EmailError data EmailError
= InvalidCharacter = InvalidCharacter

View File

@ -13,8 +13,10 @@ import GenericParser.Parser (Parser(..)
, current_position , current_position
, string , string
, many1, lookahead , many1, lookahead
, char, nat, hex) , char)
import GenericParser.BaseFunctions (repeat) import GenericParser.BaseFunctions (repeat)
import GenericParser.SomeParsers (nat)
import GenericParser.RFC5234 (hexdig)
data IPv6Error data IPv6Error
= InvalidCharacter = InvalidCharacter
@ -27,7 +29,7 @@ data IPv6Error
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters. -- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
ipv6_chunk :: Parser IPv6Error String ipv6_chunk :: Parser IPv6Error String
ipv6_chunk = do pos <- current_position ipv6_chunk = do pos <- current_position
hexachars <- many1 hex hexachars <- many1 hexdig
if A.length hexachars > 4 if A.length hexachars > 4
then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters) then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
else pure $ CU.fromCharArray hexachars else pure $ CU.fromCharArray hexachars

View File

@ -7,12 +7,11 @@ import Control.Lazy (class Lazy, defer)
import Control.Plus (class Plus, empty) import Control.Plus (class Plus, empty)
import Data.Array as A import Data.Array as A
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Int as Int
import Data.Maybe (Maybe(..), maybe) import Data.Maybe (Maybe(..), maybe)
import Data.String as S import Data.String as S
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton) import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal) import GenericParser.BaseFunctions (concat)
type Position = Int type Position = Int
type PositionString = { string :: String, position :: Position } type PositionString = { string :: String, position :: Position }
@ -70,9 +69,9 @@ success suffix result = Right { suffix, result }
item :: forall e. Parser e Char item :: forall e. Parser e Char
item = Parser p item = Parser p
where where
p input = case A.uncons (toCharArray input.string) of p input = case A.uncons (CU.toCharArray input.string) of
Nothing -> failure input.position Nothing -> failure input.position
Just { head: x, tail: xs } -> success { string: (fromCharArray xs), position: input.position+1 } x Just { head: x, tail: xs } -> success { string: (CU.fromCharArray xs), position: input.position+1 } x
instance functorParser :: Functor (Parser e) where instance functorParser :: Functor (Parser e) where
map :: forall a b. (a -> b) -> Parser e a -> Parser e b map :: forall a b. (a -> b) -> Parser e a -> Parser e b
@ -144,78 +143,16 @@ sat p = do pos <- current_position
x <- item x <- item
if p x then pure x else Parser \_ -> failure pos if p x then pure x else Parser \_ -> failure pos
digit :: forall e. Parser e Char
digit = sat isDigit
lower :: forall e. Parser e Char
lower = sat isLower
upper :: forall e. Parser e Char
upper = sat isUpper
letter :: forall e. Parser e Char
letter = sat isAlpha
alphanum :: forall e. Parser e Char
alphanum = sat isAlphaNum
char :: forall e. Char -> Parser e Char char :: forall e. Char -> Parser e Char
char x = sat (_ == x) char x = sat (_ == x)
string :: forall e. String -> Parser e String string :: forall e. String -> Parser e String
string str = case A.uncons (toCharArray str) of string str = case A.uncons (CU.toCharArray str) of
Nothing -> Parser \input -> success input "" Nothing -> Parser \input -> success input ""
Just { head: x, tail: xs } -> do c <- char x Just { head: x, tail: xs } -> do c <- char x
rest <- string (fromCharArray xs) rest <- string (CU.fromCharArray xs)
pure (concat c rest) pure (concat c rest)
ident :: forall e. Parser e String
ident = do x <- lower
xs <- A.many alphanum
pure (fromCharArray $ A.cons x xs)
nat :: forall e. Parser e Int
nat = do xs <- A.some digit
case Int.fromString (fromCharArray xs) of
Nothing -> empty
Just x -> pure x
int :: forall e. Parser e Int
int = do _ <- char '-'
n <- nat
pure (-n)
<|> nat
space :: forall e. Parser e Unit
space = do _ <- A.many (sat isSpace)
pure unit
token :: forall e a. Parser e a -> Parser e a
token p = do space
v <- p
_ <- space
pure v
identifier :: forall e. Parser e String
identifier = token ident
natural :: forall e. Parser e Int
natural = token nat
integer :: forall e. Parser e Int
integer = token int
symbol :: forall e. String -> Parser e String
symbol xs = token (string xs)
hex :: forall e. Parser e Char
hex = sat isHexaDecimal
eof :: forall e. Parser e Unit
eof = Parser \input -> case S.length input.string of
0 -> success input unit
_ -> failure input.position
many1 :: forall e v. Parser e v -> Parser e (Array v) many1 :: forall e v. Parser e v -> Parser e (Array v)
many1 p = do first <- p many1 p = do first <- p
rest <- A.many p rest <- A.many p
@ -245,10 +182,10 @@ until parser_end p = do
parse_last_char :: forall e. String -> Parser e Char -> Boolean parse_last_char :: forall e. String -> Parser e Char -> Boolean
parse_last_char s p = case last_char s of parse_last_char s p = case last_char s of
Nothing -> false Nothing -> false
Just c -> case parse p { string: singleton c, position: 0 } of Just c -> case parse p { string: CU.singleton c, position: 0 } of
Left _ -> false Left _ -> false
_ -> true _ -> true
where where
-- Get the last character of a String. -- Get the last character of a String.
last_char :: String -> Maybe Char last_char :: String -> Maybe Char
last_char = A.last <<< toCharArray last_char = A.last <<< CU.toCharArray

View File

@ -2,27 +2,33 @@
-- | This module implements core rules found in appendix B.1. -- | This module implements core rules found in appendix B.1.
module GenericParser.RFC5234 where module GenericParser.RFC5234 where
import Prelude (Unit, between, bind, void, ($)) import Prelude (Unit, between, (<<<), (||), (==), bind, void, ($))
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Data.Char as C import Data.Char as C
import GenericParser.BaseFunctions (isHexaDecimal) import GenericParser.BaseFunctions (isAlpha, isDigit, isHexaDecimal)
import GenericParser.Parser (Parser, char, sat) import GenericParser.Parser (Parser, char, sat)
-- | RFC 5234: -- | ALPHA: any letter, upper or lower case.
-- |
-- | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
alpha :: forall e. Parser e Char
alpha = sat isAlpha
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z -- | BIT: either character '0' or '1'.
-- |
--BIT = "0" / "1" -- | BIT = "0" / "1"
bit :: forall e. Parser e Char
bit = char '0' <|> char '1'
-- | CHAR (renamed `asciichar` to fix naming conflict with `GenericParser.char`): -- | CHAR (renamed `asciichar` to fix naming conflict with `GenericParser.char`):
-- | any 7-bit US-ASCII character, excluding NUL. -- | any 7-bit US-ASCII character, excluding NUL.
-- | -- |
-- | CHAR = %x01-7F -- | CHAR = %x01-7F
asciichar :: forall e. Parser e Char asciichar :: forall e. Parser e Char
asciichar = sat (\x -> between 1 127 $ C.toCharCode x) asciichar = sat (between 1 127 <<< C.toCharCode)
-- | CR: carriage return. -- | CR: carriage return.
-- | -- |
@ -37,12 +43,19 @@ crlf :: forall e. Parser e Unit
crlf = do _ <- char '\r' crlf = do _ <- char '\r'
void $ char '\n' void $ char '\n'
--CTL = %x00-1F / %x7F -- | CTL: control characters.
-- ; controls -- |
-- -- | CTL = %x00-1F / %x7F
ctl :: forall e. Parser e Char
ctl = sat cond
where cond x = (between 0 31 $ C.toCharCode x)
|| C.toCharCode x == 127
--DIGIT = %x30-39 --DIGIT = %x30-39
-- ; 0-9 -- ; 0-9
-- digit :: forall e. Parser e Char
digit = sat isDigit
--DQUOTE = %x22 --DQUOTE = %x22
-- ; " (Double Quote) -- ; " (Double Quote)
@ -84,7 +97,7 @@ sp = char ' '
-- | -- |
-- | Visible printing characters. -- | Visible printing characters.
vchar :: forall e. Parser e Char vchar :: forall e. Parser e Char
vchar = sat (\x -> between 33 126 $ C.toCharCode x) vchar = sat (between 33 126 <<< C.toCharCode)
-- | WSP: white space. -- | WSP: white space.
-- | -- |

View File

@ -0,0 +1,70 @@
module GenericParser.SomeParsers where
import Prelude (Unit, bind, discard, negate, pure, unit, ($))
import Control.Alt ((<|>))
import Control.Plus (empty)
import Data.Array as A
import Data.Maybe (Maybe(..))
import Data.Int as Int
import Data.String as S
import Data.String.CodeUnits as CU
import GenericParser.Parser
import GenericParser.BaseFunctions (isAlpha, isAlphaNum, isLower, isSpace, isUpper)
import GenericParser.RFC5234 (digit)
lower :: forall e. Parser e Char
lower = sat isLower
upper :: forall e. Parser e Char
upper = sat isUpper
letter :: forall e. Parser e Char
letter = sat isAlpha
alphanum :: forall e. Parser e Char
alphanum = sat isAlphaNum
ident :: forall e. Parser e String
ident = do x <- lower
xs <- A.many alphanum
pure (CU.fromCharArray $ A.cons x xs)
nat :: forall e. Parser e Int
nat = do xs <- A.some digit
case Int.fromString (CU.fromCharArray xs) of
Nothing -> empty
Just x -> pure x
int :: forall e. Parser e Int
int = do _ <- char '-'
n <- nat
pure (-n)
<|> nat
space :: forall e. Parser e Unit
space = do _ <- A.many (sat isSpace)
pure unit
token :: forall e a. Parser e a -> Parser e a
token p = do space
v <- p
_ <- space
pure v
identifier :: forall e. Parser e String
identifier = token ident
natural :: forall e. Parser e Int
natural = token nat
integer :: forall e. Parser e Int
integer = token int
symbol :: forall e. String -> Parser e String
symbol xs = token (string xs)
eof :: forall e. Parser e Unit
eof = Parser \input -> case S.length input.string of
0 -> success input unit
_ -> failure input.position