Split most parsers from the GenericParser.Parser module.
This commit is contained in:
parent
1951d893a9
commit
8e68099e52
@ -2,8 +2,11 @@ module GenericParser
|
|||||||
( module GenericParser.Parser
|
( module GenericParser.Parser
|
||||||
, module GenericParser.DomainParser.Common
|
, module GenericParser.DomainParser.Common
|
||||||
, module GenericParser.DomainParser
|
, module GenericParser.DomainParser
|
||||||
|
--, module GenericParser.RFC5234
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||||
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
||||||
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
|
import GenericParser.Parser (char, current_input, current_position, Error, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
|
||||||
|
import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
|
||||||
|
-- import GenericParser.RFC5234 ()
|
||||||
|
@ -16,9 +16,10 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
|
|||||||
import GenericParser.Parser (Parser(..)
|
import GenericParser.Parser (Parser(..)
|
||||||
, failureError
|
, failureError
|
||||||
, current_position
|
, current_position
|
||||||
, char, letter, string
|
, char, string
|
||||||
, parse_last_char
|
, parse_last_char
|
||||||
, tryMaybe)
|
, tryMaybe)
|
||||||
|
import GenericParser.SomeParsers (letter)
|
||||||
|
|
||||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||||
-- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).
|
-- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).
|
||||||
|
@ -7,7 +7,8 @@ import Data.String as S
|
|||||||
|
|
||||||
import GenericParser.Parser (Parser(..)
|
import GenericParser.Parser (Parser(..)
|
||||||
, success, failureError
|
, success, failureError
|
||||||
, alphanum, char, many1)
|
, char, many1)
|
||||||
|
import GenericParser.SomeParsers (alphanum)
|
||||||
|
|
||||||
type Size = Int
|
type Size = Int
|
||||||
-- | `DomainError` expresses all possible errors that can occur while parsing a domain.
|
-- | `DomainError` expresses all possible errors that can occur while parsing a domain.
|
||||||
|
@ -18,8 +18,9 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
|
|||||||
import GenericParser.Parser (Parser(..)
|
import GenericParser.Parser (Parser(..)
|
||||||
, failureError
|
, failureError
|
||||||
, current_position
|
, current_position
|
||||||
, char, letter, parse, string
|
, char, parse, string
|
||||||
, tryMaybe)
|
, tryMaybe)
|
||||||
|
import GenericParser.SomeParsers (letter)
|
||||||
|
|
||||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||||
label :: Parser DomainError String
|
label :: Parser DomainError String
|
||||||
|
@ -11,14 +11,14 @@ import Data.Either (Either(..))
|
|||||||
import Data.Maybe (Maybe(..))
|
import Data.Maybe (Maybe(..))
|
||||||
import Data.String.CodeUnits as CU
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
-- ABNF core rules.
|
import GenericParser.Parser (Parser(..)
|
||||||
import GenericParser.RFC5234
|
, sat, char, item, many1, tryMaybe
|
||||||
|
, current_input, failureError, parse, rollback, until)
|
||||||
import GenericParser.DomainParser.Common (DomainError)
|
import GenericParser.DomainParser.Common (DomainError)
|
||||||
import GenericParser.DomainParser (sub_eof)
|
import GenericParser.DomainParser (sub_eof)
|
||||||
import GenericParser.Parser (Parser(..)
|
-- ABNF core rules.
|
||||||
, sat, char , digit , letter, item, many1, tryMaybe
|
import GenericParser.RFC5234 (crlf, digit, wsp)
|
||||||
, current_input, failureError, parse, rollback, until)
|
import GenericParser.SomeParsers (letter)
|
||||||
|
|
||||||
data EmailError
|
data EmailError
|
||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
|
@ -13,8 +13,10 @@ import GenericParser.Parser (Parser(..)
|
|||||||
, current_position
|
, current_position
|
||||||
, string
|
, string
|
||||||
, many1, lookahead
|
, many1, lookahead
|
||||||
, char, nat, hex)
|
, char)
|
||||||
import GenericParser.BaseFunctions (repeat)
|
import GenericParser.BaseFunctions (repeat)
|
||||||
|
import GenericParser.SomeParsers (nat)
|
||||||
|
import GenericParser.RFC5234 (hexdig)
|
||||||
|
|
||||||
data IPv6Error
|
data IPv6Error
|
||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
@ -27,7 +29,7 @@ data IPv6Error
|
|||||||
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
||||||
ipv6_chunk :: Parser IPv6Error String
|
ipv6_chunk :: Parser IPv6Error String
|
||||||
ipv6_chunk = do pos <- current_position
|
ipv6_chunk = do pos <- current_position
|
||||||
hexachars <- many1 hex
|
hexachars <- many1 hexdig
|
||||||
if A.length hexachars > 4
|
if A.length hexachars > 4
|
||||||
then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
|
then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
|
||||||
else pure $ CU.fromCharArray hexachars
|
else pure $ CU.fromCharArray hexachars
|
||||||
|
@ -7,12 +7,11 @@ import Control.Lazy (class Lazy, defer)
|
|||||||
import Control.Plus (class Plus, empty)
|
import Control.Plus (class Plus, empty)
|
||||||
import Data.Array as A
|
import Data.Array as A
|
||||||
import Data.Either (Either(..))
|
import Data.Either (Either(..))
|
||||||
import Data.Int as Int
|
|
||||||
import Data.Maybe (Maybe(..), maybe)
|
import Data.Maybe (Maybe(..), maybe)
|
||||||
import Data.String as S
|
import Data.String as S
|
||||||
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
|
import GenericParser.BaseFunctions (concat)
|
||||||
|
|
||||||
type Position = Int
|
type Position = Int
|
||||||
type PositionString = { string :: String, position :: Position }
|
type PositionString = { string :: String, position :: Position }
|
||||||
@ -70,9 +69,9 @@ success suffix result = Right { suffix, result }
|
|||||||
item :: forall e. Parser e Char
|
item :: forall e. Parser e Char
|
||||||
item = Parser p
|
item = Parser p
|
||||||
where
|
where
|
||||||
p input = case A.uncons (toCharArray input.string) of
|
p input = case A.uncons (CU.toCharArray input.string) of
|
||||||
Nothing -> failure input.position
|
Nothing -> failure input.position
|
||||||
Just { head: x, tail: xs } -> success { string: (fromCharArray xs), position: input.position+1 } x
|
Just { head: x, tail: xs } -> success { string: (CU.fromCharArray xs), position: input.position+1 } x
|
||||||
|
|
||||||
instance functorParser :: Functor (Parser e) where
|
instance functorParser :: Functor (Parser e) where
|
||||||
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
|
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
|
||||||
@ -144,78 +143,16 @@ sat p = do pos <- current_position
|
|||||||
x <- item
|
x <- item
|
||||||
if p x then pure x else Parser \_ -> failure pos
|
if p x then pure x else Parser \_ -> failure pos
|
||||||
|
|
||||||
digit :: forall e. Parser e Char
|
|
||||||
digit = sat isDigit
|
|
||||||
|
|
||||||
lower :: forall e. Parser e Char
|
|
||||||
lower = sat isLower
|
|
||||||
|
|
||||||
upper :: forall e. Parser e Char
|
|
||||||
upper = sat isUpper
|
|
||||||
|
|
||||||
letter :: forall e. Parser e Char
|
|
||||||
letter = sat isAlpha
|
|
||||||
|
|
||||||
alphanum :: forall e. Parser e Char
|
|
||||||
alphanum = sat isAlphaNum
|
|
||||||
|
|
||||||
char :: forall e. Char -> Parser e Char
|
char :: forall e. Char -> Parser e Char
|
||||||
char x = sat (_ == x)
|
char x = sat (_ == x)
|
||||||
|
|
||||||
string :: forall e. String -> Parser e String
|
string :: forall e. String -> Parser e String
|
||||||
string str = case A.uncons (toCharArray str) of
|
string str = case A.uncons (CU.toCharArray str) of
|
||||||
Nothing -> Parser \input -> success input ""
|
Nothing -> Parser \input -> success input ""
|
||||||
Just { head: x, tail: xs } -> do c <- char x
|
Just { head: x, tail: xs } -> do c <- char x
|
||||||
rest <- string (fromCharArray xs)
|
rest <- string (CU.fromCharArray xs)
|
||||||
pure (concat c rest)
|
pure (concat c rest)
|
||||||
|
|
||||||
ident :: forall e. Parser e String
|
|
||||||
ident = do x <- lower
|
|
||||||
xs <- A.many alphanum
|
|
||||||
pure (fromCharArray $ A.cons x xs)
|
|
||||||
|
|
||||||
nat :: forall e. Parser e Int
|
|
||||||
nat = do xs <- A.some digit
|
|
||||||
case Int.fromString (fromCharArray xs) of
|
|
||||||
Nothing -> empty
|
|
||||||
Just x -> pure x
|
|
||||||
|
|
||||||
int :: forall e. Parser e Int
|
|
||||||
int = do _ <- char '-'
|
|
||||||
n <- nat
|
|
||||||
pure (-n)
|
|
||||||
<|> nat
|
|
||||||
|
|
||||||
space :: forall e. Parser e Unit
|
|
||||||
space = do _ <- A.many (sat isSpace)
|
|
||||||
pure unit
|
|
||||||
|
|
||||||
token :: forall e a. Parser e a -> Parser e a
|
|
||||||
token p = do space
|
|
||||||
v <- p
|
|
||||||
_ <- space
|
|
||||||
pure v
|
|
||||||
|
|
||||||
identifier :: forall e. Parser e String
|
|
||||||
identifier = token ident
|
|
||||||
|
|
||||||
natural :: forall e. Parser e Int
|
|
||||||
natural = token nat
|
|
||||||
|
|
||||||
integer :: forall e. Parser e Int
|
|
||||||
integer = token int
|
|
||||||
|
|
||||||
symbol :: forall e. String -> Parser e String
|
|
||||||
symbol xs = token (string xs)
|
|
||||||
|
|
||||||
hex :: forall e. Parser e Char
|
|
||||||
hex = sat isHexaDecimal
|
|
||||||
|
|
||||||
eof :: forall e. Parser e Unit
|
|
||||||
eof = Parser \input -> case S.length input.string of
|
|
||||||
0 -> success input unit
|
|
||||||
_ -> failure input.position
|
|
||||||
|
|
||||||
many1 :: forall e v. Parser e v -> Parser e (Array v)
|
many1 :: forall e v. Parser e v -> Parser e (Array v)
|
||||||
many1 p = do first <- p
|
many1 p = do first <- p
|
||||||
rest <- A.many p
|
rest <- A.many p
|
||||||
@ -245,10 +182,10 @@ until parser_end p = do
|
|||||||
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
||||||
parse_last_char s p = case last_char s of
|
parse_last_char s p = case last_char s of
|
||||||
Nothing -> false
|
Nothing -> false
|
||||||
Just c -> case parse p { string: singleton c, position: 0 } of
|
Just c -> case parse p { string: CU.singleton c, position: 0 } of
|
||||||
Left _ -> false
|
Left _ -> false
|
||||||
_ -> true
|
_ -> true
|
||||||
where
|
where
|
||||||
-- Get the last character of a String.
|
-- Get the last character of a String.
|
||||||
last_char :: String -> Maybe Char
|
last_char :: String -> Maybe Char
|
||||||
last_char = A.last <<< toCharArray
|
last_char = A.last <<< CU.toCharArray
|
||||||
|
@ -2,27 +2,33 @@
|
|||||||
-- | This module implements core rules found in appendix B.1.
|
-- | This module implements core rules found in appendix B.1.
|
||||||
module GenericParser.RFC5234 where
|
module GenericParser.RFC5234 where
|
||||||
|
|
||||||
import Prelude (Unit, between, bind, void, ($))
|
import Prelude (Unit, between, (<<<), (||), (==), bind, void, ($))
|
||||||
|
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Data.Char as C
|
import Data.Char as C
|
||||||
|
|
||||||
import GenericParser.BaseFunctions (isHexaDecimal)
|
import GenericParser.BaseFunctions (isAlpha, isDigit, isHexaDecimal)
|
||||||
|
|
||||||
import GenericParser.Parser (Parser, char, sat)
|
import GenericParser.Parser (Parser, char, sat)
|
||||||
|
|
||||||
-- | RFC 5234:
|
-- | ALPHA: any letter, upper or lower case.
|
||||||
|
-- |
|
||||||
|
-- | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||||
|
alpha :: forall e. Parser e Char
|
||||||
|
alpha = sat isAlpha
|
||||||
|
|
||||||
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
-- | BIT: either character '0' or '1'.
|
||||||
|
-- |
|
||||||
--BIT = "0" / "1"
|
-- | BIT = "0" / "1"
|
||||||
|
bit :: forall e. Parser e Char
|
||||||
|
bit = char '0' <|> char '1'
|
||||||
|
|
||||||
-- | CHAR (renamed `asciichar` to fix naming conflict with `GenericParser.char`):
|
-- | CHAR (renamed `asciichar` to fix naming conflict with `GenericParser.char`):
|
||||||
-- | any 7-bit US-ASCII character, excluding NUL.
|
-- | any 7-bit US-ASCII character, excluding NUL.
|
||||||
-- |
|
-- |
|
||||||
-- | CHAR = %x01-7F
|
-- | CHAR = %x01-7F
|
||||||
asciichar :: forall e. Parser e Char
|
asciichar :: forall e. Parser e Char
|
||||||
asciichar = sat (\x -> between 1 127 $ C.toCharCode x)
|
asciichar = sat (between 1 127 <<< C.toCharCode)
|
||||||
|
|
||||||
-- | CR: carriage return.
|
-- | CR: carriage return.
|
||||||
-- |
|
-- |
|
||||||
@ -37,12 +43,19 @@ crlf :: forall e. Parser e Unit
|
|||||||
crlf = do _ <- char '\r'
|
crlf = do _ <- char '\r'
|
||||||
void $ char '\n'
|
void $ char '\n'
|
||||||
|
|
||||||
--CTL = %x00-1F / %x7F
|
-- | CTL: control characters.
|
||||||
-- ; controls
|
-- |
|
||||||
--
|
-- | CTL = %x00-1F / %x7F
|
||||||
|
ctl :: forall e. Parser e Char
|
||||||
|
ctl = sat cond
|
||||||
|
where cond x = (between 0 31 $ C.toCharCode x)
|
||||||
|
|| C.toCharCode x == 127
|
||||||
|
|
||||||
--DIGIT = %x30-39
|
--DIGIT = %x30-39
|
||||||
-- ; 0-9
|
-- ; 0-9
|
||||||
--
|
digit :: forall e. Parser e Char
|
||||||
|
digit = sat isDigit
|
||||||
|
|
||||||
--DQUOTE = %x22
|
--DQUOTE = %x22
|
||||||
-- ; " (Double Quote)
|
-- ; " (Double Quote)
|
||||||
|
|
||||||
@ -84,7 +97,7 @@ sp = char ' '
|
|||||||
-- |
|
-- |
|
||||||
-- | Visible printing characters.
|
-- | Visible printing characters.
|
||||||
vchar :: forall e. Parser e Char
|
vchar :: forall e. Parser e Char
|
||||||
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
vchar = sat (between 33 126 <<< C.toCharCode)
|
||||||
|
|
||||||
-- | WSP: white space.
|
-- | WSP: white space.
|
||||||
-- |
|
-- |
|
||||||
|
70
src/GenericParser/SomeParsers.purs
Normal file
70
src/GenericParser/SomeParsers.purs
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
module GenericParser.SomeParsers where
|
||||||
|
import Prelude (Unit, bind, discard, negate, pure, unit, ($))
|
||||||
|
|
||||||
|
import Control.Alt ((<|>))
|
||||||
|
import Control.Plus (empty)
|
||||||
|
import Data.Array as A
|
||||||
|
import Data.Maybe (Maybe(..))
|
||||||
|
import Data.Int as Int
|
||||||
|
import Data.String as S
|
||||||
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
|
import GenericParser.Parser
|
||||||
|
import GenericParser.BaseFunctions (isAlpha, isAlphaNum, isLower, isSpace, isUpper)
|
||||||
|
import GenericParser.RFC5234 (digit)
|
||||||
|
|
||||||
|
lower :: forall e. Parser e Char
|
||||||
|
lower = sat isLower
|
||||||
|
|
||||||
|
upper :: forall e. Parser e Char
|
||||||
|
upper = sat isUpper
|
||||||
|
|
||||||
|
letter :: forall e. Parser e Char
|
||||||
|
letter = sat isAlpha
|
||||||
|
|
||||||
|
alphanum :: forall e. Parser e Char
|
||||||
|
alphanum = sat isAlphaNum
|
||||||
|
|
||||||
|
ident :: forall e. Parser e String
|
||||||
|
ident = do x <- lower
|
||||||
|
xs <- A.many alphanum
|
||||||
|
pure (CU.fromCharArray $ A.cons x xs)
|
||||||
|
|
||||||
|
nat :: forall e. Parser e Int
|
||||||
|
nat = do xs <- A.some digit
|
||||||
|
case Int.fromString (CU.fromCharArray xs) of
|
||||||
|
Nothing -> empty
|
||||||
|
Just x -> pure x
|
||||||
|
|
||||||
|
int :: forall e. Parser e Int
|
||||||
|
int = do _ <- char '-'
|
||||||
|
n <- nat
|
||||||
|
pure (-n)
|
||||||
|
<|> nat
|
||||||
|
|
||||||
|
space :: forall e. Parser e Unit
|
||||||
|
space = do _ <- A.many (sat isSpace)
|
||||||
|
pure unit
|
||||||
|
|
||||||
|
token :: forall e a. Parser e a -> Parser e a
|
||||||
|
token p = do space
|
||||||
|
v <- p
|
||||||
|
_ <- space
|
||||||
|
pure v
|
||||||
|
|
||||||
|
identifier :: forall e. Parser e String
|
||||||
|
identifier = token ident
|
||||||
|
|
||||||
|
natural :: forall e. Parser e Int
|
||||||
|
natural = token nat
|
||||||
|
|
||||||
|
integer :: forall e. Parser e Int
|
||||||
|
integer = token int
|
||||||
|
|
||||||
|
symbol :: forall e. String -> Parser e String
|
||||||
|
symbol xs = token (string xs)
|
||||||
|
|
||||||
|
eof :: forall e. Parser e Unit
|
||||||
|
eof = Parser \input -> case S.length input.string of
|
||||||
|
0 -> success input unit
|
||||||
|
_ -> failure input.position
|
Loading…
Reference in New Issue
Block a user