Split most parsers from the GenericParser.Parser module.
This commit is contained in:
parent
1951d893a9
commit
8e68099e52
@ -2,8 +2,11 @@ module GenericParser
|
||||
( module GenericParser.Parser
|
||||
, module GenericParser.DomainParser.Common
|
||||
, module GenericParser.DomainParser
|
||||
--, module GenericParser.RFC5234
|
||||
) where
|
||||
|
||||
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
||||
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
|
||||
import GenericParser.Parser (char, current_input, current_position, Error, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
|
||||
import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
|
||||
-- import GenericParser.RFC5234 ()
|
||||
|
@ -16,9 +16,10 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, failureError
|
||||
, current_position
|
||||
, char, letter, string
|
||||
, char, string
|
||||
, parse_last_char
|
||||
, tryMaybe)
|
||||
import GenericParser.SomeParsers (letter)
|
||||
|
||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||
-- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).
|
||||
|
@ -7,7 +7,8 @@ import Data.String as S
|
||||
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, success, failureError
|
||||
, alphanum, char, many1)
|
||||
, char, many1)
|
||||
import GenericParser.SomeParsers (alphanum)
|
||||
|
||||
type Size = Int
|
||||
-- | `DomainError` expresses all possible errors that can occur while parsing a domain.
|
||||
|
@ -18,8 +18,9 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, failureError
|
||||
, current_position
|
||||
, char, letter, parse, string
|
||||
, char, parse, string
|
||||
, tryMaybe)
|
||||
import GenericParser.SomeParsers (letter)
|
||||
|
||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||
label :: Parser DomainError String
|
||||
|
@ -11,14 +11,14 @@ import Data.Either (Either(..))
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
-- ABNF core rules.
|
||||
import GenericParser.RFC5234
|
||||
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, sat, char, item, many1, tryMaybe
|
||||
, current_input, failureError, parse, rollback, until)
|
||||
import GenericParser.DomainParser.Common (DomainError)
|
||||
import GenericParser.DomainParser (sub_eof)
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, sat, char , digit , letter, item, many1, tryMaybe
|
||||
, current_input, failureError, parse, rollback, until)
|
||||
-- ABNF core rules.
|
||||
import GenericParser.RFC5234 (crlf, digit, wsp)
|
||||
import GenericParser.SomeParsers (letter)
|
||||
|
||||
data EmailError
|
||||
= InvalidCharacter
|
||||
|
@ -13,8 +13,10 @@ import GenericParser.Parser (Parser(..)
|
||||
, current_position
|
||||
, string
|
||||
, many1, lookahead
|
||||
, char, nat, hex)
|
||||
, char)
|
||||
import GenericParser.BaseFunctions (repeat)
|
||||
import GenericParser.SomeParsers (nat)
|
||||
import GenericParser.RFC5234 (hexdig)
|
||||
|
||||
data IPv6Error
|
||||
= InvalidCharacter
|
||||
@ -27,7 +29,7 @@ data IPv6Error
|
||||
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
||||
ipv6_chunk :: Parser IPv6Error String
|
||||
ipv6_chunk = do pos <- current_position
|
||||
hexachars <- many1 hex
|
||||
hexachars <- many1 hexdig
|
||||
if A.length hexachars > 4
|
||||
then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
|
||||
else pure $ CU.fromCharArray hexachars
|
||||
|
@ -7,12 +7,11 @@ import Control.Lazy (class Lazy, defer)
|
||||
import Control.Plus (class Plus, empty)
|
||||
import Data.Array as A
|
||||
import Data.Either (Either(..))
|
||||
import Data.Int as Int
|
||||
import Data.Maybe (Maybe(..), maybe)
|
||||
import Data.String as S
|
||||
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
|
||||
import GenericParser.BaseFunctions (concat)
|
||||
|
||||
type Position = Int
|
||||
type PositionString = { string :: String, position :: Position }
|
||||
@ -70,9 +69,9 @@ success suffix result = Right { suffix, result }
|
||||
item :: forall e. Parser e Char
|
||||
item = Parser p
|
||||
where
|
||||
p input = case A.uncons (toCharArray input.string) of
|
||||
p input = case A.uncons (CU.toCharArray input.string) of
|
||||
Nothing -> failure input.position
|
||||
Just { head: x, tail: xs } -> success { string: (fromCharArray xs), position: input.position+1 } x
|
||||
Just { head: x, tail: xs } -> success { string: (CU.fromCharArray xs), position: input.position+1 } x
|
||||
|
||||
instance functorParser :: Functor (Parser e) where
|
||||
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
|
||||
@ -144,78 +143,16 @@ sat p = do pos <- current_position
|
||||
x <- item
|
||||
if p x then pure x else Parser \_ -> failure pos
|
||||
|
||||
digit :: forall e. Parser e Char
|
||||
digit = sat isDigit
|
||||
|
||||
lower :: forall e. Parser e Char
|
||||
lower = sat isLower
|
||||
|
||||
upper :: forall e. Parser e Char
|
||||
upper = sat isUpper
|
||||
|
||||
letter :: forall e. Parser e Char
|
||||
letter = sat isAlpha
|
||||
|
||||
alphanum :: forall e. Parser e Char
|
||||
alphanum = sat isAlphaNum
|
||||
|
||||
char :: forall e. Char -> Parser e Char
|
||||
char x = sat (_ == x)
|
||||
|
||||
string :: forall e. String -> Parser e String
|
||||
string str = case A.uncons (toCharArray str) of
|
||||
string str = case A.uncons (CU.toCharArray str) of
|
||||
Nothing -> Parser \input -> success input ""
|
||||
Just { head: x, tail: xs } -> do c <- char x
|
||||
rest <- string (fromCharArray xs)
|
||||
rest <- string (CU.fromCharArray xs)
|
||||
pure (concat c rest)
|
||||
|
||||
ident :: forall e. Parser e String
|
||||
ident = do x <- lower
|
||||
xs <- A.many alphanum
|
||||
pure (fromCharArray $ A.cons x xs)
|
||||
|
||||
nat :: forall e. Parser e Int
|
||||
nat = do xs <- A.some digit
|
||||
case Int.fromString (fromCharArray xs) of
|
||||
Nothing -> empty
|
||||
Just x -> pure x
|
||||
|
||||
int :: forall e. Parser e Int
|
||||
int = do _ <- char '-'
|
||||
n <- nat
|
||||
pure (-n)
|
||||
<|> nat
|
||||
|
||||
space :: forall e. Parser e Unit
|
||||
space = do _ <- A.many (sat isSpace)
|
||||
pure unit
|
||||
|
||||
token :: forall e a. Parser e a -> Parser e a
|
||||
token p = do space
|
||||
v <- p
|
||||
_ <- space
|
||||
pure v
|
||||
|
||||
identifier :: forall e. Parser e String
|
||||
identifier = token ident
|
||||
|
||||
natural :: forall e. Parser e Int
|
||||
natural = token nat
|
||||
|
||||
integer :: forall e. Parser e Int
|
||||
integer = token int
|
||||
|
||||
symbol :: forall e. String -> Parser e String
|
||||
symbol xs = token (string xs)
|
||||
|
||||
hex :: forall e. Parser e Char
|
||||
hex = sat isHexaDecimal
|
||||
|
||||
eof :: forall e. Parser e Unit
|
||||
eof = Parser \input -> case S.length input.string of
|
||||
0 -> success input unit
|
||||
_ -> failure input.position
|
||||
|
||||
many1 :: forall e v. Parser e v -> Parser e (Array v)
|
||||
many1 p = do first <- p
|
||||
rest <- A.many p
|
||||
@ -245,10 +182,10 @@ until parser_end p = do
|
||||
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
||||
parse_last_char s p = case last_char s of
|
||||
Nothing -> false
|
||||
Just c -> case parse p { string: singleton c, position: 0 } of
|
||||
Just c -> case parse p { string: CU.singleton c, position: 0 } of
|
||||
Left _ -> false
|
||||
_ -> true
|
||||
where
|
||||
-- Get the last character of a String.
|
||||
last_char :: String -> Maybe Char
|
||||
last_char = A.last <<< toCharArray
|
||||
last_char = A.last <<< CU.toCharArray
|
||||
|
@ -2,27 +2,33 @@
|
||||
-- | This module implements core rules found in appendix B.1.
|
||||
module GenericParser.RFC5234 where
|
||||
|
||||
import Prelude (Unit, between, bind, void, ($))
|
||||
import Prelude (Unit, between, (<<<), (||), (==), bind, void, ($))
|
||||
|
||||
import Control.Alt ((<|>))
|
||||
import Data.Char as C
|
||||
|
||||
import GenericParser.BaseFunctions (isHexaDecimal)
|
||||
import GenericParser.BaseFunctions (isAlpha, isDigit, isHexaDecimal)
|
||||
|
||||
import GenericParser.Parser (Parser, char, sat)
|
||||
|
||||
-- | RFC 5234:
|
||||
-- | ALPHA: any letter, upper or lower case.
|
||||
-- |
|
||||
-- | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||
alpha :: forall e. Parser e Char
|
||||
alpha = sat isAlpha
|
||||
|
||||
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||
|
||||
--BIT = "0" / "1"
|
||||
-- | BIT: either character '0' or '1'.
|
||||
-- |
|
||||
-- | BIT = "0" / "1"
|
||||
bit :: forall e. Parser e Char
|
||||
bit = char '0' <|> char '1'
|
||||
|
||||
-- | CHAR (renamed `asciichar` to fix naming conflict with `GenericParser.char`):
|
||||
-- | any 7-bit US-ASCII character, excluding NUL.
|
||||
-- |
|
||||
-- | CHAR = %x01-7F
|
||||
asciichar :: forall e. Parser e Char
|
||||
asciichar = sat (\x -> between 1 127 $ C.toCharCode x)
|
||||
asciichar = sat (between 1 127 <<< C.toCharCode)
|
||||
|
||||
-- | CR: carriage return.
|
||||
-- |
|
||||
@ -37,12 +43,19 @@ crlf :: forall e. Parser e Unit
|
||||
crlf = do _ <- char '\r'
|
||||
void $ char '\n'
|
||||
|
||||
--CTL = %x00-1F / %x7F
|
||||
-- ; controls
|
||||
--
|
||||
-- | CTL: control characters.
|
||||
-- |
|
||||
-- | CTL = %x00-1F / %x7F
|
||||
ctl :: forall e. Parser e Char
|
||||
ctl = sat cond
|
||||
where cond x = (between 0 31 $ C.toCharCode x)
|
||||
|| C.toCharCode x == 127
|
||||
|
||||
--DIGIT = %x30-39
|
||||
-- ; 0-9
|
||||
--
|
||||
digit :: forall e. Parser e Char
|
||||
digit = sat isDigit
|
||||
|
||||
--DQUOTE = %x22
|
||||
-- ; " (Double Quote)
|
||||
|
||||
@ -84,7 +97,7 @@ sp = char ' '
|
||||
-- |
|
||||
-- | Visible printing characters.
|
||||
vchar :: forall e. Parser e Char
|
||||
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
||||
vchar = sat (between 33 126 <<< C.toCharCode)
|
||||
|
||||
-- | WSP: white space.
|
||||
-- |
|
||||
|
70
src/GenericParser/SomeParsers.purs
Normal file
70
src/GenericParser/SomeParsers.purs
Normal file
@ -0,0 +1,70 @@
|
||||
module GenericParser.SomeParsers where
|
||||
import Prelude (Unit, bind, discard, negate, pure, unit, ($))
|
||||
|
||||
import Control.Alt ((<|>))
|
||||
import Control.Plus (empty)
|
||||
import Data.Array as A
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.Int as Int
|
||||
import Data.String as S
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
import GenericParser.Parser
|
||||
import GenericParser.BaseFunctions (isAlpha, isAlphaNum, isLower, isSpace, isUpper)
|
||||
import GenericParser.RFC5234 (digit)
|
||||
|
||||
lower :: forall e. Parser e Char
|
||||
lower = sat isLower
|
||||
|
||||
upper :: forall e. Parser e Char
|
||||
upper = sat isUpper
|
||||
|
||||
letter :: forall e. Parser e Char
|
||||
letter = sat isAlpha
|
||||
|
||||
alphanum :: forall e. Parser e Char
|
||||
alphanum = sat isAlphaNum
|
||||
|
||||
ident :: forall e. Parser e String
|
||||
ident = do x <- lower
|
||||
xs <- A.many alphanum
|
||||
pure (CU.fromCharArray $ A.cons x xs)
|
||||
|
||||
nat :: forall e. Parser e Int
|
||||
nat = do xs <- A.some digit
|
||||
case Int.fromString (CU.fromCharArray xs) of
|
||||
Nothing -> empty
|
||||
Just x -> pure x
|
||||
|
||||
int :: forall e. Parser e Int
|
||||
int = do _ <- char '-'
|
||||
n <- nat
|
||||
pure (-n)
|
||||
<|> nat
|
||||
|
||||
space :: forall e. Parser e Unit
|
||||
space = do _ <- A.many (sat isSpace)
|
||||
pure unit
|
||||
|
||||
token :: forall e a. Parser e a -> Parser e a
|
||||
token p = do space
|
||||
v <- p
|
||||
_ <- space
|
||||
pure v
|
||||
|
||||
identifier :: forall e. Parser e String
|
||||
identifier = token ident
|
||||
|
||||
natural :: forall e. Parser e Int
|
||||
natural = token nat
|
||||
|
||||
integer :: forall e. Parser e Int
|
||||
integer = token int
|
||||
|
||||
symbol :: forall e. String -> Parser e String
|
||||
symbol xs = token (string xs)
|
||||
|
||||
eof :: forall e. Parser e Unit
|
||||
eof = Parser \input -> case S.length input.string of
|
||||
0 -> success input unit
|
||||
_ -> failure input.position
|
Loading…
Reference in New Issue
Block a user