parser/src/GenericParser/Parser.purs

225 lines
9.1 KiB
Text

module GenericParser.Parser where
import Prelude (between, (<<<), bind, (==), ($), pure, class Bind, unit, (+), (-), (<>), class Applicative, class Functor, Unit, class Apply)
import Control.Alt (class Alt)
import Control.Alternative (class Alternative)
import Control.Lazy (class Lazy, defer)
import Control.Plus (class Plus)
import Data.Array as A
import Data.Char as C
import Data.Either (Either(..))
import Data.Maybe (Maybe(..), maybe)
import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (concat)
type Position = Int
type PositionString = { string :: String, position :: Position }
type Input = PositionString
type Error e = { position :: Position, error :: Maybe e }
type Value v = { result :: v, suffix :: Input }
type Result e v = Either (Error e) (Value v)
newtype Parser e v = Parser (Input -> Result e v)
--newtype Parser v = Parser (String -> Maybe (Tuple v String))
parse :: forall e v. Parser e v -> (Input -> Result e v)
parse (Parser p) = p
-- | `current_position` provides the current position in the input.
-- | This is used notably before actual parsing and to provide the
-- | starting position where parsing failed, not just the last character position.
-- |
-- | This function cannot fail since no parsing is performed.
current_position :: forall e. Parser e Position
current_position = Parser \input -> success input input.position
-- | `current_input` provides the current state of the input.
-- | This is used notably to look ahead.
-- |
-- | This function cannot fail since no parsing is performed.
current_input :: forall e. Parser e Input
current_input = Parser \input -> success input input
-- | `rollback` replaces the current input.
-- |
-- | This function cannot fail since no parsing is performed.
rollback :: forall e. Input -> Parser e Unit
rollback newinput = Parser \_ -> success newinput unit
-- | Fail with a specified error.
-- | When a parsing has a specified error, no alternative will be tried and the error is reported.
failureError :: forall e v. Position -> Maybe e -> Result e v
failureError position error = Left { position, error }
-- | Fail without a specified error.
-- | This is used in generic parsers not attached to a specified context,
-- | such as `digit` or `letter`.
-- | Also, this can be used to express a possibly expected invalid parsing that should not
-- | halt the parsing, but rather let an alternative path to be tried.
failure :: forall e v. Position -> Result e v
failure position = failureError position Nothing
-- | `success` constructs a result value for a successful parsing.
-- | It requires the input (a string with its current position) and the result.
success :: forall e v. Input -> v -> Result e v
success suffix result = Right { suffix, result }
-- | Read a single char from the input.
item :: forall e. Parser e Char
item = Parser p
where
p input = case A.uncons (CU.toCharArray input.string) of
Nothing -> failure input.position
Just { head: x, tail: xs } -> success { string: (CU.fromCharArray xs), position: input.position+1 } x
instance functorParser :: Functor (Parser e) where
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
map f (Parser p) =
Parser $ \s0 -> case p s0 of
Right { suffix, result } -> success suffix (f result)
Left error -> Left error
instance applyParser :: Apply (Parser e) where
apply (Parser p1) (Parser p2)
= Parser $ \s0 -> case p1 s0 of
Right { suffix: suffix1, result: result1 } -> case p2 suffix1 of
Right { suffix: suffix2, result: result2 } -> success suffix2 (result1 result2)
Left error -> Left error
Left error -> Left error
instance applicativeParser :: Applicative (Parser e) where
pure a = Parser $ \input -> success input a
instance bindParser :: Bind (Parser e) where
bind (Parser p) f = Parser $ \s0 -> case p s0 of
Right { suffix, result } -> parse (f result) suffix
Left error -> Left error
-- | `Alt` instance: in case of an error that produced an error value, the computation stops.
instance altParser :: Alt (Parser e) where
alt :: forall v. Parser e v -> Parser e v -> Parser e v
alt (Parser p1) (Parser p2) = Parser p
where
p stream = case p1 stream of
Left { position, error } -> case error of
Nothing -> p2 stream
_ -> failureError position error
Right right -> Right right
instance plusParser :: Plus (Parser e) where
empty :: forall v. Parser e v
empty = Parser \input -> failure input.position
instance alternativeParser :: Alternative (Parser e)
instance lazyParser :: Lazy (Parser e v) where
defer f = Parser \input -> parse (f unit) input
-- Generic parsing functions.
-- | `tryMaybe` provides a way to accept a faulty parser and
-- | just rewinds back to previous input state if an error occurs.
tryMaybe :: forall e a. Parser e a -> Parser e (Maybe a)
tryMaybe p = Parser p'
where p' input = case parse p input of
Left _ -> success input Nothing
Right { suffix, result } -> success suffix (Just result)
-- | `try` provides a way to accept a faulty parser and
-- | just rewinds back to previous input state if a non-specific error occurs.
-- | The difference with `tryMaybe` is that `try` will forward the error if it is
-- | a specific one, meaning that `error` isn't `Nothing`.
try :: forall e a. Parser e a -> Parser e (Maybe a)
try p = Parser p'
where p' input = case parse p input of
Right { suffix, result } -> success suffix (Just result)
Left { position, error } -> case error of
Nothing -> success input Nothing
_ -> failureError position error
sat :: forall e. (Char -> Boolean) -> Parser e Char
sat p = do pos <- current_position
x <- item
if p x then pure x else Parser \_ -> failure pos
char :: forall e. Char -> Parser e Char
char x = sat (_ == x)
-- | `char_num` checks a character based on its decimal code number.
-- | Useful mainly to parse unprintable characters.
-- |
-- | Example: `char_num 32` checks for a space, such as `char ' '`.
char_num :: forall e. Int -> Parser e Char
char_num n = sat (\c -> n == C.toCharCode c)
-- | `char_range` checks a character based on a range of possible decimal code numbers.
-- |
-- | Example, checking all visible ASCII characters: `char_range 33 126`.
char_range :: forall e. Int -> Int -> Parser e Char
char_range n1 n2 = sat (\c -> between n1 n2 $ C.toCharCode c)
string :: forall e. String -> Parser e String
string str = case A.uncons (CU.toCharArray str) of
Nothing -> Parser \input -> success input ""
Just { head: x, tail: xs } -> do c <- char x
rest <- string (CU.fromCharArray xs)
pure (concat c rest)
many1 :: forall e v. Parser e v -> Parser e (Array v)
many1 p = do first <- p
rest <- A.many p
pure $ A.cons first rest
-- | `lookahead` reads an input but doesn't consume it.
lookahead :: forall e v. Parser e v -> Parser e v
lookahead p = do input <- current_input
v <- p
_ <- rollback input
pure $ v
-- | `until` parses the input until an ending parser succeed.
-- | Arguments are the end-parser then the parser to read the input.
until :: forall e v. Parser e v -> Parser e v -> Parser e (Array v)
until parser_end p = do
input <- current_input
case parse parser_end input of
Left _ -> do v <- p
mayberest <- tryMaybe $ defer \_ -> until parser_end p
pure $ [v] <> maybe [] id mayberest
Right _ -> Parser \_ -> failure input.position
where id x = x
-- | Parse the last character of a String.
-- | Return false in case the string is empty.
parse_last_char :: forall e. String -> Parser e Char -> Boolean
parse_last_char s p = case last_char s of
Nothing -> false
Just c -> case parse p { string: CU.singleton c, position: 0 } of
Left _ -> false
_ -> true
where
-- Get the last character of a String.
last_char :: String -> Maybe Char
last_char = A.last <<< CU.toCharArray
-- | `read_input`: take a parser and provide the whole read input segment, unaltered.
-- | This function allows to create complex parsers only reading input, not re-assembling it once read.
-- | This prevents errors while concatenating strings while the unaltered input already is there to be used.
-- |
-- | This function can be used to verify a parser doesn't alter its input.
-- |
-- | Example, the following function `func` uses a combination of several
-- | parsers without having to handle the input in any way (no manipulation whatsoever).
-- | Yet, thanks to `read_input` the returned value of `func` is the whole, unaltered read input.
-- | ```
-- | func :: forall e. Parser e String
-- | func = read_input do _ <- something
-- | _ <- tryMaybe something_else
-- | void very_complex_stuff
-- | ```
read_input :: forall e s. Parser e s -> Parser e String
read_input p = do input <- current_input
_ <- p
endpos <- current_position
pure $ CU.take (endpos - input.position) input.string