Big improvement on the base structure type.

This commit is contained in:
Philippe Pittoli 2024-01-18 04:51:50 +01:00
parent 0702ba184e
commit efe41a0d3f
2 changed files with 69 additions and 53 deletions

View File

@ -12,14 +12,17 @@ import Data.String.CodeUnits as CU
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Control.Plus (empty) import Control.Plus (empty)
import Parser (Parser(..), Position
, success, fail, failError
, alphanum, char, letter, many1, parse, string)
type Size = Int
data DomainError data DomainError
= SubdomainTooLarge Position Size = SubdomainTooLarge Position Size
| DomainTooLarge Size | DomainTooLarge Size
| InvalidCharacter Position | InvalidCharacter Position
| EOFExpected | EOFExpected
import Parser (Parser(..), success, fail, failError, alphanum, char, letter, many1, parse, string)
-- | From RFC 1035: <let-dig> ::= <letter> | <digit> -- | From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: forall e. Parser e Char let_dig :: forall e. Parser e Char
let_dig = alphanum let_dig = alphanum
@ -47,7 +50,7 @@ last_char = A.last <<< CU.toCharArray
parse_last_char :: forall e. String -> Parser e Char -> Boolean parse_last_char :: forall e. String -> Parser e Char -> Boolean
parse_last_char s p = case last_char s of parse_last_char s p = case last_char s of
Nothing -> false Nothing -> false
Just c -> case parse p (CU.singleton c) of Just c -> case parse p { string: CU.singleton c, position: 0 } of
Left _ -> false Left _ -> false
_ -> true _ -> true
@ -56,21 +59,20 @@ parse_last_char s p = case last_char s of
-- | the code will "continue to work" but without what's been parsed. -- | the code will "continue to work" but without what's been parsed.
-- | This may sound reasonable but it prevents knowing if a problem actually occured! -- | This may sound reasonable but it prevents knowing if a problem actually occured!
-- | We cannot do a `try parser <|> alternative` since it will always work! -- | We cannot do a `try parser <|> alternative` since it will always work!
try :: forall e a. Parser e a -> Parser e a try :: forall e a. Parser e a -> Parser e (Maybe a)
try p = Parser p' try p = Parser p'
where p' str = case parse p str of where p' input = case parse p input of
Left { pos: _, input: _, error: e } -> case e of Left _ -> Right { suffix: input, result: Nothing }
Nothing -> fail 0 str Right { suffix, result } -> Right { suffix, result: Just result }
error -> failError 0 str error
right -> right
-- Right { pos: pos, input: input, result: value } -> success pos input value
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ] -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: forall e. Parser e String label :: forall e. Parser e String
label = do label = do
l <- letter l <- letter
s <- try ldh_str s <- try ldh_str
let labelstr = CU.singleton l <> CU.fromCharArray s let labelstr = CU.singleton l <> case s of
Nothing -> ""
Just content -> CU.fromCharArray content
if (S.length labelstr > label_maxsize) if (S.length labelstr > label_maxsize)
then empty then empty
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig)) else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
@ -86,31 +88,31 @@ subdomain = do
_ <- char '.' _ <- char '.'
sub <- defer \_ -> subdomain sub <- defer \_ -> subdomain
pure sub pure sub
if (S.length upperlabels == 0) -- This is related to the problem of not having a proper base structure. case upperlabels of
then pure lab Nothing -> pure lab
else pure $ lab <> "." <> upperlabels Just l -> pure $ lab <> "." <> l
-- | Test for the end-of-file (no more input). -- | Test for the end-of-file (no more input).
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string. -- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
eof :: forall e. Parser e String eof :: Parser DomainError String
eof = Parser \str -> case S.length str of eof = Parser \input -> case S.length input.string of
0 -> success 0 str "" 0 -> success input ""
_ -> failError 0 str (Just EOFExpected) _ -> failError input.position (Just EOFExpected)
-- | Test for the domain to be a list of subdomains then an end-of-file. -- | Test for the domain to be a list of subdomains then an end-of-file.
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.'). -- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
sub_eof :: forall e. Parser e String sub_eof :: Parser DomainError String
sub_eof = do sub_eof = do
sub <- subdomain sub <- subdomain
maybe_final_point <- try char '.' maybe_final_point <- try $ char '.'
_ <- eof -- In case there is still some input, it fails. _ <- eof -- In case there is still some input, it fails.
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
if S.length parsed_domain > max_domain_length if S.length parsed_domain > max_domain_length
then empty -- TODO: error management. then empty -- TODO: error management.
else pure parsed_domain else pure parsed_domain
where where
did_we_parsed_the_final_point '.' sub = sub <> "." did_we_parsed_the_final_point Nothing sub = sub
did_we_parsed_the_final_point _ sub = sub did_we_parsed_the_final_point _ sub = sub <> "."
-- | From RFC 1035: <domain> ::= <subdomain> | " " -- | From RFC 1035: <domain> ::= <subdomain> | " "
-- | -- |
@ -119,5 +121,5 @@ sub_eof = do
-- | However, this last '.' character should be acceptable in most applications. -- | However, this last '.' character should be acceptable in most applications.
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.` -- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
-- | has to be differenciated from a "relative" name (www). -- | has to be differenciated from a "relative" name (www).
domain :: forall e. Parser e String domain :: Parser DomainError String
domain = (string " " *> eof) <|> sub_eof domain = (string " " *> eof) <|> sub_eof

View File

@ -10,16 +10,17 @@ import Data.Array as A
import Data.Int as Int import Data.Int as Int
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.String.CodeUnits (uncons, toCharArray, fromCharArray) import Data.String.CodeUnits (toCharArray, fromCharArray)
-- import Data.Tuple (Tuple(..)) -- import Data.Tuple (Tuple(..))
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper) import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
type Input = (Tuple Position String)
type Size = Int
type Position = Int type Position = Int
type Error e = { pos :: Position, input :: String, error :: Maybe e } type PosString = { string :: String, position :: Position }
type Value v = { pos :: Position, input :: String, result :: v } type Input = PosString
type Error e = { position :: Position, error :: Maybe e }
type Value v = { result :: v, suffix :: Input }
type Result e v = Either (Error e) (Value v) type Result e v = Either (Error e) (Value v)
newtype Parser e v = Parser (Input -> Result e v) newtype Parser e v = Parser (Input -> Result e v)
@ -27,64 +28,77 @@ newtype Parser e v = Parser (Input -> Result e v)
parse :: forall e v. Parser e v -> (Input -> Result e v) parse :: forall e v. Parser e v -> (Input -> Result e v)
parse (Parser p) = p parse (Parser p) = p
failError :: forall e v. Position -> Input -> Maybe e -> Result e v -- | Fail with a specified error.
failError pos input error = Left { pos: pos, error: error, input: input } -- | When a parsing has a specified error, no alternative will be tried and the error is reported.
-- TODO: stop the parsing when an error is provided.
failError :: forall e v. Position -> Maybe e -> Result e v
failError position error = Left { position, error }
fail :: forall e v. Position -> Input -> Result e v -- | Fail without a specified error.
fail pos input = failError pos input Nothing -- | This is used in generic parsers not attached to a specified context,
-- | such as `digit` or `letter`.
-- | Also, this can be used to express a possibly expected invalid parsing that should not
-- | halt the parsing, but rather let an alternative path to be tried.
-- TODO: stop the parsing when an error is provided.
fail :: forall e v. Position -> Result e v
fail position = failError position Nothing
success :: forall e v. Position -> Input -> v -> Result e v -- | `success` constructs a result value for a successful parsing.
success pos input v = Right { pos: pos, input: input, result: v } -- | It requires the input (a string with its current position) and the result.
success :: forall e v. Input -> v -> Result e v
success suffix result = Right { suffix, result }
-- | Read a single char from the input.
item :: forall e. Parser e Char item :: forall e. Parser e Char
item = Parser p item = Parser p
where p str = case uncons str of where
Nothing -> fail 0 str p input = case A.uncons (toCharArray input.string) of
Just { head: x, tail: xs } -> success 1 xs x Nothing -> fail input.position
Just { head: x, tail: xs } -> success (input { string = (fromCharArray xs)
, position = input.position+1 }) x
instance functorParser :: Functor (Parser e) where instance functorParser :: Functor (Parser e) where
map :: forall a b. (a -> b) -> Parser e a -> Parser e b map :: forall a b. (a -> b) -> Parser e a -> Parser e b
map f (Parser p) = map f (Parser p) =
Parser $ \s0 -> case p s0 of Parser $ \s0 -> case p s0 of
Right { pos: pos, input: input, result: v } -> success pos input (f v) Right { suffix, result } -> success suffix (f result)
Left { pos: pos, input: input, error: e } -> failError pos input e Left error -> Left error
instance applyParser :: Apply (Parser e) where instance applyParser :: Apply (Parser e) where
apply (Parser p1) (Parser p2) apply (Parser p1) (Parser p2)
= Parser $ \s0 -> case p1 s0 of = Parser $ \s0 -> case p1 s0 of
Right { suffix: suffix1, result: result1 } -> case p2 suffix1 of
Right { suffix: suffix2, result: result2 } -> success suffix2 (result1 result2)
Left error -> Left error Left error -> Left error
Right { pos: pos1, input: s1, result: r1 } -> case p2 s1 of
Left error -> Left error Left error -> Left error
Right { pos: pos2, input: s2, result: r2 } -> success (pos1 + pos2) s2 (r1 r2)
instance applicativeParser :: Applicative (Parser e) where instance applicativeParser :: Applicative (Parser e) where
pure a = Parser $ \str -> success 0 str a pure a = Parser $ \input -> success input a
instance bindParser :: Bind (Parser e) where instance bindParser :: Bind (Parser e) where
bind (Parser p) f = Parser $ \s0 -> case p s0 of bind (Parser p) f = Parser $ \s0 -> case p s0 of
Right { suffix, result } -> parse (f result) suffix
Left error -> Left error Left error -> Left error
Right { pos: _, input: input, result: result } ->
let (Parser p2) = f result
in p2 input
-- | `Alt` instance: in case of an error that produced an error value, the computation stops.
instance altParser :: Alt (Parser e) where instance altParser :: Alt (Parser e) where
alt :: forall v. Parser e v -> Parser e v -> Parser e v alt :: forall v. Parser e v -> Parser e v -> Parser e v
alt (Parser p1) (Parser p2) = Parser p alt (Parser p1) (Parser p2) = Parser p
where where
p stream = case p1 stream of p stream = case p1 stream of
Left { pos: pos, error: error, input: input } -> case error of Left { position, error } -> case error of
Nothing -> p2 stream Nothing -> p2 stream
_ -> failError pos input error _ -> failError position error
right -> right Right right -> Right right
instance plusParser :: Plus (Parser e) where instance plusParser :: Plus (Parser e) where
empty :: forall v. Parser e v empty :: forall v. Parser e v
empty = Parser \input -> fail 0 input empty = Parser \_ -> fail 0
instance alternativeParser :: Alternative (Parser e) instance alternativeParser :: Alternative (Parser e)
instance lazyParser :: Lazy (Parser e v) where instance lazyParser :: Lazy (Parser e v) where
defer f = Parser \str -> parse (f unit) str defer f = Parser \input -> parse (f unit) input
-- Generic parsing functions. -- Generic parsing functions.
@ -112,7 +126,7 @@ char x = sat (_ == x)
string :: forall e. String -> Parser e String string :: forall e. String -> Parser e String
string str = case A.uncons (toCharArray str) of string str = case A.uncons (toCharArray str) of
Nothing -> Parser \stream -> success 0 stream "" Nothing -> Parser \input -> success input ""
Just { head: x, tail: xs } -> do c <- char x Just { head: x, tail: xs } -> do c <- char x
rest <- string (fromCharArray xs) rest <- string (fromCharArray xs)
pure (concat c rest) pure (concat c rest)