Big improvement on the base structure type.
This commit is contained in:
parent
0702ba184e
commit
efe41a0d3f
@ -12,14 +12,17 @@ import Data.String.CodeUnits as CU
|
||||
import Control.Alt ((<|>))
|
||||
import Control.Plus (empty)
|
||||
|
||||
import Parser (Parser(..), Position
|
||||
, success, fail, failError
|
||||
, alphanum, char, letter, many1, parse, string)
|
||||
|
||||
type Size = Int
|
||||
data DomainError
|
||||
= SubdomainTooLarge Position Size
|
||||
| DomainTooLarge Size
|
||||
| InvalidCharacter Position
|
||||
| EOFExpected
|
||||
|
||||
import Parser (Parser(..), success, fail, failError, alphanum, char, letter, many1, parse, string)
|
||||
|
||||
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||
let_dig :: forall e. Parser e Char
|
||||
let_dig = alphanum
|
||||
@ -47,7 +50,7 @@ last_char = A.last <<< CU.toCharArray
|
||||
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
||||
parse_last_char s p = case last_char s of
|
||||
Nothing -> false
|
||||
Just c -> case parse p (CU.singleton c) of
|
||||
Just c -> case parse p { string: CU.singleton c, position: 0 } of
|
||||
Left _ -> false
|
||||
_ -> true
|
||||
|
||||
@ -56,21 +59,20 @@ parse_last_char s p = case last_char s of
|
||||
-- | the code will "continue to work" but without what's been parsed.
|
||||
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
|
||||
-- | We cannot do a `try parser <|> alternative` since it will always work!
|
||||
try :: forall e a. Parser e a -> Parser e a
|
||||
try :: forall e a. Parser e a -> Parser e (Maybe a)
|
||||
try p = Parser p'
|
||||
where p' str = case parse p str of
|
||||
Left { pos: _, input: _, error: e } -> case e of
|
||||
Nothing -> fail 0 str
|
||||
error -> failError 0 str error
|
||||
right -> right
|
||||
-- Right { pos: pos, input: input, result: value } -> success pos input value
|
||||
where p' input = case parse p input of
|
||||
Left _ -> Right { suffix: input, result: Nothing }
|
||||
Right { suffix, result } -> Right { suffix, result: Just result }
|
||||
|
||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||
label :: forall e. Parser e String
|
||||
label = do
|
||||
l <- letter
|
||||
s <- try ldh_str
|
||||
let labelstr = CU.singleton l <> CU.fromCharArray s
|
||||
let labelstr = CU.singleton l <> case s of
|
||||
Nothing -> ""
|
||||
Just content -> CU.fromCharArray content
|
||||
if (S.length labelstr > label_maxsize)
|
||||
then empty
|
||||
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
|
||||
@ -86,31 +88,31 @@ subdomain = do
|
||||
_ <- char '.'
|
||||
sub <- defer \_ -> subdomain
|
||||
pure sub
|
||||
if (S.length upperlabels == 0) -- This is related to the problem of not having a proper base structure.
|
||||
then pure lab
|
||||
else pure $ lab <> "." <> upperlabels
|
||||
case upperlabels of
|
||||
Nothing -> pure lab
|
||||
Just l -> pure $ lab <> "." <> l
|
||||
|
||||
-- | Test for the end-of-file (no more input).
|
||||
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
|
||||
eof :: forall e. Parser e String
|
||||
eof = Parser \str -> case S.length str of
|
||||
0 -> success 0 str ""
|
||||
_ -> failError 0 str (Just EOFExpected)
|
||||
eof :: Parser DomainError String
|
||||
eof = Parser \input -> case S.length input.string of
|
||||
0 -> success input ""
|
||||
_ -> failError input.position (Just EOFExpected)
|
||||
|
||||
-- | Test for the domain to be a list of subdomains then an end-of-file.
|
||||
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
|
||||
sub_eof :: forall e. Parser e String
|
||||
sub_eof :: Parser DomainError String
|
||||
sub_eof = do
|
||||
sub <- subdomain
|
||||
maybe_final_point <- try char '.'
|
||||
maybe_final_point <- try $ char '.'
|
||||
_ <- eof -- In case there is still some input, it fails.
|
||||
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
|
||||
if S.length parsed_domain > max_domain_length
|
||||
then empty -- TODO: error management.
|
||||
else pure parsed_domain
|
||||
where
|
||||
did_we_parsed_the_final_point '.' sub = sub <> "."
|
||||
did_we_parsed_the_final_point _ sub = sub
|
||||
did_we_parsed_the_final_point Nothing sub = sub
|
||||
did_we_parsed_the_final_point _ sub = sub <> "."
|
||||
|
||||
-- | From RFC 1035: <domain> ::= <subdomain> | " "
|
||||
-- |
|
||||
@ -119,5 +121,5 @@ sub_eof = do
|
||||
-- | However, this last '.' character should be acceptable in most applications.
|
||||
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
||||
-- | has to be differenciated from a "relative" name (www).
|
||||
domain :: forall e. Parser e String
|
||||
domain :: Parser DomainError String
|
||||
domain = (string " " *> eof) <|> sub_eof
|
||||
|
@ -10,16 +10,17 @@ import Data.Array as A
|
||||
import Data.Int as Int
|
||||
import Data.Either (Either(..))
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.String.CodeUnits (uncons, toCharArray, fromCharArray)
|
||||
import Data.String.CodeUnits (toCharArray, fromCharArray)
|
||||
-- import Data.Tuple (Tuple(..))
|
||||
|
||||
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
||||
|
||||
type Input = (Tuple Position String)
|
||||
type Size = Int
|
||||
type Position = Int
|
||||
type Error e = { pos :: Position, input :: String, error :: Maybe e }
|
||||
type Value v = { pos :: Position, input :: String, result :: v }
|
||||
type PosString = { string :: String, position :: Position }
|
||||
type Input = PosString
|
||||
|
||||
type Error e = { position :: Position, error :: Maybe e }
|
||||
type Value v = { result :: v, suffix :: Input }
|
||||
type Result e v = Either (Error e) (Value v)
|
||||
|
||||
newtype Parser e v = Parser (Input -> Result e v)
|
||||
@ -27,64 +28,77 @@ newtype Parser e v = Parser (Input -> Result e v)
|
||||
parse :: forall e v. Parser e v -> (Input -> Result e v)
|
||||
parse (Parser p) = p
|
||||
|
||||
failError :: forall e v. Position -> Input -> Maybe e -> Result e v
|
||||
failError pos input error = Left { pos: pos, error: error, input: input }
|
||||
-- | Fail with a specified error.
|
||||
-- | When a parsing has a specified error, no alternative will be tried and the error is reported.
|
||||
-- TODO: stop the parsing when an error is provided.
|
||||
failError :: forall e v. Position -> Maybe e -> Result e v
|
||||
failError position error = Left { position, error }
|
||||
|
||||
fail :: forall e v. Position -> Input -> Result e v
|
||||
fail pos input = failError pos input Nothing
|
||||
-- | Fail without a specified error.
|
||||
-- | This is used in generic parsers not attached to a specified context,
|
||||
-- | such as `digit` or `letter`.
|
||||
-- | Also, this can be used to express a possibly expected invalid parsing that should not
|
||||
-- | halt the parsing, but rather let an alternative path to be tried.
|
||||
-- TODO: stop the parsing when an error is provided.
|
||||
fail :: forall e v. Position -> Result e v
|
||||
fail position = failError position Nothing
|
||||
|
||||
success :: forall e v. Position -> Input -> v -> Result e v
|
||||
success pos input v = Right { pos: pos, input: input, result: v }
|
||||
-- | `success` constructs a result value for a successful parsing.
|
||||
-- | It requires the input (a string with its current position) and the result.
|
||||
success :: forall e v. Input -> v -> Result e v
|
||||
success suffix result = Right { suffix, result }
|
||||
|
||||
-- | Read a single char from the input.
|
||||
item :: forall e. Parser e Char
|
||||
item = Parser p
|
||||
where p str = case uncons str of
|
||||
Nothing -> fail 0 str
|
||||
Just { head: x, tail: xs } -> success 1 xs x
|
||||
where
|
||||
p input = case A.uncons (toCharArray input.string) of
|
||||
Nothing -> fail input.position
|
||||
Just { head: x, tail: xs } -> success (input { string = (fromCharArray xs)
|
||||
, position = input.position+1 }) x
|
||||
|
||||
instance functorParser :: Functor (Parser e) where
|
||||
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
|
||||
map f (Parser p) =
|
||||
Parser $ \s0 -> case p s0 of
|
||||
Right { pos: pos, input: input, result: v } -> success pos input (f v)
|
||||
Left { pos: pos, input: input, error: e } -> failError pos input e
|
||||
Right { suffix, result } -> success suffix (f result)
|
||||
Left error -> Left error
|
||||
|
||||
instance applyParser :: Apply (Parser e) where
|
||||
apply (Parser p1) (Parser p2)
|
||||
= Parser $ \s0 -> case p1 s0 of
|
||||
Left error -> Left error
|
||||
Right { pos: pos1, input: s1, result: r1 } -> case p2 s1 of
|
||||
Right { suffix: suffix1, result: result1 } -> case p2 suffix1 of
|
||||
Right { suffix: suffix2, result: result2 } -> success suffix2 (result1 result2)
|
||||
Left error -> Left error
|
||||
Right { pos: pos2, input: s2, result: r2 } -> success (pos1 + pos2) s2 (r1 r2)
|
||||
Left error -> Left error
|
||||
|
||||
instance applicativeParser :: Applicative (Parser e) where
|
||||
pure a = Parser $ \str -> success 0 str a
|
||||
pure a = Parser $ \input -> success input a
|
||||
|
||||
instance bindParser :: Bind (Parser e) where
|
||||
bind (Parser p) f = Parser $ \s0 -> case p s0 of
|
||||
Left error -> Left error
|
||||
Right { pos: _, input: input, result: result } ->
|
||||
let (Parser p2) = f result
|
||||
in p2 input
|
||||
Right { suffix, result } -> parse (f result) suffix
|
||||
Left error -> Left error
|
||||
|
||||
-- | `Alt` instance: in case of an error that produced an error value, the computation stops.
|
||||
instance altParser :: Alt (Parser e) where
|
||||
alt :: forall v. Parser e v -> Parser e v -> Parser e v
|
||||
alt (Parser p1) (Parser p2) = Parser p
|
||||
where
|
||||
p stream = case p1 stream of
|
||||
Left { pos: pos, error: error, input: input } -> case error of
|
||||
Left { position, error } -> case error of
|
||||
Nothing -> p2 stream
|
||||
_ -> failError pos input error
|
||||
right -> right
|
||||
_ -> failError position error
|
||||
Right right -> Right right
|
||||
|
||||
instance plusParser :: Plus (Parser e) where
|
||||
empty :: forall v. Parser e v
|
||||
empty = Parser \input -> fail 0 input
|
||||
empty = Parser \_ -> fail 0
|
||||
|
||||
instance alternativeParser :: Alternative (Parser e)
|
||||
|
||||
instance lazyParser :: Lazy (Parser e v) where
|
||||
defer f = Parser \str -> parse (f unit) str
|
||||
defer f = Parser \input -> parse (f unit) input
|
||||
|
||||
-- Generic parsing functions.
|
||||
|
||||
@ -112,7 +126,7 @@ char x = sat (_ == x)
|
||||
|
||||
string :: forall e. String -> Parser e String
|
||||
string str = case A.uncons (toCharArray str) of
|
||||
Nothing -> Parser \stream -> success 0 stream ""
|
||||
Nothing -> Parser \input -> success input ""
|
||||
Just { head: x, tail: xs } -> do c <- char x
|
||||
rest <- string (fromCharArray xs)
|
||||
pure (concat c rest)
|
||||
|
Loading…
Reference in New Issue
Block a user