Big improvement on the base structure type.
This commit is contained in:
parent
0702ba184e
commit
efe41a0d3f
@ -12,14 +12,17 @@ import Data.String.CodeUnits as CU
|
|||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Control.Plus (empty)
|
import Control.Plus (empty)
|
||||||
|
|
||||||
|
import Parser (Parser(..), Position
|
||||||
|
, success, fail, failError
|
||||||
|
, alphanum, char, letter, many1, parse, string)
|
||||||
|
|
||||||
|
type Size = Int
|
||||||
data DomainError
|
data DomainError
|
||||||
= SubdomainTooLarge Position Size
|
= SubdomainTooLarge Position Size
|
||||||
| DomainTooLarge Size
|
| DomainTooLarge Size
|
||||||
| InvalidCharacter Position
|
| InvalidCharacter Position
|
||||||
| EOFExpected
|
| EOFExpected
|
||||||
|
|
||||||
import Parser (Parser(..), success, fail, failError, alphanum, char, letter, many1, parse, string)
|
|
||||||
|
|
||||||
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
|
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||||
let_dig :: forall e. Parser e Char
|
let_dig :: forall e. Parser e Char
|
||||||
let_dig = alphanum
|
let_dig = alphanum
|
||||||
@ -47,7 +50,7 @@ last_char = A.last <<< CU.toCharArray
|
|||||||
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
||||||
parse_last_char s p = case last_char s of
|
parse_last_char s p = case last_char s of
|
||||||
Nothing -> false
|
Nothing -> false
|
||||||
Just c -> case parse p (CU.singleton c) of
|
Just c -> case parse p { string: CU.singleton c, position: 0 } of
|
||||||
Left _ -> false
|
Left _ -> false
|
||||||
_ -> true
|
_ -> true
|
||||||
|
|
||||||
@ -56,21 +59,20 @@ parse_last_char s p = case last_char s of
|
|||||||
-- | the code will "continue to work" but without what's been parsed.
|
-- | the code will "continue to work" but without what's been parsed.
|
||||||
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
|
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
|
||||||
-- | We cannot do a `try parser <|> alternative` since it will always work!
|
-- | We cannot do a `try parser <|> alternative` since it will always work!
|
||||||
try :: forall e a. Parser e a -> Parser e a
|
try :: forall e a. Parser e a -> Parser e (Maybe a)
|
||||||
try p = Parser p'
|
try p = Parser p'
|
||||||
where p' str = case parse p str of
|
where p' input = case parse p input of
|
||||||
Left { pos: _, input: _, error: e } -> case e of
|
Left _ -> Right { suffix: input, result: Nothing }
|
||||||
Nothing -> fail 0 str
|
Right { suffix, result } -> Right { suffix, result: Just result }
|
||||||
error -> failError 0 str error
|
|
||||||
right -> right
|
|
||||||
-- Right { pos: pos, input: input, result: value } -> success pos input value
|
|
||||||
|
|
||||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||||
label :: forall e. Parser e String
|
label :: forall e. Parser e String
|
||||||
label = do
|
label = do
|
||||||
l <- letter
|
l <- letter
|
||||||
s <- try ldh_str
|
s <- try ldh_str
|
||||||
let labelstr = CU.singleton l <> CU.fromCharArray s
|
let labelstr = CU.singleton l <> case s of
|
||||||
|
Nothing -> ""
|
||||||
|
Just content -> CU.fromCharArray content
|
||||||
if (S.length labelstr > label_maxsize)
|
if (S.length labelstr > label_maxsize)
|
||||||
then empty
|
then empty
|
||||||
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
|
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
|
||||||
@ -86,31 +88,31 @@ subdomain = do
|
|||||||
_ <- char '.'
|
_ <- char '.'
|
||||||
sub <- defer \_ -> subdomain
|
sub <- defer \_ -> subdomain
|
||||||
pure sub
|
pure sub
|
||||||
if (S.length upperlabels == 0) -- This is related to the problem of not having a proper base structure.
|
case upperlabels of
|
||||||
then pure lab
|
Nothing -> pure lab
|
||||||
else pure $ lab <> "." <> upperlabels
|
Just l -> pure $ lab <> "." <> l
|
||||||
|
|
||||||
-- | Test for the end-of-file (no more input).
|
-- | Test for the end-of-file (no more input).
|
||||||
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
|
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
|
||||||
eof :: forall e. Parser e String
|
eof :: Parser DomainError String
|
||||||
eof = Parser \str -> case S.length str of
|
eof = Parser \input -> case S.length input.string of
|
||||||
0 -> success 0 str ""
|
0 -> success input ""
|
||||||
_ -> failError 0 str (Just EOFExpected)
|
_ -> failError input.position (Just EOFExpected)
|
||||||
|
|
||||||
-- | Test for the domain to be a list of subdomains then an end-of-file.
|
-- | Test for the domain to be a list of subdomains then an end-of-file.
|
||||||
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
|
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
|
||||||
sub_eof :: forall e. Parser e String
|
sub_eof :: Parser DomainError String
|
||||||
sub_eof = do
|
sub_eof = do
|
||||||
sub <- subdomain
|
sub <- subdomain
|
||||||
maybe_final_point <- try char '.'
|
maybe_final_point <- try $ char '.'
|
||||||
_ <- eof -- In case there is still some input, it fails.
|
_ <- eof -- In case there is still some input, it fails.
|
||||||
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
|
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
|
||||||
if S.length parsed_domain > max_domain_length
|
if S.length parsed_domain > max_domain_length
|
||||||
then empty -- TODO: error management.
|
then empty -- TODO: error management.
|
||||||
else pure parsed_domain
|
else pure parsed_domain
|
||||||
where
|
where
|
||||||
did_we_parsed_the_final_point '.' sub = sub <> "."
|
did_we_parsed_the_final_point Nothing sub = sub
|
||||||
did_we_parsed_the_final_point _ sub = sub
|
did_we_parsed_the_final_point _ sub = sub <> "."
|
||||||
|
|
||||||
-- | From RFC 1035: <domain> ::= <subdomain> | " "
|
-- | From RFC 1035: <domain> ::= <subdomain> | " "
|
||||||
-- |
|
-- |
|
||||||
@ -119,5 +121,5 @@ sub_eof = do
|
|||||||
-- | However, this last '.' character should be acceptable in most applications.
|
-- | However, this last '.' character should be acceptable in most applications.
|
||||||
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
||||||
-- | has to be differenciated from a "relative" name (www).
|
-- | has to be differenciated from a "relative" name (www).
|
||||||
domain :: forall e. Parser e String
|
domain :: Parser DomainError String
|
||||||
domain = (string " " *> eof) <|> sub_eof
|
domain = (string " " *> eof) <|> sub_eof
|
||||||
|
@ -10,16 +10,17 @@ import Data.Array as A
|
|||||||
import Data.Int as Int
|
import Data.Int as Int
|
||||||
import Data.Either (Either(..))
|
import Data.Either (Either(..))
|
||||||
import Data.Maybe (Maybe(..))
|
import Data.Maybe (Maybe(..))
|
||||||
import Data.String.CodeUnits (uncons, toCharArray, fromCharArray)
|
import Data.String.CodeUnits (toCharArray, fromCharArray)
|
||||||
-- import Data.Tuple (Tuple(..))
|
-- import Data.Tuple (Tuple(..))
|
||||||
|
|
||||||
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
||||||
|
|
||||||
type Input = (Tuple Position String)
|
|
||||||
type Size = Int
|
|
||||||
type Position = Int
|
type Position = Int
|
||||||
type Error e = { pos :: Position, input :: String, error :: Maybe e }
|
type PosString = { string :: String, position :: Position }
|
||||||
type Value v = { pos :: Position, input :: String, result :: v }
|
type Input = PosString
|
||||||
|
|
||||||
|
type Error e = { position :: Position, error :: Maybe e }
|
||||||
|
type Value v = { result :: v, suffix :: Input }
|
||||||
type Result e v = Either (Error e) (Value v)
|
type Result e v = Either (Error e) (Value v)
|
||||||
|
|
||||||
newtype Parser e v = Parser (Input -> Result e v)
|
newtype Parser e v = Parser (Input -> Result e v)
|
||||||
@ -27,64 +28,77 @@ newtype Parser e v = Parser (Input -> Result e v)
|
|||||||
parse :: forall e v. Parser e v -> (Input -> Result e v)
|
parse :: forall e v. Parser e v -> (Input -> Result e v)
|
||||||
parse (Parser p) = p
|
parse (Parser p) = p
|
||||||
|
|
||||||
failError :: forall e v. Position -> Input -> Maybe e -> Result e v
|
-- | Fail with a specified error.
|
||||||
failError pos input error = Left { pos: pos, error: error, input: input }
|
-- | When a parsing has a specified error, no alternative will be tried and the error is reported.
|
||||||
|
-- TODO: stop the parsing when an error is provided.
|
||||||
|
failError :: forall e v. Position -> Maybe e -> Result e v
|
||||||
|
failError position error = Left { position, error }
|
||||||
|
|
||||||
fail :: forall e v. Position -> Input -> Result e v
|
-- | Fail without a specified error.
|
||||||
fail pos input = failError pos input Nothing
|
-- | This is used in generic parsers not attached to a specified context,
|
||||||
|
-- | such as `digit` or `letter`.
|
||||||
|
-- | Also, this can be used to express a possibly expected invalid parsing that should not
|
||||||
|
-- | halt the parsing, but rather let an alternative path to be tried.
|
||||||
|
-- TODO: stop the parsing when an error is provided.
|
||||||
|
fail :: forall e v. Position -> Result e v
|
||||||
|
fail position = failError position Nothing
|
||||||
|
|
||||||
success :: forall e v. Position -> Input -> v -> Result e v
|
-- | `success` constructs a result value for a successful parsing.
|
||||||
success pos input v = Right { pos: pos, input: input, result: v }
|
-- | It requires the input (a string with its current position) and the result.
|
||||||
|
success :: forall e v. Input -> v -> Result e v
|
||||||
|
success suffix result = Right { suffix, result }
|
||||||
|
|
||||||
|
-- | Read a single char from the input.
|
||||||
item :: forall e. Parser e Char
|
item :: forall e. Parser e Char
|
||||||
item = Parser p
|
item = Parser p
|
||||||
where p str = case uncons str of
|
where
|
||||||
Nothing -> fail 0 str
|
p input = case A.uncons (toCharArray input.string) of
|
||||||
Just { head: x, tail: xs } -> success 1 xs x
|
Nothing -> fail input.position
|
||||||
|
Just { head: x, tail: xs } -> success (input { string = (fromCharArray xs)
|
||||||
|
, position = input.position+1 }) x
|
||||||
|
|
||||||
instance functorParser :: Functor (Parser e) where
|
instance functorParser :: Functor (Parser e) where
|
||||||
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
|
map :: forall a b. (a -> b) -> Parser e a -> Parser e b
|
||||||
map f (Parser p) =
|
map f (Parser p) =
|
||||||
Parser $ \s0 -> case p s0 of
|
Parser $ \s0 -> case p s0 of
|
||||||
Right { pos: pos, input: input, result: v } -> success pos input (f v)
|
Right { suffix, result } -> success suffix (f result)
|
||||||
Left { pos: pos, input: input, error: e } -> failError pos input e
|
Left error -> Left error
|
||||||
|
|
||||||
instance applyParser :: Apply (Parser e) where
|
instance applyParser :: Apply (Parser e) where
|
||||||
apply (Parser p1) (Parser p2)
|
apply (Parser p1) (Parser p2)
|
||||||
= Parser $ \s0 -> case p1 s0 of
|
= Parser $ \s0 -> case p1 s0 of
|
||||||
|
Right { suffix: suffix1, result: result1 } -> case p2 suffix1 of
|
||||||
|
Right { suffix: suffix2, result: result2 } -> success suffix2 (result1 result2)
|
||||||
Left error -> Left error
|
Left error -> Left error
|
||||||
Right { pos: pos1, input: s1, result: r1 } -> case p2 s1 of
|
|
||||||
Left error -> Left error
|
Left error -> Left error
|
||||||
Right { pos: pos2, input: s2, result: r2 } -> success (pos1 + pos2) s2 (r1 r2)
|
|
||||||
|
|
||||||
instance applicativeParser :: Applicative (Parser e) where
|
instance applicativeParser :: Applicative (Parser e) where
|
||||||
pure a = Parser $ \str -> success 0 str a
|
pure a = Parser $ \input -> success input a
|
||||||
|
|
||||||
instance bindParser :: Bind (Parser e) where
|
instance bindParser :: Bind (Parser e) where
|
||||||
bind (Parser p) f = Parser $ \s0 -> case p s0 of
|
bind (Parser p) f = Parser $ \s0 -> case p s0 of
|
||||||
|
Right { suffix, result } -> parse (f result) suffix
|
||||||
Left error -> Left error
|
Left error -> Left error
|
||||||
Right { pos: _, input: input, result: result } ->
|
|
||||||
let (Parser p2) = f result
|
|
||||||
in p2 input
|
|
||||||
|
|
||||||
|
-- | `Alt` instance: in case of an error that produced an error value, the computation stops.
|
||||||
instance altParser :: Alt (Parser e) where
|
instance altParser :: Alt (Parser e) where
|
||||||
alt :: forall v. Parser e v -> Parser e v -> Parser e v
|
alt :: forall v. Parser e v -> Parser e v -> Parser e v
|
||||||
alt (Parser p1) (Parser p2) = Parser p
|
alt (Parser p1) (Parser p2) = Parser p
|
||||||
where
|
where
|
||||||
p stream = case p1 stream of
|
p stream = case p1 stream of
|
||||||
Left { pos: pos, error: error, input: input } -> case error of
|
Left { position, error } -> case error of
|
||||||
Nothing -> p2 stream
|
Nothing -> p2 stream
|
||||||
_ -> failError pos input error
|
_ -> failError position error
|
||||||
right -> right
|
Right right -> Right right
|
||||||
|
|
||||||
instance plusParser :: Plus (Parser e) where
|
instance plusParser :: Plus (Parser e) where
|
||||||
empty :: forall v. Parser e v
|
empty :: forall v. Parser e v
|
||||||
empty = Parser \input -> fail 0 input
|
empty = Parser \_ -> fail 0
|
||||||
|
|
||||||
instance alternativeParser :: Alternative (Parser e)
|
instance alternativeParser :: Alternative (Parser e)
|
||||||
|
|
||||||
instance lazyParser :: Lazy (Parser e v) where
|
instance lazyParser :: Lazy (Parser e v) where
|
||||||
defer f = Parser \str -> parse (f unit) str
|
defer f = Parser \input -> parse (f unit) input
|
||||||
|
|
||||||
-- Generic parsing functions.
|
-- Generic parsing functions.
|
||||||
|
|
||||||
@ -112,7 +126,7 @@ char x = sat (_ == x)
|
|||||||
|
|
||||||
string :: forall e. String -> Parser e String
|
string :: forall e. String -> Parser e String
|
||||||
string str = case A.uncons (toCharArray str) of
|
string str = case A.uncons (toCharArray str) of
|
||||||
Nothing -> Parser \stream -> success 0 stream ""
|
Nothing -> Parser \input -> success input ""
|
||||||
Just { head: x, tail: xs } -> do c <- char x
|
Just { head: x, tail: xs } -> do c <- char x
|
||||||
rest <- string (fromCharArray xs)
|
rest <- string (fromCharArray xs)
|
||||||
pure (concat c rest)
|
pure (concat c rest)
|
||||||
|
Loading…
Reference in New Issue
Block a user