diff --git a/src/DomainParser.purs b/src/DomainParser.purs index be6d495..7636a5a 100644 --- a/src/DomainParser.purs +++ b/src/DomainParser.purs @@ -1,10 +1,10 @@ -- | `DomainParser` is a simple parser for domain names as described in RFC 1035. module DomainParser where -import Prelude (bind, not, pure, ($), (&&), (*>), (<<<), (<>), (==), (>)) +import Prelude (bind, not, pure, ($), (&&), (*>), (<<<), (<>), (>)) import Control.Lazy (defer) -import Data.Maybe (Maybe(..)) +import Data.Maybe (Maybe(..), maybe) import Data.Either (Either(..)) import Data.Array as A import Data.String as S @@ -13,7 +13,8 @@ import Control.Alt ((<|>)) import Control.Plus (empty) import Parser (Parser(..), Position - , success, fail, failError + , success, failError + , current_position , alphanum, char, letter, many1, parse, string) type Size = Int @@ -54,11 +55,8 @@ parse_last_char s p = case last_char s of Left _ -> false _ -> true --- | FIXME: This is flawed. --- | We cannot know if it worked: in case there is a problem with the parser `p`, --- | the code will "continue to work" but without what's been parsed. --- | This may sound reasonable but it prevents knowing if a problem actually occured! --- | We cannot do a `try parser <|> alternative` since it will always work! +-- | `try` provides a way to accept a faulty parser and +-- | just rewinds back to previous input state if an error occurs. try :: forall e a. Parser e a -> Parser e (Maybe a) try p = Parser p' where p' input = case parse p input of @@ -70,9 +68,7 @@ label :: forall e. Parser e String label = do l <- letter s <- try ldh_str - let labelstr = CU.singleton l <> case s of - Nothing -> "" - Just content -> CU.fromCharArray content + let labelstr = CU.singleton l <> maybe "" (\v -> CU.fromCharArray v) s if (S.length labelstr > label_maxsize) then empty else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig)) @@ -106,9 +102,10 @@ sub_eof = do sub <- subdomain maybe_final_point <- try $ char '.' _ <- eof -- In case there is still some input, it fails. + pos <- current_position let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub if S.length parsed_domain > max_domain_length - then empty -- TODO: error management. + then Parser \_ -> failError pos (Just <<< DomainTooLarge $ S.length parsed_domain) else pure parsed_domain where did_we_parsed_the_final_point Nothing sub = sub diff --git a/src/Main.purs b/src/Main.purs index 983da83..0647a28 100644 --- a/src/Main.purs +++ b/src/Main.purs @@ -1,7 +1,7 @@ module Main where import Parser (Parser(..)) -import DomainParser (domain, label, ldh_str, sub_eof, subdomain) +import DomainParser (domain, label, ldh_str, sub_eof, subdomain, DomainError(..)) import Prelude (Unit, discard, show, ($), (<>)) @@ -9,7 +9,7 @@ import Effect (Effect) import Effect.Console (log) import Data.Either (Either(..)) ---import Data.Maybe (Maybe(..)) +import Data.Maybe (Maybe(..)) --import Data.Tuple (Tuple(..)) import Data.String.CodeUnits (fromCharArray) @@ -39,112 +39,120 @@ import Data.String.CodeUnits (fromCharArray) -- let toprint = if b then "FOUND IT" else "not found" -- log $ toprint <> ", rest: " <> str -logtest :: forall e v. String -> Parser e v -> String -> (v -> String) -> Effect Unit -logtest fname (Parser p) str r = do +logtest :: forall e v. String -> Parser e v -> String -> (v -> String) -> (e -> String) -> Effect Unit +logtest fname (Parser p) str r e = do log $ "(" <> fname <> ") parsing '" <> str <> "': " <> case p { string: str, position: 0 } of - Left { position, error: _ } -> "failed at position " <> show position + Left { position, error } -> "failed at position " <> show position <> case error of + Nothing -> " -> no error reported" + Just err -> " -> error: " <> e err Right { suffix, result } -> show (r result) <> " '" <> suffix.string <> "'" id :: forall a. a -> a id a = a +showerror :: DomainError -> String +showerror (SubdomainTooLarge position size) = "SubdomainTooLarge (position: " <> show position <> ", size: " <> show size <> ")" +showerror (DomainTooLarge size) = "DomainTooLarge (size: " <> show size <> ")" +showerror (InvalidCharacter position) = "InvalidCharacter (position: " <> show position <> ")" +showerror (EOFExpected) = "EOFExpected" + main :: Effect Unit main = do - logtest "ldh_str" ldh_str "a12B.fl" fromCharArray - logtest "ldh_str" ldh_str "1efg.x1" fromCharArray - logtest "ldh_str" ldh_str ".qjzleb" fromCharArray - logtest "ldh_str" ldh_str "a-b.b-a" fromCharArray - logtest "ldh_str" ldh_str "" fromCharArray + logtest "ldh_str" ldh_str "a12B.fl" fromCharArray showerror + logtest "ldh_str" ldh_str "1efg.x1" fromCharArray showerror + logtest "ldh_str" ldh_str ".qjzleb" fromCharArray showerror + logtest "ldh_str" ldh_str "a-b.b-a" fromCharArray showerror + logtest "ldh_str" ldh_str "" fromCharArray showerror log "" - logtest "label" label "example.org" id - logtest "label" label "" id - logtest "label" label "a.x" id - logtest "label" label "a2.org" id - logtest "label" label "a33.org" id - logtest "label" label "a444.org" id - logtest "label" label "a5555.org" id - logtest "label" label "a66666.org" id - logtest "label" label "a777777.org" id - logtest "label" label "a8888888.org" id + logtest "label" label "example.org" id showerror + logtest "label" label "" id showerror + logtest "label" label "a.x" id showerror + logtest "label" label "a2.org" id showerror + logtest "label" label "a33.org" id showerror + logtest "label" label "a444.org" id showerror + logtest "label" label "a5555.org" id showerror + logtest "label" label "a66666.org" id showerror + logtest "label" label "a777777.org" id showerror + logtest "label" label "a8888888.org" id showerror log "" - logtest "label" label "-" id - logtest "label" label "a-" id + logtest "label" label "-" id showerror + logtest "label" label "a-" id showerror log "" - logtest "subdomain" subdomain "example.org" id - logtest "subdomain" subdomain "" id - logtest "subdomain" subdomain "a.x" id - logtest "subdomain" subdomain "a2.org" id - logtest "subdomain" subdomain "a33.org" id - logtest "subdomain" subdomain "a444.org" id - logtest "subdomain" subdomain "a5555.org" id - logtest "subdomain" subdomain "a66666.org" id - logtest "subdomain" subdomain "a777777.org" id - logtest "subdomain" subdomain "a8888888.org" id - logtest "subdomain" subdomain "xblah.a.x" id - logtest "subdomain" subdomain "xblah.a2.org" id - logtest "subdomain" subdomain "xblah.a33.org" id - logtest "subdomain" subdomain "xblah.a444.org" id - logtest "subdomain" subdomain "xblah.a5555.org" id - logtest "subdomain" subdomain "xblah.a66666.org" id - logtest "subdomain" subdomain "xblah.a777777.org" id - logtest "subdomain" subdomain "xblah.a8888888.org" id - logtest "subdomain" subdomain "-" id - logtest "subdomain" subdomain "a-" id + logtest "subdomain" subdomain "example.org" id showerror + logtest "subdomain" subdomain "" id showerror + logtest "subdomain" subdomain "a.x" id showerror + logtest "subdomain" subdomain "a2.org" id showerror + logtest "subdomain" subdomain "a33.org" id showerror + logtest "subdomain" subdomain "a444.org" id showerror + logtest "subdomain" subdomain "a5555.org" id showerror + logtest "subdomain" subdomain "a66666.org" id showerror + logtest "subdomain" subdomain "a777777.org" id showerror + logtest "subdomain" subdomain "a8888888.org" id showerror + logtest "subdomain" subdomain "xblah.a.x" id showerror + logtest "subdomain" subdomain "xblah.a2.org" id showerror + logtest "subdomain" subdomain "xblah.a33.org" id showerror + logtest "subdomain" subdomain "xblah.a444.org" id showerror + logtest "subdomain" subdomain "xblah.a5555.org" id showerror + logtest "subdomain" subdomain "xblah.a66666.org" id showerror + logtest "subdomain" subdomain "xblah.a777777.org" id showerror + logtest "subdomain" subdomain "xblah.a8888888.org" id showerror + logtest "subdomain" subdomain "-" id showerror + logtest "subdomain" subdomain "a-" id showerror log "" - logtest "sub_eof" sub_eof " " id - logtest "sub_eof" sub_eof " " id - logtest "sub_eof" sub_eof "example.org" id - logtest "sub_eof" sub_eof "" id - logtest "sub_eof" sub_eof "a.x" id - logtest "sub_eof" sub_eof "a2.org" id - logtest "sub_eof" sub_eof "a33.org" id - logtest "sub_eof" sub_eof "a444.org" id - logtest "sub_eof" sub_eof "a5555.org" id - logtest "sub_eof" sub_eof "a66666.org" id - logtest "sub_eof" sub_eof "a777777.org" id - logtest "sub_eof" sub_eof "a8888888.org" id - logtest "sub_eof" sub_eof "xblah.a.x" id - logtest "sub_eof" sub_eof "xblah.a2.org" id - logtest "sub_eof" sub_eof "xblah.a33.org" id - logtest "sub_eof" sub_eof "xblah.a444.org" id - logtest "sub_eof" sub_eof "xblah.a5555.org" id - logtest "sub_eof" sub_eof "xblah.a66666.org" id - logtest "sub_eof" sub_eof "xblah.a777777.org" id - logtest "sub_eof" sub_eof "xblah.a8888888.org" id - logtest "sub_eof" sub_eof "-" id - logtest "sub_eof" sub_eof "a-" id + logtest "sub_eof" sub_eof " " id showerror + logtest "sub_eof" sub_eof " " id showerror + logtest "sub_eof" sub_eof "example.org" id showerror + logtest "sub_eof" sub_eof "" id showerror + logtest "sub_eof" sub_eof "a.x" id showerror + logtest "sub_eof" sub_eof "a2.org" id showerror + logtest "sub_eof" sub_eof "a33.org" id showerror + logtest "sub_eof" sub_eof "a444.org" id showerror + logtest "sub_eof" sub_eof "a5555.org" id showerror + logtest "sub_eof" sub_eof "a66666.org" id showerror + logtest "sub_eof" sub_eof "a777777.org" id showerror + logtest "sub_eof" sub_eof "a8888888.org" id showerror + logtest "sub_eof" sub_eof "xblah.a.x" id showerror + logtest "sub_eof" sub_eof "xblah.a2.org" id showerror + logtest "sub_eof" sub_eof "xblah.a33.org" id showerror + logtest "sub_eof" sub_eof "xblah.a444.org" id showerror + logtest "sub_eof" sub_eof "xblah.a5555.org" id showerror + logtest "sub_eof" sub_eof "xblah.a66666.org" id showerror + logtest "sub_eof" sub_eof "xblah.a777777.org" id showerror + logtest "sub_eof" sub_eof "xblah.a8888888.org" id showerror + logtest "sub_eof" sub_eof "-" id showerror + logtest "sub_eof" sub_eof "a-" id showerror log "" - logtest "domain" domain " " id - logtest "domain" domain " " id - logtest "domain" domain "example.org" id - logtest "domain" domain "" id - logtest "domain" domain "a.x" id - logtest "domain" domain "a2.org" id - logtest "domain" domain "a33.org" id - logtest "domain" domain "a444.org" id - logtest "domain" domain "a5555.org" id - logtest "domain" domain "a66666.org" id - logtest "domain" domain "a777777.org" id - logtest "domain" domain "a8888888.org" id - logtest "domain" domain "xblah.a.x" id - logtest "domain" domain "xblah.a2.org" id - logtest "domain" domain "xblah.a33.org" id - logtest "domain" domain "xblah.a444.org" id - logtest "domain" domain "xblah.a5555.org" id - logtest "domain" domain "xblah.a66666.org" id - logtest "domain" domain "xblah.a777777.org" id - logtest "domain" domain "xblah.a8888888.org" id - logtest "domain" domain "-" id - logtest "domain" domain "a-" id + logtest "domain" domain " " id showerror + logtest "domain" domain " " id showerror + logtest "domain" domain "example.org" id showerror + logtest "domain" domain "" id showerror + logtest "domain" domain "a.x" id showerror + logtest "domain" domain "a2.org" id showerror + logtest "domain" domain "a33.org" id showerror + logtest "domain" domain "a444.org" id showerror + logtest "domain" domain "a5555.org" id showerror + logtest "domain" domain "a66666.org" id showerror + logtest "domain" domain "a777777.org" id showerror + logtest "domain" domain "a8888888.org" id showerror + logtest "domain" domain "xblah.a.x" id showerror + logtest "domain" domain "xblah.a2.org" id showerror + logtest "domain" domain "xblah.a33.org" id showerror + logtest "domain" domain "xblah.a444.org" id showerror + logtest "domain" domain "xblah.a5555.org" id showerror + logtest "domain" domain "xblah.a66666.org" id showerror + logtest "domain" domain "xblah.a777777.org" id showerror + logtest "domain" domain "xblah.a8888888.org" id showerror + logtest "domain" domain "-" id showerror + logtest "domain" domain "a-" id showerror -- log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of -- Just (Tuple x y) -> show x <> " " <> show y diff --git a/src/Parser.purs b/src/Parser.purs index 846abd4..889f0cf 100644 --- a/src/Parser.purs +++ b/src/Parser.purs @@ -11,7 +11,6 @@ import Data.Int as Int import Data.Either (Either(..)) import Data.Maybe (Maybe(..)) import Data.String.CodeUnits (toCharArray, fromCharArray) --- import Data.Tuple (Tuple(..)) import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper) @@ -28,9 +27,11 @@ newtype Parser e v = Parser (Input -> Result e v) parse :: forall e v. Parser e v -> (Input -> Result e v) parse (Parser p) = p +current_position :: forall e. Parser e Position +current_position = Parser \input -> success input input.position + -- | Fail with a specified error. -- | When a parsing has a specified error, no alternative will be tried and the error is reported. --- TODO: stop the parsing when an error is provided. failError :: forall e v. Position -> Maybe e -> Result e v failError position error = Left { position, error } @@ -39,7 +40,6 @@ failError position error = Left { position, error } -- | such as `digit` or `letter`. -- | Also, this can be used to express a possibly expected invalid parsing that should not -- | halt the parsing, but rather let an alternative path to be tried. --- TODO: stop the parsing when an error is provided. fail :: forall e v. Position -> Result e v fail position = failError position Nothing