Big improvement on the base structure type.

2024-01-18 04:51:50 +01:00 · 2024-01-18 04:51:50 +01:00 · efe41a0d3f
commit efe41a0d3f
parent 0702ba184e
2 changed files with 69 additions and 53 deletions
--- a/src/DomainParser.purs
+++ b/src/DomainParser.purs
@ -12,14 +12,17 @@ import Data.String.CodeUnits as CU
 import Control.Alt ((<|>))
 import Control.Plus (empty)
 import Parser (Parser(..), Position
              , success, fail, failError
              , alphanum, char, letter, many1, parse, string)
 type Size = Int
 data DomainError
  = SubdomainTooLarge Position Size
  | DomainTooLarge Size
  | InvalidCharacter Position
  | EOFExpected
 import Parser (Parser(..), success, fail, failError, alphanum, char, letter, many1, parse, string)
 -- | From RFC 1035: <let-dig> ::= <letter> | <digit>
 let_dig :: forall e. Parser e Char
 let_dig = alphanum
@ -47,7 +50,7 @@ last_char = A.last <<< CU.toCharArray
 parse_last_char :: forall e. String -> Parser e Char -> Boolean
 parse_last_char s p = case last_char s of
  Nothing -> false
-  Just c -> case parse p (CU.singleton c) of
+  Just c -> case parse p { string: CU.singleton c, position: 0 } of
    Left _  -> false
    _       -> true
@ -56,21 +59,20 @@ parse_last_char s p = case last_char s of
 -- | the code will "continue to work" but without what's been parsed.
 -- | This may sound reasonable but it prevents knowing if a problem actually occured!
 -- | We cannot do a `try parser <|> alternative` since it will always work!
-try :: forall e a. Parser e a -> Parser e a
+try :: forall e a. Parser e a -> Parser e (Maybe a)
 try p = Parser p'
-  where p' str = case parse p str of
+  where p' input = case parse p input of
-                   Left { pos: _, input: _, error: e } -> case e of
+                   Left _                   -> Right { suffix: input, result: Nothing }
-                     Nothing    -> fail 0 str
+                   Right { suffix, result } -> Right { suffix,        result: Just result }
                     error      -> failError 0 str error
                   right -> right
                   -- Right { pos: pos, input: input, result: value } -> success pos input value
 -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 label :: forall e. Parser e String
 label = do
  l <- letter
  s <- try ldh_str
-  let labelstr = CU.singleton l <> CU.fromCharArray s
+  let labelstr = CU.singleton l <> case s of
                                     Nothing      -> ""
                                     Just content -> CU.fromCharArray content
  if (S.length labelstr > label_maxsize)
  then empty
  else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
@ -86,31 +88,31 @@ subdomain = do
    _ <- char '.'
    sub <- defer \_ -> subdomain
    pure sub
-  if (S.length upperlabels == 0) -- This is related to the problem of not having a proper base structure.
+  case upperlabels of
-  then pure lab
+    Nothing -> pure lab
-  else pure $ lab <> "." <> upperlabels
+    Just l  -> pure $ lab <> "." <> l
 -- | Test for the end-of-file (no more input).
 -- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
-eof :: forall e. Parser e String
+eof :: Parser DomainError String
-eof = Parser \str -> case S.length str of
+eof = Parser \input -> case S.length input.string of
-  0 -> success 0 str ""
+  0 -> success input ""
-  _ -> failError 0 str (Just EOFExpected)
+  _ -> failError input.position (Just EOFExpected)
 -- | Test for the domain to be a list of subdomains then an end-of-file.
 -- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
-sub_eof :: forall e. Parser e String
+sub_eof :: Parser DomainError String
 sub_eof = do
  sub <- subdomain
-  maybe_final_point <- try char '.'
+  maybe_final_point <- try $ char '.'
  _ <- eof -- In case there is still some input, it fails.
  let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
  if S.length parsed_domain > max_domain_length
  then empty -- TODO: error management.
  else pure parsed_domain
  where
-    did_we_parsed_the_final_point '.' sub = sub <> "."
+    did_we_parsed_the_final_point Nothing sub = sub
-    did_we_parsed_the_final_point _   sub = sub
+    did_we_parsed_the_final_point _       sub = sub <> "."
 -- | From RFC 1035: <domain> ::= <subdomain> | " "
 -- |
@ -119,5 +121,5 @@ sub_eof = do
 -- | However, this last '.' character should be acceptable in most applications.
 -- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
 -- | has to be differenciated from a "relative" name (www).
-domain :: forall e. Parser e String
+domain :: Parser DomainError String
 domain = (string " " *> eof) <|> sub_eof
--- a/src/Parser.purs
+++ b/src/Parser.purs
@ -10,16 +10,17 @@ import Data.Array as A
 import Data.Int as Int
 import Data.Either (Either(..))
 import Data.Maybe (Maybe(..))
-import Data.String.CodeUnits (uncons, toCharArray, fromCharArray)
+import Data.String.CodeUnits (toCharArray, fromCharArray)
 -- import Data.Tuple (Tuple(..))
 import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
 type Input = (Tuple Position String)
 type Size = Int
 type Position = Int
-type Error e = { pos :: Position, input :: String, error :: Maybe e }
+type PosString = { string :: String, position :: Position }
-type Value v = { pos :: Position, input :: String, result :: v }
+type Input = PosString
 type Error e = { position :: Position, error :: Maybe e }
 type Value v = { result :: v, suffix :: Input }
 type Result e v = Either (Error e) (Value v)
 newtype Parser e v = Parser (Input -> Result e v)
@ -27,64 +28,77 @@ newtype Parser e v = Parser (Input -> Result e v)
 parse :: forall e v. Parser e v -> (Input -> Result e v)
 parse (Parser p) = p
-failError :: forall e v. Position -> Input -> Maybe e -> Result e v
+-- | Fail with a specified error.
-failError pos input error = Left { pos: pos, error: error, input: input }
+-- | When a parsing has a specified error, no alternative will be tried and the error is reported.
 -- TODO: stop the parsing when an error is provided.
 failError :: forall e v. Position -> Maybe e -> Result e v
 failError position error = Left { position, error }
-fail :: forall e v. Position -> Input -> Result e v
+-- | Fail without a specified error.
-fail pos input = failError pos input Nothing
+-- | This is used in generic parsers not attached to a specified context,
 -- | such as `digit` or `letter`.
 -- | Also, this can be used to express a possibly expected invalid parsing that should not
 -- | halt the parsing, but rather let an alternative path to be tried.
 -- TODO: stop the parsing when an error is provided.
 fail :: forall e v. Position -> Result e v
 fail position = failError position Nothing
-success :: forall e v. Position -> Input -> v -> Result e v
+-- | `success` constructs a result value for a successful parsing.
-success pos input v = Right { pos: pos, input: input, result: v }
+-- | It requires the input (a string with its current position) and the result.
 success :: forall e v. Input -> v -> Result e v
 success suffix result = Right { suffix, result }
 -- | Read a single char from the input.
 item :: forall e. Parser e Char
 item = Parser p
-  where p str = case uncons str of
+  where
-                  Nothing                    -> fail 0 str
+    p input = case A.uncons (toCharArray input.string) of
-                  Just { head: x, tail: xs } -> success 1 xs x
+      Nothing                    -> fail input.position
      Just { head: x, tail: xs } -> success (input { string = (fromCharArray xs)
                                                   , position = input.position+1 }) x
 instance functorParser :: Functor (Parser e) where
  map :: forall a b. (a -> b) -> Parser e a -> Parser e b
  map f (Parser p) =
    Parser $ \s0 -> case p s0 of
-      Right { pos: pos, input: input, result: v } -> success pos input (f v)
+      Right { suffix, result } -> success suffix (f result)
-      Left  { pos: pos, input: input, error: e }  -> failError pos input e
+      Left error               -> Left error
 instance applyParser :: Apply (Parser e) where
  apply (Parser p1) (Parser p2)
    = Parser $ \s0 -> case p1 s0 of
        Right { suffix: suffix1, result: result1 } -> case p2 suffix1 of
          Right { suffix: suffix2, result: result2 } -> success suffix2 (result1 result2)
          Left error -> Left error
        Right { pos: pos1, input: s1, result: r1 } -> case p2 s1 of
        Left error -> Left error
          Right { pos: pos2, input: s2, result: r2 } -> success (pos1 + pos2) s2 (r1 r2)
 instance applicativeParser :: Applicative (Parser e) where
-  pure a = Parser $ \str -> success 0 str a
+  pure a = Parser $ \input -> success input a
 instance bindParser :: Bind (Parser e) where
  bind (Parser p) f = Parser $ \s0 -> case p s0 of
    Right { suffix, result } -> parse (f result) suffix
    Left error               -> Left error
    Right { pos: _, input: input, result: result } ->
      let (Parser p2) = f result
      in p2 input
 -- | `Alt` instance: in case of an error that produced an error value, the computation stops.
 instance altParser :: Alt (Parser e) where
  alt :: forall v. Parser e v -> Parser e v -> Parser e v
  alt (Parser p1) (Parser p2) = Parser p
    where
      p stream = case p1 stream of
-        Left { pos: pos, error: error, input: input } -> case error of
+        Left { position, error } -> case error of
          Nothing -> p2 stream
-          _ -> failError pos input error
+          _       -> failError position error
-        right -> right
+        Right right -> Right right
 instance plusParser :: Plus (Parser e) where
  empty :: forall v. Parser e v
-  empty = Parser \input -> fail 0 input
+  empty = Parser \_ -> fail 0
 instance alternativeParser :: Alternative (Parser e)
 instance lazyParser :: Lazy (Parser e v) where
-  defer f = Parser \str -> parse (f unit) str
+  defer f = Parser \input -> parse (f unit) input
 -- Generic parsing functions.
@ -112,7 +126,7 @@ char x = sat (_ == x)
 string :: forall e. String -> Parser e String
 string str = case A.uncons (toCharArray str) of
-  Nothing -> Parser \stream -> success 0 stream ""
+  Nothing -> Parser \input -> success input ""
  Just { head: x, tail: xs } -> do c <- char x
                                   rest <- string (fromCharArray xs)
                                   pure (concat c rest)