Split most parsers from the GenericParser.Parser module.

2024-01-27 08:03:16 +01:00 · 2024-01-27 08:03:16 +01:00 · 8e68099e52
commit 8e68099e52
parent 1951d893a9
9 changed files with 123 additions and 95 deletions
--- a/src/GenericParser.purs
+++ b/src/GenericParser.purs
@ -2,8 +2,11 @@ module GenericParser
  ( module GenericParser.Parser
  , module GenericParser.DomainParser.Common
  , module GenericParser.DomainParser
+  --, module GenericParser.RFC5234
  ) where

 import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
 import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
-import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
+import GenericParser.Parser (char, current_input, current_position, Error, failure, failureError, Input, item, lookahead, many1, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, string, success, try, tryMaybe, until, Value)
+import GenericParser.SomeParsers (alphanum, eof, ident, identifier, int, integer, letter, lower, nat, natural, space, symbol, token, upper)
+-- import GenericParser.RFC5234 ()
--- a/src/GenericParser/DomainParser.purs
+++ b/src/GenericParser/DomainParser.purs
@ -16,9 +16,10 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
 import GenericParser.Parser (Parser(..)
                            , failureError
                            , current_position
-                            , char, letter, string
+                            , char, string
                            , parse_last_char
                            , tryMaybe)
+import GenericParser.SomeParsers (letter)

 -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 -- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).
--- a/src/GenericParser/DomainParser/Common.purs
+++ b/src/GenericParser/DomainParser/Common.purs
@ -7,7 +7,8 @@ import Data.String as S

 import GenericParser.Parser (Parser(..)
                            , success, failureError
-                            , alphanum, char, many1)
+                            , char, many1)
+import GenericParser.SomeParsers (alphanum)

 type Size = Int
 -- | `DomainError` expresses all possible errors that can occur while parsing a domain.
--- a/src/GenericParser/DomainParserRFC1035.purs
+++ b/src/GenericParser/DomainParserRFC1035.purs
@ -18,8 +18,9 @@ import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig
 import GenericParser.Parser (Parser(..)
                            , failureError
                            , current_position
-                            , char, letter, parse, string
+                            , char, parse, string
                            , tryMaybe)
+import GenericParser.SomeParsers (letter)

 -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 label :: Parser DomainError String
--- a/src/GenericParser/EmailAddress.purs
+++ b/src/GenericParser/EmailAddress.purs
@ -11,14 +11,14 @@ import Data.Either (Either(..))
 import Data.Maybe (Maybe(..))
 import Data.String.CodeUnits as CU

-- ABNF core rules.
-import GenericParser.RFC5234
-
+import GenericParser.Parser (Parser(..)
+                            , sat, char, item, many1, tryMaybe
+                            , current_input, failureError, parse, rollback, until)
 import GenericParser.DomainParser.Common (DomainError)
 import GenericParser.DomainParser (sub_eof)
-import GenericParser.Parser (Parser(..)
-                            , sat, char , digit , letter, item, many1, tryMaybe
-                            , current_input, failureError, parse, rollback, until)
+-- ABNF core rules.
+import GenericParser.RFC5234 (crlf, digit, wsp)
+import GenericParser.SomeParsers (letter)

 data EmailError
  = InvalidCharacter
--- a/src/GenericParser/IPAddress.purs
+++ b/src/GenericParser/IPAddress.purs
@ -13,8 +13,10 @@ import GenericParser.Parser (Parser(..)
                            , current_position
                            , string
                            , many1, lookahead
-                            , char, nat, hex)
+                            , char)
 import GenericParser.BaseFunctions (repeat)
+import GenericParser.SomeParsers (nat)
+import GenericParser.RFC5234 (hexdig)

 data IPv6Error
  = InvalidCharacter
@ -27,7 +29,7 @@ data IPv6Error
 -- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
 ipv6_chunk :: Parser IPv6Error String
 ipv6_chunk = do pos <- current_position
-                hexachars <- many1 hex
+                hexachars <- many1 hexdig
                if A.length hexachars > 4
                then Parser \_ -> failureError pos (Just TooManyHexaDecimalCharacters)
                else pure $ CU.fromCharArray hexachars
--- a/src/GenericParser/Parser.purs
+++ b/src/GenericParser/Parser.purs
@ -7,12 +7,11 @@ import Control.Lazy (class Lazy, defer)
 import Control.Plus (class Plus, empty)
 import Data.Array as A
 import Data.Either (Either(..))
-import Data.Int as Int
 import Data.Maybe (Maybe(..), maybe)
 import Data.String as S
-import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
+import Data.String.CodeUnits as CU

-import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
+import GenericParser.BaseFunctions (concat)

 type Position = Int
 type PositionString = { string :: String, position :: Position }
@ -70,9 +69,9 @@ success suffix result = Right { suffix, result }
 item :: forall e. Parser e Char
 item = Parser p
  where
-    p input = case A.uncons (toCharArray input.string) of
+    p input = case A.uncons (CU.toCharArray input.string) of
      Nothing                    -> failure input.position
-      Just { head: x, tail: xs } -> success { string: (fromCharArray xs), position: input.position+1 } x
+      Just { head: x, tail: xs } -> success { string: (CU.fromCharArray xs), position: input.position+1 } x

 instance functorParser :: Functor (Parser e) where
  map :: forall a b. (a -> b) -> Parser e a -> Parser e b
@ -144,78 +143,16 @@ sat p = do pos <- current_position
           x <- item
           if p x then pure x else Parser \_ -> failure pos

-digit :: forall e. Parser e Char
-digit = sat isDigit
-
-lower :: forall e. Parser e Char
-lower = sat isLower
-
-upper :: forall e. Parser e Char
-upper = sat isUpper
-
-letter :: forall e. Parser e Char
-letter = sat isAlpha
-
-alphanum :: forall e. Parser e Char
-alphanum = sat isAlphaNum
-
 char :: forall e. Char -> Parser e Char
 char x = sat (_ == x)

 string :: forall e. String -> Parser e String
-string str = case A.uncons (toCharArray str) of
+string str = case A.uncons (CU.toCharArray str) of
  Nothing -> Parser \input -> success input ""
  Just { head: x, tail: xs } -> do c <- char x
-                                   rest <- string (fromCharArray xs)
+                                   rest <- string (CU.fromCharArray xs)
                                   pure (concat c rest)

-ident :: forall e. Parser e String
-ident = do x  <- lower
-           xs <- A.many alphanum
-           pure (fromCharArray $ A.cons x xs)
-
-nat :: forall e. Parser e Int
-nat = do xs <- A.some digit
-         case Int.fromString (fromCharArray xs) of
-           Nothing -> empty
-           Just x -> pure x
-
-int :: forall e. Parser e Int
-int = do _ <- char '-'
-         n <- nat
-         pure (-n)
-       <|> nat
-
-space :: forall e. Parser e Unit
-space = do _ <- A.many (sat isSpace)
-           pure unit
-
-token :: forall e a. Parser e a -> Parser e a
-token p = do space
-             v <- p
-             _ <- space
-             pure v
-
-identifier :: forall e. Parser e String
-identifier = token ident
-
-natural :: forall e. Parser e Int
-natural = token nat
-
-integer :: forall e. Parser e Int
-integer = token int
-
-symbol :: forall e. String -> Parser e String
-symbol xs = token (string xs)
-
-hex :: forall e. Parser e Char
-hex = sat isHexaDecimal
-
-eof :: forall e. Parser e Unit
-eof = Parser \input -> case S.length input.string of
-  0 -> success input unit
-  _ -> failure input.position
-
 many1 :: forall e v. Parser e v -> Parser e (Array v)
 many1 p = do first <- p
             rest <- A.many p
@ -245,10 +182,10 @@ until parser_end p = do
 parse_last_char :: forall e. String -> Parser e Char -> Boolean
 parse_last_char s p = case last_char s of
  Nothing -> false
-  Just c -> case parse p { string: singleton c, position: 0 } of
+  Just c -> case parse p { string: CU.singleton c, position: 0 } of
    Left _  -> false
    _       -> true
  where
    -- Get the last character of a String.
    last_char :: String -> Maybe Char
-    last_char = A.last <<< toCharArray
+    last_char = A.last <<< CU.toCharArray
--- a/src/GenericParser/RFC5234.purs
+++ b/src/GenericParser/RFC5234.purs
@ -2,27 +2,33 @@
 -- | This module implements core rules found in appendix B.1.
 module GenericParser.RFC5234 where

-import Prelude (Unit, between, bind, void, ($))
+import Prelude (Unit, between, (<<<), (||), (==), bind, void, ($))

 import Control.Alt ((<|>))
 import Data.Char as C

-import GenericParser.BaseFunctions (isHexaDecimal)
+import GenericParser.BaseFunctions (isAlpha, isDigit, isHexaDecimal)

 import GenericParser.Parser (Parser, char, sat)

-- | RFC 5234:
+-- | ALPHA: any letter, upper or lower case.
+-- |
+-- | ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
+alpha :: forall e. Parser e Char
+alpha = sat isAlpha

--ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
-
--BIT            =  "0" / "1"
+-- | BIT: either character '0' or '1'.
+-- |
+-- | BIT            =  "0" / "1"
+bit :: forall e. Parser e Char
+bit = char '0' <|> char '1'

 -- | CHAR (renamed `asciichar` to fix naming conflict with `GenericParser.char`):
 -- | any 7-bit US-ASCII character, excluding NUL.
 -- |
 -- | CHAR           =  %x01-7F
 asciichar :: forall e. Parser e Char
-asciichar = sat (\x -> between 1 127 $ C.toCharCode x)
+asciichar = sat (between 1 127 <<< C.toCharCode)

 -- | CR: carriage return.
 -- |
@ -37,12 +43,19 @@ crlf :: forall e. Parser e Unit
 crlf = do _ <- char '\r'
          void $ char '\n'

--CTL            =  %x00-1F / %x7F
--                  ; controls
--
+-- | CTL: control characters.
+-- |
+-- | CTL            =  %x00-1F / %x7F
+ctl :: forall e. Parser e Char
+ctl = sat cond
+  where cond x = (between 0 31 $ C.toCharCode x)
+                 || C.toCharCode x == 127
+
 --DIGIT          =  %x30-39
 --                  ; 0-9
--
+digit :: forall e. Parser e Char
+digit = sat isDigit
+
 --DQUOTE         =  %x22
 --                  ; " (Double Quote)

@ -84,7 +97,7 @@ sp = char ' '
 -- |
 -- | Visible printing characters.
 vchar :: forall e. Parser e Char
-vchar = sat (\x -> between 33 126 $ C.toCharCode x)
+vchar = sat (between 33 126 <<< C.toCharCode)

 -- | WSP: white space.
 -- |
--- a/src/GenericParser/SomeParsers.purs
+++ b/src/GenericParser/SomeParsers.purs
@ -0,0 +1,70 @@
+module GenericParser.SomeParsers where
+import Prelude (Unit, bind, discard, negate, pure, unit, ($))
+
+import Control.Alt ((<|>))
+import Control.Plus (empty)
+import Data.Array as A
+import Data.Maybe (Maybe(..))
+import Data.Int as Int
+import Data.String as S
+import Data.String.CodeUnits as CU
+
+import GenericParser.Parser
+import GenericParser.BaseFunctions (isAlpha, isAlphaNum, isLower, isSpace, isUpper)
+import GenericParser.RFC5234 (digit)
+
+lower :: forall e. Parser e Char
+lower = sat isLower
+
+upper :: forall e. Parser e Char
+upper = sat isUpper
+
+letter :: forall e. Parser e Char
+letter = sat isAlpha
+
+alphanum :: forall e. Parser e Char
+alphanum = sat isAlphaNum
+
+ident :: forall e. Parser e String
+ident = do x  <- lower
+           xs <- A.many alphanum
+           pure (CU.fromCharArray $ A.cons x xs)
+
+nat :: forall e. Parser e Int
+nat = do xs <- A.some digit
+         case Int.fromString (CU.fromCharArray xs) of
+           Nothing -> empty
+           Just x -> pure x
+
+int :: forall e. Parser e Int
+int = do _ <- char '-'
+         n <- nat
+         pure (-n)
+       <|> nat
+
+space :: forall e. Parser e Unit
+space = do _ <- A.many (sat isSpace)
+           pure unit
+
+token :: forall e a. Parser e a -> Parser e a
+token p = do space
+             v <- p
+             _ <- space
+             pure v
+
+identifier :: forall e. Parser e String
+identifier = token ident
+
+natural :: forall e. Parser e Int
+natural = token nat
+
+integer :: forall e. Parser e Int
+integer = token int
+
+symbol :: forall e. String -> Parser e String
+symbol xs = token (string xs)
+
+eof :: forall e. Parser e Unit
+eof = Parser \input -> case S.length input.string of
+  0 -> success input unit
+  _ -> failure input.position