WIP RFC5234

2024-01-27 05:02:04 +01:00 · 2024-01-27 05:02:04 +01:00 · 9868002114
commit 9868002114
parent ef336bd2de
5 changed files with 105 additions and 80 deletions
--- a/src/GenericParser.purs
+++ b/src/GenericParser.purs
@ -6,4 +6,4 @@ module GenericParser

 import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
 import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
-import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
+import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
--- a/src/GenericParser/EmailAddress.purs
+++ b/src/GenericParser/EmailAddress.purs
@ -11,6 +11,9 @@ import Data.Either (Either(..))
 import Data.Maybe (Maybe(..))
 import Data.String.CodeUnits as CU

+-- ABNF core rules.
+import GenericParser.RFC5234
+
 import GenericParser.DomainParser.Common (DomainError)
 import GenericParser.DomainParser (sub_eof)
 import GenericParser.Parser (Parser(..)
@ -21,79 +24,6 @@ data EmailError
  = InvalidCharacter
  | InvalidDomain (Maybe DomainError)

-- | RFC 5234:
-
--ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
--
--BIT            =  "0" / "1"
--
--CHAR           =  %x01-7F
--                  ; any 7-bit US-ASCII character,
--                  ;  excluding NUL
--
--CR             =  %x0D
--                  ; carriage return
--
--CRLF           =  CR LF
--                  ; Internet standard newline
--
--CTL            =  %x00-1F / %x7F
--                  ; controls
--
--DIGIT          =  %x30-39
--                  ; 0-9
--
--DQUOTE         =  %x22
--                  ; " (Double Quote)
--
--HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
-
-- | HTAB: horizontal tab.
-- | HTAB           =  %x09
-htab :: forall e. Parser e Char
-htab = char '\t'
-
--LF             =  %x0A
--                  ; linefeed
--
--LWSP           =  *(WSP / CRLF WSP)
--                  ; Use of this linear-white-space rule
--                  ;  permits lines containing only white
--                  ;  space that are no longer legal in
--                  ;  mail headers and have caused
--                  ;  interoperability problems in other
--                  ;  contexts.
--                  ; Do not use when defining mail
--                  ;  headers and use with caution in
--                  ;  other contexts.
--
--OCTET          =  %x00-FF
--                  ; 8 bits of data
--
-- | SP: space.
-- |
-- | SP             =  %x20
-sp :: forall e. Parser e Char
-sp = char ' '
-
-- | VCHAR          =  %x21-7E
-- |
-- | Visible printing characters.
-vchar :: forall e. Parser e Char
-vchar = sat (\x -> between 33 126 $ C.toCharCode x)
-
-- | WSP: white space.
-- |
-- | WSP            =  SP / HTAB
-wsp :: forall e. Parser e Char
-wsp = sp <|> htab
-
-
-crlf :: forall e. Parser e Unit
-crlf = do _ <- char '\r'
-          _ <- char '\n'
-          pure unit
-
 -- | obs-FWS         =   1*WSP *(CRLF 1*WSP)
 -- |
 -- | Obsolete FWS.
--- a/src/GenericParser/IPAddress.purs
+++ b/src/GenericParser/IPAddress.purs
@ -13,8 +13,8 @@ import GenericParser.Parser (Parser(..)
                            , current_position
                            , string
                            , many1, lookahead
-                            , sat, char, nat)
-import GenericParser.BaseFunctions (repeat, isHexaDecimal)
+                            , char, nat, hex)
+import GenericParser.BaseFunctions (repeat)

 data IPv6Error
  = InvalidCharacter
@ -23,9 +23,6 @@ data IPv6Error
  | TooManyChunks
  | IPv6UnrelevantShortRepresentation

-hex :: forall e. Parser e Char
-hex = sat isHexaDecimal
-
 -- | `ipv6_chunk` parses just a group of hexadecimal characters.
 -- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
 ipv6_chunk :: Parser IPv6Error String
--- a/src/GenericParser/Parser.purs
+++ b/src/GenericParser/Parser.purs
@ -12,7 +12,7 @@ import Data.Maybe (Maybe(..), maybe)
 import Data.String as S
 import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)

-import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
+import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)

 type Position = Int
 type PositionString = { string :: String, position :: Position }
@ -208,6 +208,9 @@ integer = token int
 symbol :: forall e. String -> Parser e String
 symbol xs = token (string xs)

+hex :: forall e. Parser e Char
+hex = sat isHexaDecimal
+
 eof :: forall e. Parser e Unit
 eof = Parser \input -> case S.length input.string of
  0 -> success input unit
--- a/src/GenericParser/RFC5234.purs
+++ b/src/GenericParser/RFC5234.purs
@ -0,0 +1,95 @@
+-- | `RFC5234`, Augmented BNF notation, explains the syntax specification found in many RFCs.
+-- | This module implements core rules found in appendix B.1.
+module GenericParser.RFC5234 where
+
+import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
+
+import Control.Alt ((<|>))
+import Data.Array as A
+import Data.Char as C
+import Data.Either (Either(..))
+import Data.Maybe (Maybe(..))
+import Data.String.CodeUnits as CU
+
+import GenericParser.BaseFunctions (repeat, isHexaDecimal)
+
+import GenericParser.Parser (Parser(..)
+                            , sat, char , digit , letter, item, many1, tryMaybe
+                            , current_input, failureError, parse, rollback, until)
+
+-- | RFC 5234:
+
+--ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
+--
+--BIT            =  "0" / "1"
+--
+--CHAR           =  %x01-7F
+--                  ; any 7-bit US-ASCII character,
+--                  ;  excluding NUL
+--
+--CR             =  %x0D
+--                  ; carriage return
+--
+--CRLF           =  CR LF
+--                  ; Internet standard newline
+--
+--CTL            =  %x00-1F / %x7F
+--                  ; controls
+--
+--DIGIT          =  %x30-39
+--                  ; 0-9
+--
+--DQUOTE         =  %x22
+--                  ; " (Double Quote)
+
+-- | HEXDIG: hexadecimal.
+-- |
+-- | HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
+hexdig :: forall e. Parser e Char
+hexdig = sat isHexaDecimal
+
+-- | HTAB: horizontal tab.
+-- | HTAB           =  %x09
+htab :: forall e. Parser e Char
+htab = char '\t'
+
+--LF             =  %x0A
+--                  ; linefeed
+--
+--LWSP           =  *(WSP / CRLF WSP)
+--                  ; Use of this linear-white-space rule
+--                  ;  permits lines containing only white
+--                  ;  space that are no longer legal in
+--                  ;  mail headers and have caused
+--                  ;  interoperability problems in other
+--                  ;  contexts.
+--                  ; Do not use when defining mail
+--                  ;  headers and use with caution in
+--                  ;  other contexts.
+--
+--OCTET          =  %x00-FF
+--                  ; 8 bits of data
+--
+-- | SP: space.
+-- |
+-- | SP             =  %x20
+sp :: forall e. Parser e Char
+sp = char ' '
+
+-- | VCHAR          =  %x21-7E
+-- |
+-- | Visible printing characters.
+vchar :: forall e. Parser e Char
+vchar = sat (\x -> between 33 126 $ C.toCharCode x)
+
+-- | WSP: white space.
+-- |
+-- | WSP            =  SP / HTAB
+wsp :: forall e. Parser e Char
+wsp = sp <|> htab
+
+
+crlf :: forall e. Parser e Unit
+crlf = do _ <- char '\r'
+          _ <- char '\n'
+          pure unit