WIP RFC5234

2024-01-27 05:02:04 +01:00 · 2024-01-27 05:02:04 +01:00 · 9868002114
commit 9868002114
parent ef336bd2de
5 changed files with 105 additions and 80 deletions
--- a/src/GenericParser.purs
+++ b/src/GenericParser.purs
@ -6,4 +6,4 @@ module GenericParser
 import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
 import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
-import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
+import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
--- a/src/GenericParser/EmailAddress.purs
+++ b/src/GenericParser/EmailAddress.purs
@ -11,6 +11,9 @@ import Data.Either (Either(..))
 import Data.Maybe (Maybe(..))
 import Data.String.CodeUnits as CU
 -- ABNF core rules.
 import GenericParser.RFC5234
 import GenericParser.DomainParser.Common (DomainError)
 import GenericParser.DomainParser (sub_eof)
 import GenericParser.Parser (Parser(..)
@ -21,79 +24,6 @@ data EmailError
  = InvalidCharacter
  | InvalidDomain (Maybe DomainError)
 -- | RFC 5234:
 --ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
 --
 --BIT            =  "0" / "1"
 --
 --CHAR           =  %x01-7F
 --                  ; any 7-bit US-ASCII character,
 --                  ;  excluding NUL
 --
 --CR             =  %x0D
 --                  ; carriage return
 --
 --CRLF           =  CR LF
 --                  ; Internet standard newline
 --
 --CTL            =  %x00-1F / %x7F
 --                  ; controls
 --
 --DIGIT          =  %x30-39
 --                  ; 0-9
 --
 --DQUOTE         =  %x22
 --                  ; " (Double Quote)
 --
 --HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
 -- | HTAB: horizontal tab.
 -- |
 -- | HTAB           =  %x09
 -- |
 -- | Defined as `char '\t'`; the literal `'\t'` is the tab character,
 -- | i.e. code point 0x09 from the ABNF rule above.
 htab :: forall e. Parser e Char
 htab = char '\t'
 --LF             =  %x0A
 --                  ; linefeed
 --
 --LWSP           =  *(WSP / CRLF WSP)
 --                  ; Use of this linear-white-space rule
 --                  ;  permits lines containing only white
 --                  ;  space that are no longer legal in
 --                  ;  mail headers and have caused
 --                  ;  interoperability problems in other
 --                  ;  contexts.
 --                  ; Do not use when defining mail
 --                  ;  headers and use with caution in
 --                  ;  other contexts.
 --
 --OCTET          =  %x00-FF
 --                  ; 8 bits of data
 --
 -- | SP: space.
 -- |
 -- | SP             =  %x20
 -- |
 -- | Defined as `char ' '`; the literal is the ASCII space character,
 -- | i.e. code point 0x20 from the ABNF rule above.
 sp :: forall e. Parser e Char
 sp = char ' '
 -- | VCHAR: visible (printing) characters.
 -- |
 -- | VCHAR          =  %x21-7E
 -- |
 -- | The predicate handed to `sat` checks that the character code lies in
 -- | the inclusive range 0x21-0x7E (33-126 in decimal).
 vchar :: forall e. Parser e Char
 vchar = sat isVisible
   where
     isVisible c = between 0x21 0x7E (C.toCharCode c)
 -- | WSP: white space.
 -- |
 -- | WSP            =  SP / HTAB
 -- |
 -- | Mirrors the ABNF alternation directly: `sp` is the left operand of
 -- | `<|>` and `htab` the right one.
 wsp :: forall e. Parser e Char
 wsp = sp <|> htab
 -- | CRLF: Internet standard newline.
 -- |
 -- | CRLF           =  CR LF
 -- |
 -- | Runs the CR step (`char '\r'`) and then the LF step (`char '\n'`),
 -- | discarding both results and yielding `unit`.
 crlf :: forall e. Parser e Unit
 crlf = do
     _ <- cr
     _ <- lf
     pure unit
   where
     cr = char '\r'
     lf = char '\n'
 -- | obs-FWS         =   1*WSP *(CRLF 1*WSP)
 -- |
 -- | Obsolete FWS.
--- a/src/GenericParser/IPAddress.purs
+++ b/src/GenericParser/IPAddress.purs
@ -13,8 +13,8 @@ import GenericParser.Parser (Parser(..)
                            , current_position
                            , string
                            , many1, lookahead
-                            , sat, char, nat)
+                            , char, nat, hex)
-import GenericParser.BaseFunctions (repeat, isHexaDecimal)
+import GenericParser.BaseFunctions (repeat)
 data IPv6Error
  = InvalidCharacter
@ -23,9 +23,6 @@ data IPv6Error
  | TooManyChunks
  | IPv6UnrelevantShortRepresentation
 -- | Hexadecimal character: `sat` applied to the `isHexaDecimal` predicate.
 -- |
 -- | NOTE(review): the exact set of accepted characters (e.g. whether
 -- | lowercase `a`-`f` is included) is decided by `isHexaDecimal` in
 -- | `GenericParser.BaseFunctions` -- confirm there.
 hex :: forall e. Parser e Char
 hex = sat isHexaDecimal
 -- | `ipv6_chunk` parses just a group of hexadecimal characters.
 -- | Returns an error (`TooManyHexaDecimalCharacters`) when the group has more than 4 characters.
 ipv6_chunk :: Parser IPv6Error String
--- a/src/GenericParser/Parser.purs
+++ b/src/GenericParser/Parser.purs
@ -12,7 +12,7 @@ import Data.Maybe (Maybe(..), maybe)
 import Data.String as S
 import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
-import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
+import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
 type Position = Int
 type PositionString = { string :: String, position :: Position }
@ -208,6 +208,9 @@ integer = token int
 -- | `symbol xs` is the `string xs` parser wrapped in `token`.
 symbol :: forall e. String -> Parser e String
 symbol xs = token (string xs)
 -- | Hexadecimal character: `sat` applied to the `isHexaDecimal` predicate.
 -- |
 -- | NOTE(review): the exact set of accepted characters (e.g. whether
 -- | lowercase `a`-`f` is included) is decided by `isHexaDecimal` in
 -- | `GenericParser.BaseFunctions` -- confirm there.
 hex :: forall e. Parser e Char
 hex = sat isHexaDecimal
 eof :: forall e. Parser e Unit
 eof = Parser \input -> case S.length input.string of
  0 -> success input unit
--- a/src/GenericParser/RFC5234.purs
+++ b/src/GenericParser/RFC5234.purs
@ -0,0 +1,95 @@
 -- | RFC 5234 ("Augmented BNF for Syntax Specifications: ABNF") defines the grammar notation used by the syntax specifications of many RFCs.
 -- | This module implements the core rules listed in Appendix B.1 of that RFC.
 module GenericParser.RFC5234 where
 import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
 import Control.Alt ((<|>))
 import Data.Array as A
 import Data.Char as C
 import Data.Either (Either(..))
 import Data.Maybe (Maybe(..))
 import Data.String.CodeUnits as CU
 import GenericParser.BaseFunctions (repeat, isHexaDecimal)
 import GenericParser.Parser (Parser(..)
                            , sat, char , digit , letter, item, many1, tryMaybe
                            , current_input, failureError, parse, rollback, until)
 -- | RFC 5234:
 --ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
 --
 --BIT            =  "0" / "1"
 --
 --CHAR           =  %x01-7F
 --                  ; any 7-bit US-ASCII character,
 --                  ;  excluding NUL
 --
 --CR             =  %x0D
 --                  ; carriage return
 --
 --CRLF           =  CR LF
 --                  ; Internet standard newline
 --
 --CTL            =  %x00-1F / %x7F
 --                  ; controls
 --
 --DIGIT          =  %x30-39
 --                  ; 0-9
 --
 --DQUOTE         =  %x22
 --                  ; " (Double Quote)
 -- | HEXDIG: hexadecimal digit.
 -- |
 -- | HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
 -- |
 -- | Implemented as `sat isHexaDecimal`.
 -- |
 -- | NOTE(review): ABNF quoted strings are case-insensitive, so the rule
 -- | above also covers "a"-"f"; whether `isHexaDecimal` accepts lowercase
 -- | letters is decided in `GenericParser.BaseFunctions` -- TODO confirm.
 hexdig :: forall e. Parser e Char
 hexdig = sat isHexaDecimal
 -- | HTAB: horizontal tab.
 -- |
 -- | HTAB           =  %x09
 -- |
 -- | Defined as `char '\t'`; the literal `'\t'` is the tab character,
 -- | i.e. code point 0x09 from the ABNF rule above.
 htab :: forall e. Parser e Char
 htab = char '\t'
 --LF             =  %x0A
 --                  ; linefeed
 --
 --LWSP           =  *(WSP / CRLF WSP)
 --                  ; Use of this linear-white-space rule
 --                  ;  permits lines containing only white
 --                  ;  space that are no longer legal in
 --                  ;  mail headers and have caused
 --                  ;  interoperability problems in other
 --                  ;  contexts.
 --                  ; Do not use when defining mail
 --                  ;  headers and use with caution in
 --                  ;  other contexts.
 --
 --OCTET          =  %x00-FF
 --                  ; 8 bits of data
 --
 -- | SP: space.
 -- |
 -- | SP             =  %x20
 -- |
 -- | Defined as `char ' '`; the literal is the ASCII space character,
 -- | i.e. code point 0x20 from the ABNF rule above.
 sp :: forall e. Parser e Char
 sp = char ' '
 -- | VCHAR: visible (printing) characters.
 -- |
 -- | VCHAR          =  %x21-7E
 -- |
 -- | The predicate handed to `sat` checks that the character code lies in
 -- | the inclusive range 0x21-0x7E (33-126 in decimal).
 vchar :: forall e. Parser e Char
 vchar = sat isVisible
   where
     isVisible c = between 0x21 0x7E (C.toCharCode c)
 -- | WSP: white space.
 -- |
 -- | WSP            =  SP / HTAB
 -- |
 -- | Mirrors the ABNF alternation directly: `sp` is the left operand of
 -- | `<|>` and `htab` the right one.
 wsp :: forall e. Parser e Char
 wsp = sp <|> htab
 -- | CRLF: Internet standard newline.
 -- |
 -- | CRLF           =  CR LF
 -- |
 -- | Runs `char '\r'` followed by `char '\n'`; the parsed characters are
 -- | dropped (via `void`) so the result is `unit`.
 crlf :: forall e. Parser e Unit
 crlf = void $ do
   _ <- char '\r'
   char '\n'