WIP RFC5234

This commit is contained in:
Philippe Pittoli 2024-01-27 05:02:04 +01:00
parent ef336bd2de
commit 9868002114
5 changed files with 105 additions and 80 deletions

View File

@ -6,4 +6,4 @@ module GenericParser
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size) import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
import GenericParser.DomainParser (domain, label, subdomain, sub_eof) import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value) import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)

View File

@ -11,6 +11,9 @@ import Data.Either (Either(..))
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
-- ABNF core rules.
import GenericParser.RFC5234
import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof) import GenericParser.DomainParser (sub_eof)
import GenericParser.Parser (Parser(..) import GenericParser.Parser (Parser(..)
@ -21,79 +24,6 @@ data EmailError
= InvalidCharacter = InvalidCharacter
| InvalidDomain (Maybe DomainError) | InvalidDomain (Maybe DomainError)
-- | RFC 5234:
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
--
--BIT = "0" / "1"
--
--CHAR = %x01-7F
-- ; any 7-bit US-ASCII character,
-- ; excluding NUL
--
--CR = %x0D
-- ; carriage return
--
--CRLF = CR LF
-- ; Internet standard newline
--
--CTL = %x00-1F / %x7F
-- ; controls
--
--DIGIT = %x30-39
-- ; 0-9
--
--DQUOTE = %x22
-- ; " (Double Quote)
--
--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
-- | HTAB: horizontal tab.
-- | HTAB = %x09
htab :: forall e. Parser e Char
htab = char '\t'
--LF = %x0A
-- ; linefeed
--
--LWSP = *(WSP / CRLF WSP)
-- ; Use of this linear-white-space rule
-- ; permits lines containing only white
-- ; space that are no longer legal in
-- ; mail headers and have caused
-- ; interoperability problems in other
-- ; contexts.
-- ; Do not use when defining mail
-- ; headers and use with caution in
-- ; other contexts.
--
--OCTET = %x00-FF
-- ; 8 bits of data
--
-- | SP: space.
-- |
-- | SP = %x20
sp :: forall e. Parser e Char
sp = char ' '
-- | VCHAR = %x21-7E
-- |
-- | Visible printing characters.
vchar :: forall e. Parser e Char
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
-- | WSP: white space.
-- |
-- | WSP = SP / HTAB
wsp :: forall e. Parser e Char
wsp = sp <|> htab
crlf :: forall e. Parser e Unit
crlf = do _ <- char '\r'
_ <- char '\n'
pure unit
-- | obs-FWS = 1*WSP *(CRLF 1*WSP) -- | obs-FWS = 1*WSP *(CRLF 1*WSP)
-- | -- |
-- | Obsolete FWS. -- | Obsolete FWS.

View File

@ -13,8 +13,8 @@ import GenericParser.Parser (Parser(..)
, current_position , current_position
, string , string
, many1, lookahead , many1, lookahead
, sat, char, nat) , char, nat, hex)
import GenericParser.BaseFunctions (repeat, isHexaDecimal) import GenericParser.BaseFunctions (repeat)
data IPv6Error data IPv6Error
= InvalidCharacter = InvalidCharacter
@ -23,9 +23,6 @@ data IPv6Error
| TooManyChunks | TooManyChunks
| IPv6UnrelevantShortRepresentation | IPv6UnrelevantShortRepresentation
hex :: forall e. Parser e Char
hex = sat isHexaDecimal
-- | `ipv6_chunk` parses just a group of hexadecimal characters. -- | `ipv6_chunk` parses just a group of hexadecimal characters.
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters. -- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
ipv6_chunk :: Parser IPv6Error String ipv6_chunk :: Parser IPv6Error String

View File

@ -12,7 +12,7 @@ import Data.Maybe (Maybe(..), maybe)
import Data.String as S import Data.String as S
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton) import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper) import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
type Position = Int type Position = Int
type PositionString = { string :: String, position :: Position } type PositionString = { string :: String, position :: Position }
@ -208,6 +208,9 @@ integer = token int
symbol :: forall e. String -> Parser e String symbol :: forall e. String -> Parser e String
symbol xs = token (string xs) symbol xs = token (string xs)
hex :: forall e. Parser e Char
hex = sat isHexaDecimal
eof :: forall e. Parser e Unit eof :: forall e. Parser e Unit
eof = Parser \input -> case S.length input.string of eof = Parser \input -> case S.length input.string of
0 -> success input unit 0 -> success input unit

View File

@ -0,0 +1,95 @@
-- | `RFC5234`, Augmented BNF notation, explains the syntax specification found in many RFCs.
-- | This module implements core rules found in appendix B.1.
module GenericParser.RFC5234 where
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
import Control.Alt ((<|>))
import Data.Array as A
import Data.Char as C
import Data.Either (Either(..))
import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
import GenericParser.Parser (Parser(..)
, sat, char , digit , letter, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
-- | RFC 5234:
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
--
--BIT = "0" / "1"
--
--CHAR = %x01-7F
-- ; any 7-bit US-ASCII character,
-- ; excluding NUL
--
--CR = %x0D
-- ; carriage return
--
--CRLF = CR LF
-- ; Internet standard newline
--
--CTL = %x00-1F / %x7F
-- ; controls
--
--DIGIT = %x30-39
-- ; 0-9
--
--DQUOTE = %x22
-- ; " (Double Quote)
-- | HEXDIG: hexadecimal.
-- |
-- | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
hexdig :: forall e. Parser e Char
hexdig = sat isHexaDecimal
-- | HTAB: horizontal tab.
-- | HTAB = %x09
htab :: forall e. Parser e Char
htab = char '\t'
--LF = %x0A
-- ; linefeed
--
--LWSP = *(WSP / CRLF WSP)
-- ; Use of this linear-white-space rule
-- ; permits lines containing only white
-- ; space that are no longer legal in
-- ; mail headers and have caused
-- ; interoperability problems in other
-- ; contexts.
-- ; Do not use when defining mail
-- ; headers and use with caution in
-- ; other contexts.
--
--OCTET = %x00-FF
-- ; 8 bits of data
--
-- | SP: space.
-- |
-- | SP = %x20
sp :: forall e. Parser e Char
sp = char ' '
-- | VCHAR = %x21-7E
-- |
-- | Visible printing characters.
vchar :: forall e. Parser e Char
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
-- | WSP: white space.
-- |
-- | WSP = SP / HTAB
wsp :: forall e. Parser e Char
wsp = sp <|> htab
crlf :: forall e. Parser e Unit
crlf = do _ <- char '\r'
_ <- char '\n'
pure unit