WIP RFC5234

This commit is contained in:
Philippe Pittoli 2024-01-27 05:02:04 +01:00
parent ef336bd2de
commit 9868002114
5 changed files with 105 additions and 80 deletions

View File

@ -6,4 +6,4 @@ module GenericParser
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)

View File

@ -11,6 +11,9 @@ import Data.Either (Either(..))
import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU
-- ABNF core rules.
import GenericParser.RFC5234
import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof)
import GenericParser.Parser (Parser(..)
@ -21,79 +24,6 @@ data EmailError
= InvalidCharacter
| InvalidDomain (Maybe DomainError)
-- | RFC 5234:
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
--
--BIT = "0" / "1"
--
--CHAR = %x01-7F
-- ; any 7-bit US-ASCII character,
-- ; excluding NUL
--
--CR = %x0D
-- ; carriage return
--
--CRLF = CR LF
-- ; Internet standard newline
--
--CTL = %x00-1F / %x7F
-- ; controls
--
--DIGIT = %x30-39
-- ; 0-9
--
--DQUOTE = %x22
-- ; " (Double Quote)
--
--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
-- | HTAB: horizontal tab.
-- | HTAB = %x09
htab :: forall e. Parser e Char
htab = char '\t'
--LF = %x0A
-- ; linefeed
--
--LWSP = *(WSP / CRLF WSP)
-- ; Use of this linear-white-space rule
-- ; permits lines containing only white
-- ; space that are no longer legal in
-- ; mail headers and have caused
-- ; interoperability problems in other
-- ; contexts.
-- ; Do not use when defining mail
-- ; headers and use with caution in
-- ; other contexts.
--
--OCTET = %x00-FF
-- ; 8 bits of data
--
-- | SP: space.
-- |
-- | SP = %x20
sp :: forall e. Parser e Char
sp = char ' '
-- | VCHAR = %x21-7E
-- |
-- | Visible printing characters.
vchar :: forall e. Parser e Char
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
-- | WSP: white space.
-- |
-- | WSP = SP / HTAB
wsp :: forall e. Parser e Char
wsp = sp <|> htab
crlf :: forall e. Parser e Unit
crlf = do _ <- char '\r'
_ <- char '\n'
pure unit
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
-- |
-- | Obsolete FWS.

View File

@ -13,8 +13,8 @@ import GenericParser.Parser (Parser(..)
, current_position
, string
, many1, lookahead
, sat, char, nat)
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
, char, nat, hex)
import GenericParser.BaseFunctions (repeat)
data IPv6Error
= InvalidCharacter
@ -23,9 +23,6 @@ data IPv6Error
| TooManyChunks
| IPv6UnrelevantShortRepresentation
hex :: forall e. Parser e Char
hex = sat isHexaDecimal
-- | `ipv6_chunk` parses just a group of hexadecimal characters.
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
ipv6_chunk :: Parser IPv6Error String

View File

@ -12,7 +12,7 @@ import Data.Maybe (Maybe(..), maybe)
import Data.String as S
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
type Position = Int
type PositionString = { string :: String, position :: Position }
@ -208,6 +208,9 @@ integer = token int
symbol :: forall e. String -> Parser e String
symbol xs = token (string xs)
hex :: forall e. Parser e Char
hex = sat isHexaDecimal
eof :: forall e. Parser e Unit
eof = Parser \input -> case S.length input.string of
0 -> success input unit

View File

@ -0,0 +1,95 @@
-- | `RFC5234`, Augmented BNF notation, explains the syntax specification found in many RFCs.
-- | This module implements core rules found in appendix B.1.
module GenericParser.RFC5234 where
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
import Control.Alt ((<|>))
import Data.Array as A
import Data.Char as C
import Data.Either (Either(..))
import Data.Maybe (Maybe(..))
import Data.String.CodeUnits as CU
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
import GenericParser.Parser (Parser(..)
, sat, char , digit , letter, item, many1, tryMaybe
, current_input, failureError, parse, rollback, until)
-- | RFC 5234:
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
--
--BIT = "0" / "1"
--
--CHAR = %x01-7F
-- ; any 7-bit US-ASCII character,
-- ; excluding NUL
--
--CR = %x0D
-- ; carriage return
--
--CRLF = CR LF
-- ; Internet standard newline
--
--CTL = %x00-1F / %x7F
-- ; controls
--
--DIGIT = %x30-39
-- ; 0-9
--
--DQUOTE = %x22
-- ; " (Double Quote)
-- | HEXDIG: hexadecimal.
-- |
-- | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
hexdig :: forall e. Parser e Char
hexdig = sat isHexaDecimal
-- | HTAB: horizontal tab.
-- | HTAB = %x09
htab :: forall e. Parser e Char
htab = char '\t'
--LF = %x0A
-- ; linefeed
--
--LWSP = *(WSP / CRLF WSP)
-- ; Use of this linear-white-space rule
-- ; permits lines containing only white
-- ; space that are no longer legal in
-- ; mail headers and have caused
-- ; interoperability problems in other
-- ; contexts.
-- ; Do not use when defining mail
-- ; headers and use with caution in
-- ; other contexts.
--
--OCTET = %x00-FF
-- ; 8 bits of data
--
-- | SP: space.
-- |
-- | SP = %x20
sp :: forall e. Parser e Char
sp = char ' '
-- | VCHAR = %x21-7E
-- |
-- | Visible printing characters.
vchar :: forall e. Parser e Char
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
-- | WSP: white space.
-- |
-- | WSP = SP / HTAB
wsp :: forall e. Parser e Char
wsp = sp <|> htab
crlf :: forall e. Parser e Unit
crlf = do _ <- char '\r'
_ <- char '\n'
pure unit