WIP RFC5234
This commit is contained in:
parent
ef336bd2de
commit
9868002114
@ -6,4 +6,4 @@ module GenericParser
|
|||||||
|
|
||||||
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||||
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
||||||
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
|
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
|
||||||
|
@ -11,6 +11,9 @@ import Data.Either (Either(..))
|
|||||||
import Data.Maybe (Maybe(..))
|
import Data.Maybe (Maybe(..))
|
||||||
import Data.String.CodeUnits as CU
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
|
-- ABNF core rules.
|
||||||
|
import GenericParser.RFC5234
|
||||||
|
|
||||||
import GenericParser.DomainParser.Common (DomainError)
|
import GenericParser.DomainParser.Common (DomainError)
|
||||||
import GenericParser.DomainParser (sub_eof)
|
import GenericParser.DomainParser (sub_eof)
|
||||||
import GenericParser.Parser (Parser(..)
|
import GenericParser.Parser (Parser(..)
|
||||||
@ -21,79 +24,6 @@ data EmailError
|
|||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
| InvalidDomain (Maybe DomainError)
|
| InvalidDomain (Maybe DomainError)
|
||||||
|
|
||||||
-- | RFC 5234:
|
|
||||||
|
|
||||||
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
|
||||||
--
|
|
||||||
--BIT = "0" / "1"
|
|
||||||
--
|
|
||||||
--CHAR = %x01-7F
|
|
||||||
-- ; any 7-bit US-ASCII character,
|
|
||||||
-- ; excluding NUL
|
|
||||||
--
|
|
||||||
--CR = %x0D
|
|
||||||
-- ; carriage return
|
|
||||||
--
|
|
||||||
--CRLF = CR LF
|
|
||||||
-- ; Internet standard newline
|
|
||||||
--
|
|
||||||
--CTL = %x00-1F / %x7F
|
|
||||||
-- ; controls
|
|
||||||
--
|
|
||||||
--DIGIT = %x30-39
|
|
||||||
-- ; 0-9
|
|
||||||
--
|
|
||||||
--DQUOTE = %x22
|
|
||||||
-- ; " (Double Quote)
|
|
||||||
--
|
|
||||||
--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
|
||||||
|
|
||||||
-- | HTAB: horizontal tab.
|
|
||||||
-- | HTAB = %x09
|
|
||||||
htab :: forall e. Parser e Char
|
|
||||||
htab = char '\t'
|
|
||||||
|
|
||||||
--LF = %x0A
|
|
||||||
-- ; linefeed
|
|
||||||
--
|
|
||||||
--LWSP = *(WSP / CRLF WSP)
|
|
||||||
-- ; Use of this linear-white-space rule
|
|
||||||
-- ; permits lines containing only white
|
|
||||||
-- ; space that are no longer legal in
|
|
||||||
-- ; mail headers and have caused
|
|
||||||
-- ; interoperability problems in other
|
|
||||||
-- ; contexts.
|
|
||||||
-- ; Do not use when defining mail
|
|
||||||
-- ; headers and use with caution in
|
|
||||||
-- ; other contexts.
|
|
||||||
--
|
|
||||||
--OCTET = %x00-FF
|
|
||||||
-- ; 8 bits of data
|
|
||||||
--
|
|
||||||
-- | SP: space.
|
|
||||||
-- |
|
|
||||||
-- | SP = %x20
|
|
||||||
sp :: forall e. Parser e Char
|
|
||||||
sp = char ' '
|
|
||||||
|
|
||||||
-- | VCHAR = %x21-7E
|
|
||||||
-- |
|
|
||||||
-- | Visible printing characters.
|
|
||||||
vchar :: forall e. Parser e Char
|
|
||||||
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
|
||||||
|
|
||||||
-- | WSP: white space.
|
|
||||||
-- |
|
|
||||||
-- | WSP = SP / HTAB
|
|
||||||
wsp :: forall e. Parser e Char
|
|
||||||
wsp = sp <|> htab
|
|
||||||
|
|
||||||
|
|
||||||
crlf :: forall e. Parser e Unit
|
|
||||||
crlf = do _ <- char '\r'
|
|
||||||
_ <- char '\n'
|
|
||||||
pure unit
|
|
||||||
|
|
||||||
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
||||||
-- |
|
-- |
|
||||||
-- | Obsolete FWS.
|
-- | Obsolete FWS.
|
||||||
|
@ -13,8 +13,8 @@ import GenericParser.Parser (Parser(..)
|
|||||||
, current_position
|
, current_position
|
||||||
, string
|
, string
|
||||||
, many1, lookahead
|
, many1, lookahead
|
||||||
, sat, char, nat)
|
, char, nat, hex)
|
||||||
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
|
import GenericParser.BaseFunctions (repeat)
|
||||||
|
|
||||||
data IPv6Error
|
data IPv6Error
|
||||||
= InvalidCharacter
|
= InvalidCharacter
|
||||||
@ -23,9 +23,6 @@ data IPv6Error
|
|||||||
| TooManyChunks
|
| TooManyChunks
|
||||||
| IPv6UnrelevantShortRepresentation
|
| IPv6UnrelevantShortRepresentation
|
||||||
|
|
||||||
hex :: forall e. Parser e Char
|
|
||||||
hex = sat isHexaDecimal
|
|
||||||
|
|
||||||
-- | `ipv6_chunk` parses just a group of hexadecimal characters.
|
-- | `ipv6_chunk` parses just a group of hexadecimal characters.
|
||||||
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
||||||
ipv6_chunk :: Parser IPv6Error String
|
ipv6_chunk :: Parser IPv6Error String
|
||||||
|
@ -12,7 +12,7 @@ import Data.Maybe (Maybe(..), maybe)
|
|||||||
import Data.String as S
|
import Data.String as S
|
||||||
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
|
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
|
||||||
|
|
||||||
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
|
||||||
|
|
||||||
type Position = Int
|
type Position = Int
|
||||||
type PositionString = { string :: String, position :: Position }
|
type PositionString = { string :: String, position :: Position }
|
||||||
@ -208,6 +208,9 @@ integer = token int
|
|||||||
symbol :: forall e. String -> Parser e String
|
symbol :: forall e. String -> Parser e String
|
||||||
symbol xs = token (string xs)
|
symbol xs = token (string xs)
|
||||||
|
|
||||||
|
hex :: forall e. Parser e Char
|
||||||
|
hex = sat isHexaDecimal
|
||||||
|
|
||||||
eof :: forall e. Parser e Unit
|
eof :: forall e. Parser e Unit
|
||||||
eof = Parser \input -> case S.length input.string of
|
eof = Parser \input -> case S.length input.string of
|
||||||
0 -> success input unit
|
0 -> success input unit
|
||||||
|
95
src/GenericParser/RFC5234.purs
Normal file
95
src/GenericParser/RFC5234.purs
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
-- | `RFC5234`, Augmented BNF notation, explains the syntax specification found in many RFCs.
|
||||||
|
-- | This module implements core rules found in appendix B.1.
|
||||||
|
module GenericParser.RFC5234 where
|
||||||
|
|
||||||
|
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
|
||||||
|
|
||||||
|
import Control.Alt ((<|>))
|
||||||
|
import Data.Array as A
|
||||||
|
import Data.Char as C
|
||||||
|
import Data.Either (Either(..))
|
||||||
|
import Data.Maybe (Maybe(..))
|
||||||
|
import Data.String.CodeUnits as CU
|
||||||
|
|
||||||
|
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
|
||||||
|
|
||||||
|
import GenericParser.Parser (Parser(..)
|
||||||
|
, sat, char , digit , letter, item, many1, tryMaybe
|
||||||
|
, current_input, failureError, parse, rollback, until)
|
||||||
|
|
||||||
|
-- | RFC 5234:
|
||||||
|
|
||||||
|
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||||
|
--
|
||||||
|
--BIT = "0" / "1"
|
||||||
|
--
|
||||||
|
--CHAR = %x01-7F
|
||||||
|
-- ; any 7-bit US-ASCII character,
|
||||||
|
-- ; excluding NUL
|
||||||
|
--
|
||||||
|
--CR = %x0D
|
||||||
|
-- ; carriage return
|
||||||
|
--
|
||||||
|
--CRLF = CR LF
|
||||||
|
-- ; Internet standard newline
|
||||||
|
--
|
||||||
|
--CTL = %x00-1F / %x7F
|
||||||
|
-- ; controls
|
||||||
|
--
|
||||||
|
--DIGIT = %x30-39
|
||||||
|
-- ; 0-9
|
||||||
|
--
|
||||||
|
--DQUOTE = %x22
|
||||||
|
-- ; " (Double Quote)
|
||||||
|
|
||||||
|
-- | HEXDIG: hexadecimal.
|
||||||
|
-- |
|
||||||
|
-- | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
||||||
|
hexdig :: forall e. Parser e Char
|
||||||
|
hexdig = sat isHexaDecimal
|
||||||
|
|
||||||
|
-- | HTAB: horizontal tab.
|
||||||
|
-- | HTAB = %x09
|
||||||
|
htab :: forall e. Parser e Char
|
||||||
|
htab = char '\t'
|
||||||
|
|
||||||
|
--LF = %x0A
|
||||||
|
-- ; linefeed
|
||||||
|
--
|
||||||
|
--LWSP = *(WSP / CRLF WSP)
|
||||||
|
-- ; Use of this linear-white-space rule
|
||||||
|
-- ; permits lines containing only white
|
||||||
|
-- ; space that are no longer legal in
|
||||||
|
-- ; mail headers and have caused
|
||||||
|
-- ; interoperability problems in other
|
||||||
|
-- ; contexts.
|
||||||
|
-- ; Do not use when defining mail
|
||||||
|
-- ; headers and use with caution in
|
||||||
|
-- ; other contexts.
|
||||||
|
--
|
||||||
|
--OCTET = %x00-FF
|
||||||
|
-- ; 8 bits of data
|
||||||
|
--
|
||||||
|
-- | SP: space.
|
||||||
|
-- |
|
||||||
|
-- | SP = %x20
|
||||||
|
sp :: forall e. Parser e Char
|
||||||
|
sp = char ' '
|
||||||
|
|
||||||
|
-- | VCHAR = %x21-7E
|
||||||
|
-- |
|
||||||
|
-- | Visible printing characters.
|
||||||
|
vchar :: forall e. Parser e Char
|
||||||
|
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
||||||
|
|
||||||
|
-- | WSP: white space.
|
||||||
|
-- |
|
||||||
|
-- | WSP = SP / HTAB
|
||||||
|
wsp :: forall e. Parser e Char
|
||||||
|
wsp = sp <|> htab
|
||||||
|
|
||||||
|
|
||||||
|
crlf :: forall e. Parser e Unit
|
||||||
|
crlf = do _ <- char '\r'
|
||||||
|
_ <- char '\n'
|
||||||
|
pure unit
|
Loading…
Reference in New Issue
Block a user