WIP RFC5234
This commit is contained in:
parent
ef336bd2de
commit
9868002114
@ -6,4 +6,4 @@ module GenericParser
|
||||
|
||||
import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
||||
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
|
||||
import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value)
|
||||
|
@ -11,6 +11,9 @@ import Data.Either (Either(..))
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
-- ABNF core rules.
|
||||
import GenericParser.RFC5234
|
||||
|
||||
import GenericParser.DomainParser.Common (DomainError)
|
||||
import GenericParser.DomainParser (sub_eof)
|
||||
import GenericParser.Parser (Parser(..)
|
||||
@ -21,79 +24,6 @@ data EmailError
|
||||
= InvalidCharacter
|
||||
| InvalidDomain (Maybe DomainError)
|
||||
|
||||
-- | RFC 5234:
|
||||
|
||||
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||
--
|
||||
--BIT = "0" / "1"
|
||||
--
|
||||
--CHAR = %x01-7F
|
||||
-- ; any 7-bit US-ASCII character,
|
||||
-- ; excluding NUL
|
||||
--
|
||||
--CR = %x0D
|
||||
-- ; carriage return
|
||||
--
|
||||
--CRLF = CR LF
|
||||
-- ; Internet standard newline
|
||||
--
|
||||
--CTL = %x00-1F / %x7F
|
||||
-- ; controls
|
||||
--
|
||||
--DIGIT = %x30-39
|
||||
-- ; 0-9
|
||||
--
|
||||
--DQUOTE = %x22
|
||||
-- ; " (Double Quote)
|
||||
--
|
||||
--HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
||||
|
||||
-- | HTAB: horizontal tab.
|
||||
-- | HTAB = %x09
|
||||
htab :: forall e. Parser e Char
|
||||
htab = char '\t'
|
||||
|
||||
--LF = %x0A
|
||||
-- ; linefeed
|
||||
--
|
||||
--LWSP = *(WSP / CRLF WSP)
|
||||
-- ; Use of this linear-white-space rule
|
||||
-- ; permits lines containing only white
|
||||
-- ; space that are no longer legal in
|
||||
-- ; mail headers and have caused
|
||||
-- ; interoperability problems in other
|
||||
-- ; contexts.
|
||||
-- ; Do not use when defining mail
|
||||
-- ; headers and use with caution in
|
||||
-- ; other contexts.
|
||||
--
|
||||
--OCTET = %x00-FF
|
||||
-- ; 8 bits of data
|
||||
--
|
||||
-- | SP: space.
|
||||
-- |
|
||||
-- | SP = %x20
|
||||
sp :: forall e. Parser e Char
|
||||
sp = char ' '
|
||||
|
||||
-- | VCHAR = %x21-7E
|
||||
-- |
|
||||
-- | Visible printing characters.
|
||||
vchar :: forall e. Parser e Char
|
||||
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
||||
|
||||
-- | WSP: white space.
|
||||
-- |
|
||||
-- | WSP = SP / HTAB
|
||||
wsp :: forall e. Parser e Char
|
||||
wsp = sp <|> htab
|
||||
|
||||
|
||||
crlf :: forall e. Parser e Unit
|
||||
crlf = do _ <- char '\r'
|
||||
_ <- char '\n'
|
||||
pure unit
|
||||
|
||||
-- | obs-FWS = 1*WSP *(CRLF 1*WSP)
|
||||
-- |
|
||||
-- | Obsolete FWS.
|
||||
|
@ -13,8 +13,8 @@ import GenericParser.Parser (Parser(..)
|
||||
, current_position
|
||||
, string
|
||||
, many1, lookahead
|
||||
, sat, char, nat)
|
||||
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
|
||||
, char, nat, hex)
|
||||
import GenericParser.BaseFunctions (repeat)
|
||||
|
||||
data IPv6Error
|
||||
= InvalidCharacter
|
||||
@ -23,9 +23,6 @@ data IPv6Error
|
||||
| TooManyChunks
|
||||
| IPv6UnrelevantShortRepresentation
|
||||
|
||||
hex :: forall e. Parser e Char
|
||||
hex = sat isHexaDecimal
|
||||
|
||||
-- | `ipv6_chunk` parses just a group of hexadecimal characters.
|
||||
-- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters.
|
||||
ipv6_chunk :: Parser IPv6Error String
|
||||
|
@ -12,7 +12,7 @@ import Data.Maybe (Maybe(..), maybe)
|
||||
import Data.String as S
|
||||
import Data.String.CodeUnits (toCharArray, fromCharArray, singleton)
|
||||
|
||||
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
||||
import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal)
|
||||
|
||||
type Position = Int
|
||||
type PositionString = { string :: String, position :: Position }
|
||||
@ -208,6 +208,9 @@ integer = token int
|
||||
symbol :: forall e. String -> Parser e String
|
||||
symbol xs = token (string xs)
|
||||
|
||||
hex :: forall e. Parser e Char
|
||||
hex = sat isHexaDecimal
|
||||
|
||||
eof :: forall e. Parser e Unit
|
||||
eof = Parser \input -> case S.length input.string of
|
||||
0 -> success input unit
|
||||
|
95
src/GenericParser/RFC5234.purs
Normal file
95
src/GenericParser/RFC5234.purs
Normal file
@ -0,0 +1,95 @@
|
||||
-- | `RFC5234`, Augmented BNF notation, explains the syntax specification found in many RFCs.
|
||||
-- | This module implements core rules found in appendix B.1.
|
||||
module GenericParser.RFC5234 where
|
||||
|
||||
import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void)
|
||||
|
||||
import Control.Alt ((<|>))
|
||||
import Data.Array as A
|
||||
import Data.Char as C
|
||||
import Data.Either (Either(..))
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
|
||||
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, sat, char , digit , letter, item, many1, tryMaybe
|
||||
, current_input, failureError, parse, rollback, until)
|
||||
|
||||
-- | RFC 5234:
|
||||
|
||||
--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
|
||||
--
|
||||
--BIT = "0" / "1"
|
||||
--
|
||||
--CHAR = %x01-7F
|
||||
-- ; any 7-bit US-ASCII character,
|
||||
-- ; excluding NUL
|
||||
--
|
||||
--CR = %x0D
|
||||
-- ; carriage return
|
||||
--
|
||||
--CRLF = CR LF
|
||||
-- ; Internet standard newline
|
||||
--
|
||||
--CTL = %x00-1F / %x7F
|
||||
-- ; controls
|
||||
--
|
||||
--DIGIT = %x30-39
|
||||
-- ; 0-9
|
||||
--
|
||||
--DQUOTE = %x22
|
||||
-- ; " (Double Quote)
|
||||
|
||||
-- | HEXDIG: hexadecimal.
|
||||
-- |
|
||||
-- | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
|
||||
hexdig :: forall e. Parser e Char
|
||||
hexdig = sat isHexaDecimal
|
||||
|
||||
-- | HTAB: horizontal tab.
|
||||
-- | HTAB = %x09
|
||||
htab :: forall e. Parser e Char
|
||||
htab = char '\t'
|
||||
|
||||
--LF = %x0A
|
||||
-- ; linefeed
|
||||
--
|
||||
--LWSP = *(WSP / CRLF WSP)
|
||||
-- ; Use of this linear-white-space rule
|
||||
-- ; permits lines containing only white
|
||||
-- ; space that are no longer legal in
|
||||
-- ; mail headers and have caused
|
||||
-- ; interoperability problems in other
|
||||
-- ; contexts.
|
||||
-- ; Do not use when defining mail
|
||||
-- ; headers and use with caution in
|
||||
-- ; other contexts.
|
||||
--
|
||||
--OCTET = %x00-FF
|
||||
-- ; 8 bits of data
|
||||
--
|
||||
-- | SP: space.
|
||||
-- |
|
||||
-- | SP = %x20
|
||||
sp :: forall e. Parser e Char
|
||||
sp = char ' '
|
||||
|
||||
-- | VCHAR = %x21-7E
|
||||
-- |
|
||||
-- | Visible printing characters.
|
||||
vchar :: forall e. Parser e Char
|
||||
vchar = sat (\x -> between 33 126 $ C.toCharCode x)
|
||||
|
||||
-- | WSP: white space.
|
||||
-- |
|
||||
-- | WSP = SP / HTAB
|
||||
wsp :: forall e. Parser e Char
|
||||
wsp = sp <|> htab
|
||||
|
||||
|
||||
crlf :: forall e. Parser e Unit
|
||||
crlf = do _ <- char '\r'
|
||||
_ <- char '\n'
|
||||
pure unit
|
Loading…
Reference in New Issue
Block a user