diff --git a/src/GenericParser.purs b/src/GenericParser.purs index cc1749d..9df9884 100644 --- a/src/GenericParser.purs +++ b/src/GenericParser.purs @@ -6,4 +6,4 @@ module GenericParser import GenericParser.DomainParser.Common (DomainError(..), ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size) import GenericParser.DomainParser (domain, label, subdomain, sub_eof) -import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value) +import GenericParser.Parser (alphanum, char, current_input, current_position, digit, eof, Error, failure, failureError, hex, ident, identifier, Input, int, integer, item, letter, lookahead, lower, many1, nat, natural, parse, parse_last_char, Parser(..), Position, PositionString, Result, rollback, sat, space, string, success, symbol, token, try, tryMaybe, until, upper, Value) diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index 331f50d..8d9d676 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -11,6 +11,9 @@ import Data.Either (Either(..)) import Data.Maybe (Maybe(..)) import Data.String.CodeUnits as CU +-- ABNF core rules. +import GenericParser.RFC5234 + import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser (sub_eof) import GenericParser.Parser (Parser(..) @@ -21,79 +24,6 @@ data EmailError = InvalidCharacter | InvalidDomain (Maybe DomainError) --- | RFC 5234: - ---ALPHA = %x41-5A / %x61-7A ; A-Z / a-z --- ---BIT = "0" / "1" --- ---CHAR = %x01-7F --- ; any 7-bit US-ASCII character, --- ; excluding NUL --- ---CR = %x0D --- ; carriage return --- ---CRLF = CR LF --- ; Internet standard newline --- ---CTL = %x00-1F / %x7F --- ; controls --- ---DIGIT = %x30-39 --- ; 0-9 --- ---DQUOTE = %x22 --- ; " (Double Quote) --- ---HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - --- | HTAB: horizontal tab. --- | HTAB = %x09 -htab :: forall e. Parser e Char -htab = char '\t' - ---LF = %x0A --- ; linefeed --- ---LWSP = *(WSP / CRLF WSP) --- ; Use of this linear-white-space rule --- ; permits lines containing only white --- ; space that are no longer legal in --- ; mail headers and have caused --- ; interoperability problems in other --- ; contexts. --- ; Do not use when defining mail --- ; headers and use with caution in --- ; other contexts. --- ---OCTET = %x00-FF --- ; 8 bits of data --- --- | SP: space. --- | --- | SP = %x20 -sp :: forall e. Parser e Char -sp = char ' ' - --- | VCHAR = %x21-7E --- | --- | Visible printing characters. -vchar :: forall e. Parser e Char -vchar = sat (\x -> between 33 126 $ C.toCharCode x) - --- | WSP: white space. --- | --- | WSP = SP / HTAB -wsp :: forall e. Parser e Char -wsp = sp <|> htab - - -crlf :: forall e. Parser e Unit -crlf = do _ <- char '\r' - _ <- char '\n' - pure unit - -- | obs-FWS = 1*WSP *(CRLF 1*WSP) -- | -- | Obsolete FWS. diff --git a/src/GenericParser/IPAddress.purs b/src/GenericParser/IPAddress.purs index 11fa362..ecc1cd0 100644 --- a/src/GenericParser/IPAddress.purs +++ b/src/GenericParser/IPAddress.purs @@ -13,8 +13,8 @@ import GenericParser.Parser (Parser(..) , current_position , string , many1, lookahead - , sat, char, nat) -import GenericParser.BaseFunctions (repeat, isHexaDecimal) + , char, nat, hex) +import GenericParser.BaseFunctions (repeat) data IPv6Error = InvalidCharacter @@ -23,9 +23,6 @@ data IPv6Error | TooManyChunks | IPv6UnrelevantShortRepresentation -hex :: forall e. Parser e Char -hex = sat isHexaDecimal - -- | `ipv6_chunk` parses just a group of hexadecimal characters. -- | Return an error (TooManyHexaDecimalCharacters) in case the group has more than 4 characters. ipv6_chunk :: Parser IPv6Error String diff --git a/src/GenericParser/Parser.purs b/src/GenericParser/Parser.purs index a088644..57cb3e9 100644 --- a/src/GenericParser/Parser.purs +++ b/src/GenericParser/Parser.purs @@ -12,7 +12,7 @@ import Data.Maybe (Maybe(..), maybe) import Data.String as S import Data.String.CodeUnits (toCharArray, fromCharArray, singleton) -import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper) +import GenericParser.BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper, isHexaDecimal) type Position = Int type PositionString = { string :: String, position :: Position } @@ -208,6 +208,9 @@ integer = token int symbol :: forall e. String -> Parser e String symbol xs = token (string xs) +hex :: forall e. Parser e Char +hex = sat isHexaDecimal + eof :: forall e. Parser e Unit eof = Parser \input -> case S.length input.string of 0 -> success input unit diff --git a/src/GenericParser/RFC5234.purs b/src/GenericParser/RFC5234.purs new file mode 100644 index 0000000..671e39c --- /dev/null +++ b/src/GenericParser/RFC5234.purs @@ -0,0 +1,95 @@ +-- | `RFC5234`, Augmented BNF notation, explains the syntax specification found in many RFCs. +-- | This module implements core rules found in appendix B.1. +module GenericParser.RFC5234 where + +import Prelude (Unit, unit, bind, pure, ($), (<>), (==), (||), between, void) + +import Control.Alt ((<|>)) +import Data.Array as A +import Data.Char as C +import Data.Either (Either(..)) +import Data.Maybe (Maybe(..)) +import Data.String.CodeUnits as CU + +import GenericParser.BaseFunctions (repeat, isHexaDecimal) + +import GenericParser.Parser (Parser(..) + , sat, char , digit , letter, item, many1, tryMaybe + , current_input, failureError, parse, rollback, until) + +-- | RFC 5234: + +--ALPHA = %x41-5A / %x61-7A ; A-Z / a-z +-- +--BIT = "0" / "1" +-- +--CHAR = %x01-7F +-- ; any 7-bit US-ASCII character, +-- ; excluding NUL +-- +--CR = %x0D +-- ; carriage return +-- +--CRLF = CR LF +-- ; Internet standard newline +-- +--CTL = %x00-1F / %x7F +-- ; controls +-- +--DIGIT = %x30-39 +-- ; 0-9 +-- +--DQUOTE = %x22 +-- ; " (Double Quote) + +-- | HEXDIG: hexadecimal. +-- | +-- | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" +hexdig :: forall e. Parser e Char +hexdig = sat isHexaDecimal + +-- | HTAB: horizontal tab. +-- | HTAB = %x09 +htab :: forall e. Parser e Char +htab = char '\t' + +--LF = %x0A +-- ; linefeed +-- +--LWSP = *(WSP / CRLF WSP) +-- ; Use of this linear-white-space rule +-- ; permits lines containing only white +-- ; space that are no longer legal in +-- ; mail headers and have caused +-- ; interoperability problems in other +-- ; contexts. +-- ; Do not use when defining mail +-- ; headers and use with caution in +-- ; other contexts. +-- +--OCTET = %x00-FF +-- ; 8 bits of data +-- +-- | SP: space. +-- | +-- | SP = %x20 +sp :: forall e. Parser e Char +sp = char ' ' + +-- | VCHAR = %x21-7E +-- | +-- | Visible printing characters. +vchar :: forall e. Parser e Char +vchar = sat (\x -> between 33 126 $ C.toCharCode x) + +-- | WSP: white space. +-- | +-- | WSP = SP / HTAB +wsp :: forall e. Parser e Char +wsp = sp <|> htab + + +crlf :: forall e. Parser e Unit +crlf = do _ <- char '\r' + _ <- char '\n' + pure unit