Code split: DomainParser, DomainParserRFC1035, DomainParser.Common.
This commit is contained in:
parent
22d6386c32
commit
e459de778d
@ -1,7 +1,9 @@
|
||||
module GenericParser
|
||||
( module GenericParser.Parser
|
||||
, module GenericParser.DomainParserRFC1035
|
||||
, module GenericParser.DomainParser.Common
|
||||
, module GenericParser.DomainParser
|
||||
) where
|
||||
|
||||
import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||
import GenericParser.DomainParser (domain, label, subdomain, sub_eof)
|
||||
import GenericParser.Parser (alphanum, char, current_position, digit, Error, failure, failureError, ident, identifier, Input, int, integer, item, letter, lower, many1, nat, natural, parse, Parser(..), Position, PositionString, Result, sat, space, string, success, symbol, token, try, tryMaybe, upper, Value)
|
||||
import GenericParser.DomainParserRFC1035 (domain, DomainError(..), eof, label, ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size, subdomain, sub_eof)
|
||||
|
91
src/GenericParser/DomainParser.purs
Normal file
91
src/GenericParser/DomainParser.purs
Normal file
@ -0,0 +1,91 @@
|
||||
-- | `DomainParser` is a parser for modern domain names as seen in practice.
|
||||
-- | See `DomainParserRFC1035` for a domain parser restricted to RFC1035 recommandations.
|
||||
module GenericParser.DomainParser where
|
||||
|
||||
import Prelude (bind, not, pure, ($), (&&), (*>), (<<<), (<>), (>), (-))
|
||||
|
||||
import Control.Alt ((<|>))
|
||||
import Control.Lazy (defer)
|
||||
import Data.Array as A
|
||||
import Data.Either (Either(..))
|
||||
import Data.Maybe (Maybe(..), maybe)
|
||||
import Data.String as S
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
-- Import all common functions between RFC1035 and modern domain parsing.
|
||||
import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, success, failureError
|
||||
, current_position
|
||||
, alphanum, char, letter, many1, parse, string
|
||||
, try, tryMaybe)
|
||||
|
||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||
-- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).
|
||||
label :: Parser DomainError String
|
||||
label = do
|
||||
pos <- current_position
|
||||
maybel <- tryMaybe letter
|
||||
case maybel of
|
||||
Nothing -> Parser \_ -> failureError pos (Just InvalidCharacter)
|
||||
Just l -> do
|
||||
s <- tryMaybe ldh_str
|
||||
lastpos <- current_position
|
||||
let labelstr = CU.singleton l <> maybe "" CU.fromCharArray s
|
||||
if (S.length labelstr > max_label_length)
|
||||
then Parser \_ -> failureError pos (Just <<< LabelTooLarge $ S.length labelstr)
|
||||
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
|
||||
then Parser \_ -> failureError (lastpos - 1) (Just InvalidCharacter)
|
||||
else pure labelstr
|
||||
where
|
||||
-- Get the last character of a String.
|
||||
last_char :: String -> Maybe Char
|
||||
last_char = A.last <<< CU.toCharArray
|
||||
|
||||
-- Parse the last character of a String.
|
||||
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
||||
parse_last_char s p = case last_char s of
|
||||
Nothing -> false
|
||||
Just c -> case parse p { string: CU.singleton c, position: 0 } of
|
||||
Left _ -> false
|
||||
_ -> true
|
||||
|
||||
-- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
||||
subdomain :: Parser DomainError String
|
||||
subdomain = do
|
||||
-- First: read a label. This is bare minimum for a subdomain.
|
||||
lab <- label
|
||||
upperlabels <- try do
|
||||
_ <- char '.'
|
||||
sub <- defer \_ -> subdomain
|
||||
pure sub
|
||||
case upperlabels of
|
||||
Nothing -> pure lab
|
||||
Just l -> pure $ lab <> "." <> l
|
||||
|
||||
-- | Test for the domain to be a list of subdomains then an end-of-file.
|
||||
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
|
||||
sub_eof :: Parser DomainError String
|
||||
sub_eof = do
|
||||
sub <- subdomain
|
||||
maybe_final_point <- tryMaybe $ char '.'
|
||||
_ <- eof -- In case there is still some input, it fails.
|
||||
pos <- current_position
|
||||
let parsed_domain = did_we_parse_the_final_point maybe_final_point sub
|
||||
if S.length parsed_domain > max_domain_length
|
||||
then Parser \_ -> failureError pos (Just <<< DomainTooLarge $ S.length parsed_domain)
|
||||
else pure parsed_domain
|
||||
where
|
||||
did_we_parse_the_final_point Nothing sub = sub
|
||||
did_we_parse_the_final_point _ sub = sub <> "."
|
||||
|
||||
-- | From RFC 1035: <domain> ::= <subdomain> | " "
|
||||
-- |
|
||||
-- | Accepting an optional '.' at the end of the subdomain doesn't conform
|
||||
-- | to the (prefered) syntax of a domain as described in RFC 1035.
|
||||
-- | However, this last '.' character should be acceptable in most applications.
|
||||
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
||||
-- | has to be differenciated from a "relative" name (www).
|
||||
domain :: Parser DomainError String
|
||||
domain = (string " " *> eof) <|> sub_eof
|
54
src/GenericParser/DomainParser/Common.purs
Normal file
54
src/GenericParser/DomainParser/Common.purs
Normal file
@ -0,0 +1,54 @@
|
||||
-- | `GenericParser.DomainParser.Common` regroups all functions and types that are common between the different domain parsers.
|
||||
module GenericParser.DomainParser.Common where
|
||||
|
||||
import Prelude
|
||||
import Control.Alt ((<|>))
|
||||
import Control.Lazy (defer)
|
||||
import Data.Array as A
|
||||
import Data.Either (Either(..))
|
||||
import Data.Maybe (Maybe(..), maybe)
|
||||
import Data.String as S
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, success, failureError
|
||||
, alphanum, char, many1, string )
|
||||
|
||||
type Size = Int
|
||||
-- | `DomainError` expresses all possible errors that can occur while parsing a domain.
|
||||
-- | When an error occurs, the position is given by the Parser along the related `DomainError`.
|
||||
data DomainError
|
||||
= LabelTooLarge Size
|
||||
| DomainTooLarge Size
|
||||
| InvalidCharacter
|
||||
| EOFExpected
|
||||
|
||||
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||
let_dig :: forall e. Parser e Char
|
||||
let_dig = alphanum
|
||||
|
||||
-- | From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
|
||||
-- | Either a Letter, Digital or an Hyphenation character.
|
||||
let_dig_hyp :: forall e. Parser e Char
|
||||
let_dig_hyp = let_dig <|> char '-'
|
||||
|
||||
-- | From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
||||
ldh_str :: forall e. Parser e (Array Char)
|
||||
ldh_str = many1 let_dig_hyp
|
||||
|
||||
-- | WARNING: Verify the actual maximum length for a label.
|
||||
-- | Current maximum accepted length for a label is 63.
|
||||
max_label_length :: Int
|
||||
max_label_length = 63
|
||||
|
||||
-- | WARNING: Verify the actual maximum length for a domain.
|
||||
-- | Current maximum accepted length for a domain is 255.
|
||||
max_domain_length :: Int
|
||||
max_domain_length = 255
|
||||
|
||||
-- | Test for the end-of-file (no more input).
|
||||
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
|
||||
eof :: Parser DomainError String
|
||||
eof = Parser \input -> case S.length input.string of
|
||||
0 -> success input ""
|
||||
_ -> failureError input.position (Just EOFExpected)
|
@ -12,44 +12,15 @@ import Data.Maybe (Maybe(..), maybe)
|
||||
import Data.String as S
|
||||
import Data.String.CodeUnits as CU
|
||||
|
||||
-- Import all common functions between RFC1035 and modern domain parsing.
|
||||
import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size)
|
||||
|
||||
import GenericParser.Parser (Parser(..)
|
||||
, success, failureError
|
||||
, current_position
|
||||
, alphanum, char, letter, many1, parse, string
|
||||
, try, tryMaybe)
|
||||
|
||||
type Size = Int
|
||||
-- | `DomainError` expresses all possible errors that can occur while parsing a domain.
|
||||
-- | When an error occurs, the position is given by the Parser along the related `DomainError`.
|
||||
data DomainError
|
||||
= LabelTooLarge Size
|
||||
| DomainTooLarge Size
|
||||
| InvalidCharacter
|
||||
| EOFExpected
|
||||
|
||||
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||
let_dig :: forall e. Parser e Char
|
||||
let_dig = alphanum
|
||||
|
||||
-- | From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
|
||||
-- | Either a Letter, Digital or an Hyphenation character.
|
||||
let_dig_hyp :: forall e. Parser e Char
|
||||
let_dig_hyp = let_dig <|> char '-'
|
||||
|
||||
-- | From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
||||
ldh_str :: forall e. Parser e (Array Char)
|
||||
ldh_str = many1 let_dig_hyp
|
||||
|
||||
-- | WARNING: Verify the actual maximum length for a label.
|
||||
-- | Current maximum accepted length for a label is 63.
|
||||
max_label_length :: Int
|
||||
max_label_length = 63
|
||||
|
||||
-- | WARNING: Verify the actual maximum length for a domain.
|
||||
-- | Current maximum accepted length for a domain is 255.
|
||||
max_domain_length :: Int
|
||||
max_domain_length = 255
|
||||
|
||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||
label :: Parser DomainError String
|
||||
label = do
|
||||
@ -92,13 +63,6 @@ subdomain = do
|
||||
Nothing -> pure lab
|
||||
Just l -> pure $ lab <> "." <> l
|
||||
|
||||
-- | Test for the end-of-file (no more input).
|
||||
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
|
||||
eof :: Parser DomainError String
|
||||
eof = Parser \input -> case S.length input.string of
|
||||
0 -> success input ""
|
||||
_ -> failureError input.position (Just EOFExpected)
|
||||
|
||||
-- | Test for the domain to be a list of subdomains then an end-of-file.
|
||||
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
|
||||
sub_eof :: Parser DomainError String
|
||||
|
@ -1,7 +1,8 @@
|
||||
module Test.Main where
|
||||
|
||||
import GenericParser.Parser (Parser(..))
|
||||
import GenericParser.DomainParserRFC1035 (domain, label, ldh_str, sub_eof, subdomain, DomainError(..))
|
||||
import GenericParser.DomainParser.Common (ldh_str, DomainError(..))
|
||||
import GenericParser.DomainParserRFC1035 (domain, label, sub_eof, subdomain)
|
||||
|
||||
import Prelude (Unit, discard, show, ($), (<>))
|
||||
import Data.Either (Either(..))
|
||||
|
Loading…
Reference in New Issue
Block a user