parser/src/GenericParser/DomainParserRFC1035.purs

99 lines
3.9 KiB
Text

-- | `DomainParserRFC1035` is a simple parser for domain names as described in RFC 1035.
-- | See `DomainParser` for a more modern domain parser, accepting underscores in labels for example.
module GenericParser.DomainParserRFC1035 where
import Prelude (bind, not, pure, ($), (&&), (*>), (<<<), (<>), (>), (-))
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array as A
import Data.Either (Either(..))
import Data.Maybe (Maybe(..), maybe)
import Data.String as S
import Data.String.CodeUnits as CU
-- Import all common functions between RFC1035 and modern domain parsing.
import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig, max_domain_length, max_label_length)
import GenericParser.Parser (Parser(..)
, failureError
, current_position
, char, parse, string
, tryMaybe)
import GenericParser.SomeParsers (letter)
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser DomainError String
label = do
pos <- current_position
maybel <- tryMaybe letter
case maybel of
Nothing -> Parser \_ -> failureError pos (Just InvalidCharacter)
Just l -> do
s <- tryMaybe ldh_str
lastpos <- current_position
let labelstr = CU.singleton l <> maybe "" CU.fromCharArray s
if (S.length labelstr > max_label_length)
then Parser \_ -> failureError pos (Just <<< LabelTooLarge $ S.length labelstr)
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
then Parser \_ -> failureError (lastpos - 1) (Just InvalidCharacter)
else pure labelstr
where
-- Get the last character of a String.
last_char :: String -> Maybe Char
last_char = A.last <<< CU.toCharArray
-- Parse the last character of a String.
parse_last_char :: forall e. String -> Parser e Char -> Boolean
parse_last_char s p = case last_char s of
Nothing -> false
Just c -> case parse p { string: CU.singleton c, position: 0 } of
Left _ -> false
_ -> true
-- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
-- | For implementation details, this accepts a final dot "." as a suffix.
subdomain :: Parser DomainError String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
point <- tryMaybe $ char '.'
case point of
Nothing -> pure lab
Just _ -> do
upperlabels <- tryMaybe $ defer \_ -> subdomain
case upperlabels of
Nothing -> pure $ lab <> "."
Just l -> pure $ lab <> "." <> l
-- | Test for the domain to be a list of subdomains then an end-of-file.
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
sub_eof :: Parser DomainError String
sub_eof = do
sub <- subdomain
_ <- eof -- In case there is still some input, it fails.
pos <- current_position
if S.length sub > max_domain_length
then Parser \_ -> failureError pos (Just <<< DomainTooLarge $ S.length sub)
else pure sub
-- | From RFC 1035: <domain> ::= <subdomain> | " "
-- |
-- | Since RFC 1034 requires to accept '*' as leftmost domain label, the rule should be:
-- | <domain> ::= <subdomain> | "*." <subdomain> | " "
-- |
-- | Accepting an optional '.' at the end of the subdomain doesn't conform
-- | to the (prefered) syntax of a domain as described in RFC 1035.
-- | However, this last '.' character should be acceptable in most applications.
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
-- | has to be differenciated from a "relative" name (www).
-- |
-- | For documentation about wildcards, see RFC 4592.
domain :: Parser DomainError String
domain = (string " " *> eof) <|> wildcard <|> sub_eof
where
wildcard :: Parser DomainError String
wildcard = do
_ <- string "*."
rest <- sub_eof
pure $ "*." <> rest