parser/src/DomainParser.purs

115 lines
4 KiB
Text

-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
module DomainParser where
import Prelude (class Monoid, bind, mempty, not, pure, ($), (&&), (*>), (<<<), (<>), (==), (>))
import Control.Lazy (defer)
import Data.Maybe (Maybe(..))
import Data.Array as A
import Data.String as S
import Data.Tuple (Tuple(..))
-- import Data.Array.NonEmpty as NonEmpty
import Data.String.CodeUnits as CU
import Control.Alt ((<|>))
import Control.Plus (empty)
import Parser (Parser(..), alphanum, char, letter, many1, parse, string)
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: Parser Char
let_dig = alphanum
-- | From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
-- | Either a Letter, Digital or an Hyphenation character.
let_dig_hyp :: Parser Char
let_dig_hyp = let_dig <|> char '-'
-- | From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
ldh_str :: Parser (Array Char)
ldh_str = many1 let_dig_hyp
-- TODO: 63
label_maxsize :: Int
label_maxsize = 7
-- TODO: 255?
max_domain_length :: Int
max_domain_length = 15
last_char :: String -> Maybe Char
last_char = A.last <<< CU.toCharArray
parse_last_char :: String -> Parser Char -> Boolean
parse_last_char s p = case last_char s of
Nothing -> false
Just c -> case parse p (CU.singleton c) of
Nothing -> false
_ -> true
-- | FIXME: This is flawed.
-- | We cannot know if it worked: in case there is a problem with the parser `p`,
-- | the code will "continue to work" but without what's been parsed.
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
-- | We cannot do a `try parser <|> alternative` since it will always work!
try :: forall a. Monoid a => Parser a -> Parser a
try p = Parser p'
where p' str = case parse p str of
Nothing -> Just (Tuple mempty str) -- FIXME! Need a better base structure.
Just x -> pure x
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String
label = do
l <- letter
s <- try ldh_str
let labelstr = CU.singleton l <> CU.fromCharArray s
if (S.length labelstr > label_maxsize)
then empty
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
then empty
else pure labelstr
-- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
upperlabels <- try do
_ <- char '.'
sub <- defer \_ -> subdomain
pure sub
if (S.length upperlabels == 0) -- This is related to the problem of not having a proper base structure.
then pure lab
else pure $ lab <> "." <> upperlabels
-- | Test for the end-of-file (no more input).
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
eof :: Parser String
eof = Parser \str -> case S.length str of
0 -> Just (Tuple "" str)
_ -> Nothing -- this means an error
-- | Test for the domain to be a list of subdomains then an end-of-file.
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
sub_eof :: Parser String
sub_eof = do
sub <- subdomain
maybe_final_point <- char '.' <|> pure ' '
_ <- eof -- In case there is still some input, it fails.
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
if S.length parsed_domain > max_domain_length
then empty -- TODO: error management
else pure parsed_domain
where
did_we_parsed_the_final_point '.' sub = sub <> "."
did_we_parsed_the_final_point _ sub = sub
-- | From RFC 1035: <domain> ::= <subdomain> | " "
-- |
-- | Accepting an optional '.' at the end of the subdomain doesn't conform
-- | to the (prefered) syntax of a domain as described in RFC 1035.
-- | However, this last '.' character should be acceptable in most applications.
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
-- | has to be differenciated from a "relative" name (www).
domain :: Parser String
domain = (string " " *> eof) <|> sub_eof