DomainParser

This commit is contained in:
Philippe Pittoli 2023-07-23 22:42:43 +02:00
parent f81449c100
commit 016f0e03c5

91
src/DomainParser.purs Normal file
View File

@ -0,0 +1,91 @@
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
module DomainParser where
import Prelude (bind, discard, pure, show, ($), (+), (<>), (>), (-))
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array (many, length, snoc)
import Data.Array.NonEmpty as NonEmpty
import Data.Either (Either(..))
import Data.Foldable (fold, foldl)
import Data.Maybe (Maybe(..))
import Data.String as S -- length
import Data.String.CodeUnits as CU
-- import Data.String.Regex as R
-- import Data.String.Regex.Flags as RF
import Data.Tuple (Tuple(..))
import Parsing
import Parsing.Combinators.Array (many1)
import Parsing.Combinators as PC
import Parsing (Parser, runParser)
import Parsing.String
import Parsing.String.Basic (alphaNum, letter)
import Parsing.String (char, string, eof)
-- From RFC 1035: <domain> ::= <subdomain> | " "
domain :: Parser String String
domain = PC.try (string " ") <|> sub_eof
sub_eof :: Parser String String
sub_eof = do
sub <- subdomain
-- TODO: non standard (RFC 1035).
PC.option (PC.try $ char '.')
eof
if S.length sub > 255
then fail $ "domain length is > 255 bytes (" <> show (S.length sub) <> ")"
else pure sub
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
-- Second: the rest is optional.
r <- PC.optionMaybe point_sub
case r of
Nothing -> pure lab
Just sub -> pure $ lab <> sub
where
point_sub :: Parser String String
point_sub = do
point <- string "."
sub <- defer \_ -> subdomain
pure $ point <> sub
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String String
label = let_then_str_then_alpha <|> char_to_string letter
where
let_then_str_then_alpha :: Parser String String
let_then_str_then_alpha = do
Tuple whole_label last_char <- PC.try do
l <- letter
s <- ldhstr
pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
case runParser (CU.singleton last_char) let_dig of
Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
then fail $ "Label is larger than expected (max 63 characters, current: " <> show (S.length whole_label) <> ")"
else pure whole_label
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
ldhstr :: Parser String (NonEmpty.NonEmptyArray Char)
ldhstr = many1 let_dig_hyp
-- From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
-- Either a Letter, Digital or an Hyphenation character.
let_dig_hyp :: Parser String Char
let_dig_hyp = let_dig <|> char '-' <|> fail "invalid character"
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: Parser String Char
let_dig = alphaNum
-- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String
char_to_string p = do
character <- p
pure $ CU.singleton character