DomainParser
parent
f81449c100
commit
016f0e03c5
|
@ -0,0 +1,91 @@
|
||||||
|
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
|
||||||
|
module DomainParser where
|
||||||
|
|
||||||
|
import Prelude (bind, discard, pure, show, ($), (+), (<>), (>), (-))
|
||||||
|
|
||||||
|
import Control.Alt ((<|>))
|
||||||
|
import Control.Lazy (defer)
|
||||||
|
import Data.Array (many, length, snoc)
|
||||||
|
import Data.Array.NonEmpty as NonEmpty
|
||||||
|
import Data.Either (Either(..))
|
||||||
|
import Data.Foldable (fold, foldl)
|
||||||
|
import Data.Maybe (Maybe(..))
|
||||||
|
import Data.String as S -- length
|
||||||
|
import Data.String.CodeUnits as CU
|
||||||
|
-- import Data.String.Regex as R
|
||||||
|
-- import Data.String.Regex.Flags as RF
|
||||||
|
import Data.Tuple (Tuple(..))
|
||||||
|
import Parsing
|
||||||
|
import Parsing.Combinators.Array (many1)
|
||||||
|
import Parsing.Combinators as PC
|
||||||
|
import Parsing (Parser, runParser)
|
||||||
|
import Parsing.String
|
||||||
|
import Parsing.String.Basic (alphaNum, letter)
|
||||||
|
import Parsing.String (char, string, eof)
|
||||||
|
|
||||||
|
-- From RFC 1035: <domain> ::= <subdomain> | " "
|
||||||
|
domain :: Parser String String
|
||||||
|
domain = PC.try (string " ") <|> sub_eof
|
||||||
|
|
||||||
|
sub_eof :: Parser String String
|
||||||
|
sub_eof = do
|
||||||
|
sub <- subdomain
|
||||||
|
-- TODO: non standard (RFC 1035).
|
||||||
|
PC.option (PC.try $ char '.')
|
||||||
|
eof
|
||||||
|
if S.length sub > 255
|
||||||
|
then fail $ "domain length is > 255 bytes (" <> show (S.length sub) <> ")"
|
||||||
|
else pure sub
|
||||||
|
|
||||||
|
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
||||||
|
subdomain :: Parser String String
|
||||||
|
subdomain = do
|
||||||
|
-- First: read a label. This is bare minimum for a subdomain.
|
||||||
|
lab <- label
|
||||||
|
-- Second: the rest is optional.
|
||||||
|
r <- PC.optionMaybe point_sub
|
||||||
|
case r of
|
||||||
|
Nothing -> pure lab
|
||||||
|
Just sub -> pure $ lab <> sub
|
||||||
|
|
||||||
|
where
|
||||||
|
point_sub :: Parser String String
|
||||||
|
point_sub = do
|
||||||
|
point <- string "."
|
||||||
|
sub <- defer \_ -> subdomain
|
||||||
|
pure $ point <> sub
|
||||||
|
|
||||||
|
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||||
|
label :: Parser String String
|
||||||
|
label = let_then_str_then_alpha <|> char_to_string letter
|
||||||
|
where
|
||||||
|
let_then_str_then_alpha :: Parser String String
|
||||||
|
let_then_str_then_alpha = do
|
||||||
|
Tuple whole_label last_char <- PC.try do
|
||||||
|
l <- letter
|
||||||
|
s <- ldhstr
|
||||||
|
pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
|
||||||
|
case runParser (CU.singleton last_char) let_dig of
|
||||||
|
Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
|
||||||
|
Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
|
||||||
|
then fail $ "Label is larger than expected (max 63 characters, current: " <> show (S.length whole_label) <> ")"
|
||||||
|
else pure whole_label
|
||||||
|
|
||||||
|
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
||||||
|
ldhstr :: Parser String (NonEmpty.NonEmptyArray Char)
|
||||||
|
ldhstr = many1 let_dig_hyp
|
||||||
|
|
||||||
|
-- From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
|
||||||
|
-- Either a Letter, Digital or an Hyphenation character.
|
||||||
|
let_dig_hyp :: Parser String Char
|
||||||
|
let_dig_hyp = let_dig <|> char '-' <|> fail "invalid character"
|
||||||
|
|
||||||
|
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||||
|
let_dig :: Parser String Char
|
||||||
|
let_dig = alphaNum
|
||||||
|
|
||||||
|
-- | Converting a single letter parser to a String parser.
|
||||||
|
char_to_string :: Parser String Char -> Parser String String
|
||||||
|
char_to_string p = do
|
||||||
|
character <- p
|
||||||
|
pure $ CU.singleton character
|
Loading…
Reference in New Issue