halogen-websocket-ipc-playzone/src/DomainParser.purs

95 lines
3.5 KiB
Plaintext
Raw Normal View History

2023-07-23 22:42:43 +02:00
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
module DomainParser where
2023-07-24 12:32:57 +02:00
import Prelude (bind, discard, pure, show, ($), (<>), (>))
2023-07-23 22:42:43 +02:00
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array.NonEmpty as NonEmpty
import Data.Either (Either(..))
import Data.Maybe (Maybe(..))
import Data.String as S -- length
import Data.String.CodeUnits as CU
-- import Data.String.Regex as R
-- import Data.String.Regex.Flags as RF
import Data.Tuple (Tuple(..))
import Parsing.Combinators.Array (many1)
import Parsing.Combinators as PC
2023-07-24 12:32:57 +02:00
import Parsing (Parser, fail, runParser)
2023-07-23 22:42:43 +02:00
import Parsing.String.Basic (alphaNum, letter)
import Parsing.String (char, string, eof)
2023-07-24 15:05:59 +02:00
-- | From RFC 1035: <domain> ::= <subdomain> | " "
-- |
-- | Accepting an optional '.' at the end of the subdomain doesn't conform
-- | to the (prefered) syntax of a domain as described in RFC 1035.
-- | However, this last '.' character should be acceptable in most applications,
-- | specially when an "absolute" name (example.com.) has to be differenciated from a "relative" name (www).
-- |
-- | PS: both "absolute" and "relative" are from filesystem's terminology,
-- | but I assume the reader to be more familiar with file-systems than DNS terminology.
2023-07-23 22:42:43 +02:00
domain :: Parser String String
domain = PC.try (string " ") <|> sub_eof
sub_eof :: Parser String String
sub_eof = do
sub <- subdomain
2023-07-24 12:32:57 +02:00
PC.optional (char '.')
2023-07-23 22:42:43 +02:00
eof
if S.length sub > 255
then fail $ "domain length is > 255 bytes (" <> show (S.length sub) <> ")"
else pure sub
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
-- Second: the rest is optional.
r <- PC.optionMaybe point_sub
case r of
Nothing -> pure lab
Just sub -> pure $ lab <> sub
where
point_sub :: Parser String String
point_sub = do
point <- string "."
sub <- defer \_ -> subdomain
pure $ point <> sub
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String String
label = let_then_str_then_alpha <|> char_to_string letter
where
let_then_str_then_alpha :: Parser String String
let_then_str_then_alpha = do
Tuple whole_label last_char <- PC.try do
l <- letter
s <- ldhstr
pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
case runParser (CU.singleton last_char) let_dig of
Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
then fail $ "Label is larger than expected (max 63 characters, current: " <> show (S.length whole_label) <> ")"
else pure whole_label
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
ldhstr :: Parser String (NonEmpty.NonEmptyArray Char)
ldhstr = many1 let_dig_hyp
-- From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
-- Either a Letter, Digital or an Hyphenation character.
let_dig_hyp :: Parser String Char
let_dig_hyp = let_dig <|> char '-' <|> fail "invalid character"
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: Parser String Char
let_dig = alphaNum
-- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String
char_to_string p = do
character <- p
pure $ CU.singleton character