Domain parsing is now almost complete.

This commit is contained in:
Philippe Pittoli 2023-07-23 14:06:09 +02:00
parent f60b1a7568
commit 3314add2fb

View File

@ -5,13 +5,13 @@ import Prelude
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array (many, snoc, last)
import Data.Array (many, length, snoc, last)
import Data.Either (Either(..))
import Data.Foldable (fold, foldl)
import Data.Maybe (Maybe(..))
import Data.Array.NonEmpty as NonEmpty
import Data.String.CodeUnits as CU
-- import Data.String (joinWith)
import Data.String as S -- length
-- import Data.String.Regex as R
-- import Data.String.Regex.Flags as RF
import Data.Tuple (Tuple(..))
@ -65,9 +65,18 @@ render state
[ Bulma.hero_danger "A simple input" "Nothing much to see"
, Bulma.section_small $
[ h1 "Examples of domain parsing in Purescript"
] <> test_domains [ "ex.net", "e-x.net", "e-.net", "-x.net", "truc-blah.example.com", "te.s-t.net", "example.com" ]
] <> test_domains [ "ex.net"
, "e-x.net"
, "way-too-long--way-too-long--way-too-long--way-too-long--way-too-long.net"
, "way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.net"
, "e-.net"
, "-x.net"
, "truc-blah.example.com"
, "te.s-t.net"
, "example.com"
]
, Bulma.section_small [ render_stuff ]
]
]
where
-- Some helpers.
title = Bulma.h3
@ -94,14 +103,14 @@ render state
-- test_domains :: Array String -> _
test_domains doms = fold $ map tests_on_domain doms
-- tests_on_domain :: String -> _
tests_on_domain d
= [ Bulma.strong $ d <> " -> [ldhstr, label, subdomain, domain]"
, p $ d <> " : ldhstr : " <> (show $ runParser d ldhstr)
, p $ d <> " : label : " <> (show $ runParser d label)
-- , p $ show $ runParser d subdomain
--, p $ show $ runParser d domain
, p $ d <> " : ldhstr : " <> (show $ runParser d ldhstr)
, p $ d <> " : label : " <> (show $ runParser d label)
, p $ d <> " : subdomain : " <> (show $ runParser d subdomain)
, p $ d <> " : domain : " <> (show $ runParser d domain)
]
aye :: Parser String Char
@ -123,37 +132,44 @@ parse_stuff = do
-- From RFC 1035: <domain> ::= <subdomain> | " "
domain :: Parser String String
domain = sub_eof <|> string " "
domain = PC.try (string " ") <|> sub_eof
sub_eof :: Parser String String
sub_eof = do
sub <- PC.try subdomain
sub <- subdomain
-- TODO: optional "." at the end?
eof
pure sub
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String
subdomain = PC.try label <|> defer \_ -> sub_point_label
subdomain = defer \_ -> sub_point_label <|> label
sub_point_label :: Parser String String
sub_point_label = do
sub <- defer \_ -> subdomain
point <- string "."
lab <- label
pure $ sub <> point <> lab
point <- string "."
sub <- defer \_ -> subdomain
let result = lab <> point <> sub
if S.length result > 255
then fail $ "domain length is > 255 bytes (" <> show (S.length result) <> ")"
else pure result
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String String
label = PC.try let_then_str_then_alpha <|> char_to_string letter
label = let_then_str_then_alpha
where
let_then_str_then_alpha :: Parser String String
let_then_str_then_alpha = do
l <- letter
s <- many1 let_dig_hyp
let last_char = CU.singleton (NonEmpty.last s)
case runParser last_char alphaNum of
Left _ -> fail $ "Label is wrong: last char is '" <> last_char <> "' which isn't an alphanum"
Right _ -> pure $ CU.singleton l <> CU.fromCharArray (foldl (\acc x -> snoc acc x) [] s)
s <- many let_dig_hyp
case last s of
Nothing -> pure $ CU.singleton l
Just last_char -> case runParser (CU.singleton last_char) alphaNum of
Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
Right _ -> if length s > 62 -- Remember: we already did read a letter (l).
then fail $ "Label is larger than expected (max 63 characters, current: " <> show (1 + length s) <> ")"
else pure $ CU.singleton l <> CU.fromCharArray (foldl (\acc x -> snoc acc x) [] s)
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
ldhstr :: Parser String String
@ -175,7 +191,7 @@ let_dig_hyp = let_dig <|> char '-'
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: Parser String Char
let_dig = alphaNum
let_dig = alphaNum
-- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String