Domain parsing is now almost complete.

beta
Philippe Pittoli 2023-07-23 14:06:09 +02:00
parent f60b1a7568
commit 3314add2fb
1 changed files with 38 additions and 22 deletions

View File

@ -5,13 +5,13 @@ import Prelude
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Control.Lazy (defer) import Control.Lazy (defer)
import Data.Array (many, snoc, last) import Data.Array (many, length, snoc, last)
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Foldable (fold, foldl) import Data.Foldable (fold, foldl)
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.Array.NonEmpty as NonEmpty import Data.Array.NonEmpty as NonEmpty
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
-- import Data.String (joinWith) import Data.String as S -- length
-- import Data.String.Regex as R -- import Data.String.Regex as R
-- import Data.String.Regex.Flags as RF -- import Data.String.Regex.Flags as RF
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
@ -65,9 +65,18 @@ render state
[ Bulma.hero_danger "A simple input" "Nothing much to see" [ Bulma.hero_danger "A simple input" "Nothing much to see"
, Bulma.section_small $ , Bulma.section_small $
[ h1 "Examples of domain parsing in Purescript" [ h1 "Examples of domain parsing in Purescript"
] <> test_domains [ "ex.net", "e-x.net", "e-.net", "-x.net", "truc-blah.example.com", "te.s-t.net", "example.com" ] ] <> test_domains [ "ex.net"
, "e-x.net"
, "way-too-long--way-too-long--way-too-long--way-too-long--way-too-long.net"
, "way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.net"
, "e-.net"
, "-x.net"
, "truc-blah.example.com"
, "te.s-t.net"
, "example.com"
]
, Bulma.section_small [ render_stuff ] , Bulma.section_small [ render_stuff ]
] ]
where where
-- Some helpers. -- Some helpers.
title = Bulma.h3 title = Bulma.h3
@ -94,14 +103,14 @@ render state
-- test_domains :: Array String -> _ -- test_domains :: Array String -> _
test_domains doms = fold $ map tests_on_domain doms test_domains doms = fold $ map tests_on_domain doms
-- tests_on_domain :: String -> _ -- tests_on_domain :: String -> _
tests_on_domain d tests_on_domain d
= [ Bulma.strong $ d <> " -> [ldhstr, label, subdomain, domain]" = [ Bulma.strong $ d <> " -> [ldhstr, label, subdomain, domain]"
, p $ d <> " : ldhstr : " <> (show $ runParser d ldhstr) , p $ d <> " : ldhstr : " <> (show $ runParser d ldhstr)
, p $ d <> " : label : " <> (show $ runParser d label) , p $ d <> " : label : " <> (show $ runParser d label)
-- , p $ show $ runParser d subdomain , p $ d <> " : subdomain : " <> (show $ runParser d subdomain)
--, p $ show $ runParser d domain , p $ d <> " : domain : " <> (show $ runParser d domain)
] ]
aye :: Parser String Char aye :: Parser String Char
@ -123,37 +132,44 @@ parse_stuff = do
-- From RFC 1035: <domain> ::= <subdomain> | " " -- From RFC 1035: <domain> ::= <subdomain> | " "
domain :: Parser String String domain :: Parser String String
domain = sub_eof <|> string " " domain = PC.try (string " ") <|> sub_eof
sub_eof :: Parser String String sub_eof :: Parser String String
sub_eof = do sub_eof = do
sub <- PC.try subdomain sub <- subdomain
-- TODO: optional "." at the end?
eof eof
pure sub pure sub
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label> -- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String subdomain :: Parser String String
subdomain = PC.try label <|> defer \_ -> sub_point_label subdomain = defer \_ -> sub_point_label <|> label
sub_point_label :: Parser String String sub_point_label :: Parser String String
sub_point_label = do sub_point_label = do
sub <- defer \_ -> subdomain
point <- string "."
lab <- label lab <- label
pure $ sub <> point <> lab point <- string "."
sub <- defer \_ -> subdomain
let result = lab <> point <> sub
if S.length result > 255
then fail $ "domain length is > 255 bytes (" <> show (S.length result) <> ")"
else pure result
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ] -- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String String label :: Parser String String
label = PC.try let_then_str_then_alpha <|> char_to_string letter label = let_then_str_then_alpha
where where
let_then_str_then_alpha :: Parser String String let_then_str_then_alpha :: Parser String String
let_then_str_then_alpha = do let_then_str_then_alpha = do
l <- letter l <- letter
s <- many1 let_dig_hyp s <- many let_dig_hyp
let last_char = CU.singleton (NonEmpty.last s) case last s of
case runParser last_char alphaNum of Nothing -> pure $ CU.singleton l
Left _ -> fail $ "Label is wrong: last char is '" <> last_char <> "' which isn't an alphanum" Just last_char -> case runParser (CU.singleton last_char) alphaNum of
Right _ -> pure $ CU.singleton l <> CU.fromCharArray (foldl (\acc x -> snoc acc x) [] s) Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
Right _ -> if length s > 62 -- Remember: we already did read a letter (l).
then fail $ "Label is larger than expected (max 63 characters, current: " <> show (1 + length s) <> ")"
else pure $ CU.singleton l <> CU.fromCharArray (foldl (\acc x -> snoc acc x) [] s)
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str> -- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
ldhstr :: Parser String String ldhstr :: Parser String String
@ -175,7 +191,7 @@ let_dig_hyp = let_dig <|> char '-'
-- From RFC 1035: <let-dig> ::= <letter> | <digit> -- From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: Parser String Char let_dig :: Parser String Char
let_dig = alphaNum let_dig = alphaNum
-- | Converting a single letter parser to a String parser. -- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String char_to_string :: Parser String Char -> Parser String String