Domain parsing is now almost complete.
parent
f60b1a7568
commit
3314add2fb
|
@ -5,13 +5,13 @@ import Prelude
|
||||||
|
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Control.Lazy (defer)
|
import Control.Lazy (defer)
|
||||||
import Data.Array (many, snoc, last)
|
import Data.Array (many, length, snoc, last)
|
||||||
import Data.Either (Either(..))
|
import Data.Either (Either(..))
|
||||||
import Data.Foldable (fold, foldl)
|
import Data.Foldable (fold, foldl)
|
||||||
import Data.Maybe (Maybe(..))
|
import Data.Maybe (Maybe(..))
|
||||||
import Data.Array.NonEmpty as NonEmpty
|
import Data.Array.NonEmpty as NonEmpty
|
||||||
import Data.String.CodeUnits as CU
|
import Data.String.CodeUnits as CU
|
||||||
-- import Data.String (joinWith)
|
import Data.String as S -- length
|
||||||
-- import Data.String.Regex as R
|
-- import Data.String.Regex as R
|
||||||
-- import Data.String.Regex.Flags as RF
|
-- import Data.String.Regex.Flags as RF
|
||||||
import Data.Tuple (Tuple(..))
|
import Data.Tuple (Tuple(..))
|
||||||
|
@ -65,7 +65,16 @@ render state
|
||||||
[ Bulma.hero_danger "A simple input" "Nothing much to see"
|
[ Bulma.hero_danger "A simple input" "Nothing much to see"
|
||||||
, Bulma.section_small $
|
, Bulma.section_small $
|
||||||
[ h1 "Examples of domain parsing in Purescript"
|
[ h1 "Examples of domain parsing in Purescript"
|
||||||
] <> test_domains [ "ex.net", "e-x.net", "e-.net", "-x.net", "truc-blah.example.com", "te.s-t.net", "example.com" ]
|
] <> test_domains [ "ex.net"
|
||||||
|
, "e-x.net"
|
||||||
|
, "way-too-long--way-too-long--way-too-long--way-too-long--way-too-long.net"
|
||||||
|
, "way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.way-too-long--way-too-long--way-too-long--way-too-long.net"
|
||||||
|
, "e-.net"
|
||||||
|
, "-x.net"
|
||||||
|
, "truc-blah.example.com"
|
||||||
|
, "te.s-t.net"
|
||||||
|
, "example.com"
|
||||||
|
]
|
||||||
, Bulma.section_small [ render_stuff ]
|
, Bulma.section_small [ render_stuff ]
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
|
@ -100,8 +109,8 @@ render state
|
||||||
= [ Bulma.strong $ d <> " -> [ldhstr, label, subdomain, domain]"
|
= [ Bulma.strong $ d <> " -> [ldhstr, label, subdomain, domain]"
|
||||||
, p $ d <> " : ldhstr : " <> (show $ runParser d ldhstr)
|
, p $ d <> " : ldhstr : " <> (show $ runParser d ldhstr)
|
||||||
, p $ d <> " : label : " <> (show $ runParser d label)
|
, p $ d <> " : label : " <> (show $ runParser d label)
|
||||||
-- , p $ show $ runParser d subdomain
|
, p $ d <> " : subdomain : " <> (show $ runParser d subdomain)
|
||||||
--, p $ show $ runParser d domain
|
, p $ d <> " : domain : " <> (show $ runParser d domain)
|
||||||
]
|
]
|
||||||
|
|
||||||
aye :: Parser String Char
|
aye :: Parser String Char
|
||||||
|
@ -123,37 +132,44 @@ parse_stuff = do
|
||||||
|
|
||||||
-- From RFC 1035: <domain> ::= <subdomain> | " "
|
-- From RFC 1035: <domain> ::= <subdomain> | " "
|
||||||
domain :: Parser String String
|
domain :: Parser String String
|
||||||
domain = sub_eof <|> string " "
|
domain = PC.try (string " ") <|> sub_eof
|
||||||
|
|
||||||
sub_eof :: Parser String String
|
sub_eof :: Parser String String
|
||||||
sub_eof = do
|
sub_eof = do
|
||||||
sub <- PC.try subdomain
|
sub <- subdomain
|
||||||
|
-- TODO: optional "." at the end?
|
||||||
eof
|
eof
|
||||||
pure sub
|
pure sub
|
||||||
|
|
||||||
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
||||||
subdomain :: Parser String String
|
subdomain :: Parser String String
|
||||||
subdomain = PC.try label <|> defer \_ -> sub_point_label
|
subdomain = defer \_ -> sub_point_label <|> label
|
||||||
|
|
||||||
sub_point_label :: Parser String String
|
sub_point_label :: Parser String String
|
||||||
sub_point_label = do
|
sub_point_label = do
|
||||||
sub <- defer \_ -> subdomain
|
|
||||||
point <- string "."
|
|
||||||
lab <- label
|
lab <- label
|
||||||
pure $ sub <> point <> lab
|
point <- string "."
|
||||||
|
sub <- defer \_ -> subdomain
|
||||||
|
let result = lab <> point <> sub
|
||||||
|
if S.length result > 255
|
||||||
|
then fail $ "domain length is > 255 bytes (" <> show (S.length result) <> ")"
|
||||||
|
else pure result
|
||||||
|
|
||||||
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||||
label :: Parser String String
|
label :: Parser String String
|
||||||
label = PC.try let_then_str_then_alpha <|> char_to_string letter
|
label = let_then_str_then_alpha
|
||||||
where
|
where
|
||||||
let_then_str_then_alpha :: Parser String String
|
let_then_str_then_alpha :: Parser String String
|
||||||
let_then_str_then_alpha = do
|
let_then_str_then_alpha = do
|
||||||
l <- letter
|
l <- letter
|
||||||
s <- many1 let_dig_hyp
|
s <- many let_dig_hyp
|
||||||
let last_char = CU.singleton (NonEmpty.last s)
|
case last s of
|
||||||
case runParser last_char alphaNum of
|
Nothing -> pure $ CU.singleton l
|
||||||
Left _ -> fail $ "Label is wrong: last char is '" <> last_char <> "' which isn't an alphanum"
|
Just last_char -> case runParser (CU.singleton last_char) alphaNum of
|
||||||
Right _ -> pure $ CU.singleton l <> CU.fromCharArray (foldl (\acc x -> snoc acc x) [] s)
|
Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
|
||||||
|
Right _ -> if length s > 62 -- Remember: we already did read a letter (l).
|
||||||
|
then fail $ "Label is larger than expected (max 63 characters, current: " <> show (1 + length s) <> ")"
|
||||||
|
else pure $ CU.singleton l <> CU.fromCharArray (foldl (\acc x -> snoc acc x) [] s)
|
||||||
|
|
||||||
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
||||||
ldhstr :: Parser String String
|
ldhstr :: Parser String String
|
||||||
|
|
Loading…
Reference in New Issue