subdomain kinda works

master
Philippe Pittoli 2024-01-13 01:22:15 +01:00
parent ef6f49f624
commit d327b943f9
2 changed files with 76 additions and 58 deletions

View File

@ -4,11 +4,12 @@ module DomainParser where
import Prelude import Prelude
--import Prelude (bind, discard, pure, show, ($), (<>), (>)) --import Prelude (bind, discard, pure, show, ($), (<>), (>))
import Control.Lazy (defer)
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.Array as A import Data.Array as A
import Data.String as S import Data.String as S
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
import Data.Array.NonEmpty as NonEmpty -- import Data.Array.NonEmpty as NonEmpty
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
import Control.Alt ((<|>)) import Control.Alt ((<|>))
import Control.Plus (empty) import Control.Plus (empty)
@ -42,45 +43,54 @@ parse_last_char s p = case last_char s of
Nothing -> false Nothing -> false
_ -> true _ -> true
try :: Parser String -> Parser String -- | FIXME: This is flawed.
-- | We cannot know if it worked: in case there is a problem with the parser `p`,
-- | the code will "continue to work" but without what's been parsed.
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
-- | We cannot do a `try parser <|> alternative` since it will always work!
try :: forall a. Monoid a => Parser a -> Parser a
try p = Parser p' try p = Parser p'
where p' str = case parse p str of where p' str = case parse p str of
Nothing -> Just (Tuple "" str) -- FIXME: This is flawed: we cannot know if it worked! Nothing -> Just (Tuple mempty str) -- FIXME! Need a better base structure.
Just x -> pure x Just x -> pure x
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ] -- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String label :: Parser String
label = let_then_str_then_alpha <|> (CU.singleton <$> letter) label = do
where
let_then_str_then_alpha :: Parser String
let_then_str_then_alpha = do
labelstr <- try do
l <- letter l <- letter
s <- ldh_str s <- try ldh_str
pure $ CU.singleton l <> CU.fromCharArray s let labelstr = CU.singleton l <> CU.fromCharArray s
if (S.length labelstr > label_maxsize || not (parse_last_char labelstr let_dig)) if (S.length labelstr > label_maxsize)
then empty
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
then empty then empty
else pure labelstr else pure labelstr
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
upperlabels <- try do
_ <- char '.'
sub <- defer \_ -> subdomain
pure sub
if (S.length upperlabels == 0) -- This is related to the problem of not having a proper base structure.
then pure lab
else pure $ lab <> "." <> upperlabels
-- Tuple whole_label last_char <- PC.try do ---- -- Second: the rest is optional.
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s) ---- r <- PC.optionMaybe (PC.try point_sub)
-- case runParser (CU.singleton last_char) let_dig of ---- case r of
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum" ---- Nothing -> pure lab
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l). ---- Just sub -> pure $ lab <> sub
-- then Nothing ----
-- else pure whole_label ---- where
---- point_sub :: Parser String
-- let_then_str_then_alpha = do ---- point_sub = do
-- Tuple whole_label last_char <- PC.try do ---- point <- char '.'
-- l <- letter ---- sub <- defer \_ -> subdomain
-- s <- ldhstr ---- pure $ CU.singleton point <> sub
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
-- case runParser (CU.singleton last_char) let_dig of
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
-- then fail $ "too big"
-- else pure whole_label
{- {-
import Control.Lazy (defer) import Control.Lazy (defer)
@ -121,24 +131,6 @@ sub_eof = do
did_we_parsed_the_final_point Nothing sub = sub did_we_parsed_the_final_point Nothing sub = sub
did_we_parsed_the_final_point (Just _) sub = sub <> "." did_we_parsed_the_final_point (Just _) sub = sub <> "."
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
-- Second: the rest is optional.
r <- PC.optionMaybe (PC.try point_sub)
case r of
Nothing -> pure lab
Just sub -> pure $ lab <> sub
where
point_sub :: Parser String String
point_sub = do
point <- string "."
sub <- defer \_ -> subdomain
pure $ point <> sub
-- | Converting a single letter parser to a String parser. -- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String char_to_string :: Parser String Char -> Parser String String
char_to_string p = do char_to_string p = do

View File

@ -59,15 +59,41 @@ main = do
log "" log ""
logtest "label" label "example.org" id logtest "label" label "example.org" id
logtest "label" label "1notgreat.x" id
logtest "label" label ".shouldntwork" id
logtest "label" label "a-b.great" id
logtest "label" label "-b.shouldfail" id
logtest "label" label "b-.shouldfail" id
logtest "label" label "toolg.org" id
logtest "label" label "too-long.org" id
logtest "label" label "too-long-shouldfail.org" id
logtest "label" label "" id logtest "label" label "" id
logtest "label" label "a.x" id
logtest "label" label "a2.org" id
logtest "label" label "a33.org" id
logtest "label" label "a444.org" id
logtest "label" label "a5555.org" id
logtest "label" label "a66666.org" id
logtest "label" label "a777777.org" id
logtest "label" label "a8888888.org" id
log ""
logtest "label" label "-" id
logtest "label" label "a-" id
log ""
logtest "subdomain" subdomain "example.org" id
logtest "subdomain" subdomain "" id
logtest "subdomain" subdomain "a.x" id
logtest "subdomain" subdomain "a2.org" id
logtest "subdomain" subdomain "a33.org" id
logtest "subdomain" subdomain "a444.org" id
logtest "subdomain" subdomain "a5555.org" id
logtest "subdomain" subdomain "a66666.org" id
logtest "subdomain" subdomain "a777777.org" id
logtest "subdomain" subdomain "a8888888.org" id
logtest "subdomain" subdomain "xblah.a.x" id
logtest "subdomain" subdomain "xblah.a2.org" id
logtest "subdomain" subdomain "xblah.a33.org" id
logtest "subdomain" subdomain "xblah.a444.org" id
logtest "subdomain" subdomain "xblah.a5555.org" id
logtest "subdomain" subdomain "xblah.a66666.org" id
logtest "subdomain" subdomain "xblah.a777777.org" id
logtest "subdomain" subdomain "xblah.a8888888.org" id
logtest "subdomain" subdomain "-" id
logtest "subdomain" subdomain "a-" id
-- log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of -- log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
-- Just (Tuple x y) -> show x <> " " <> show y -- Just (Tuple x y) -> show x <> " " <> show y