subdomain kinda works

This commit is contained in:
Philippe Pittoli 2024-01-13 01:22:15 +01:00
parent ef6f49f624
commit d327b943f9
2 changed files with 76 additions and 58 deletions

View File

@ -4,11 +4,12 @@ module DomainParser where
import Prelude
--import Prelude (bind, discard, pure, show, ($), (<>), (>))
import Control.Lazy (defer)
import Data.Maybe (Maybe(..))
import Data.Array as A
import Data.String as S
import Data.Tuple (Tuple(..))
import Data.Array.NonEmpty as NonEmpty
-- import Data.Array.NonEmpty as NonEmpty
import Data.String.CodeUnits as CU
import Control.Alt ((<|>))
import Control.Plus (empty)
@ -42,45 +43,54 @@ parse_last_char s p = case last_char s of
Nothing -> false
_ -> true
try :: Parser String -> Parser String
-- | FIXME: This is flawed.
-- | We cannot know if it worked: in case there is a problem with the parser `p`,
-- | the code will "continue to work" but without what's been parsed.
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
-- | We cannot do a `try parser <|> alternative` since it will always work!
try :: forall a. Monoid a => Parser a -> Parser a
try p = Parser p'
where p' str = case parse p str of
Nothing -> Just (Tuple "" str) -- FIXME: This is flawed: we cannot know if it worked!
Nothing -> Just (Tuple mempty str) -- FIXME! Need a better base structure.
Just x -> pure x
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String
label = let_then_str_then_alpha <|> (CU.singleton <$> letter)
where
let_then_str_then_alpha :: Parser String
let_then_str_then_alpha = do
labelstr <- try do
l <- letter
s <- ldh_str
pure $ CU.singleton l <> CU.fromCharArray s
if (S.length labelstr > label_maxsize || not (parse_last_char labelstr let_dig))
then empty
else pure labelstr
label = do
l <- letter
s <- try ldh_str
let labelstr = CU.singleton l <> CU.fromCharArray s
if (S.length labelstr > label_maxsize)
then empty
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
then empty
else pure labelstr
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
upperlabels <- try do
_ <- char '.'
sub <- defer \_ -> subdomain
pure sub
if (S.length upperlabels == 0) -- This is related to the problem of not having a proper base structure.
then pure lab
else pure $ lab <> "." <> upperlabels
-- Tuple whole_label last_char <- PC.try do
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
-- case runParser (CU.singleton last_char) let_dig of
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
-- then Nothing
-- else pure whole_label
-- let_then_str_then_alpha = do
-- Tuple whole_label last_char <- PC.try do
-- l <- letter
-- s <- ldhstr
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
-- case runParser (CU.singleton last_char) let_dig of
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
-- then fail $ "too big"
-- else pure whole_label
---- -- Second: the rest is optional.
---- r <- PC.optionMaybe (PC.try point_sub)
---- case r of
---- Nothing -> pure lab
---- Just sub -> pure $ lab <> sub
----
---- where
---- point_sub :: Parser String
---- point_sub = do
---- point <- char '.'
---- sub <- defer \_ -> subdomain
---- pure $ CU.singleton point <> sub
{-
import Control.Lazy (defer)
@ -121,24 +131,6 @@ sub_eof = do
did_we_parsed_the_final_point Nothing sub = sub
did_we_parsed_the_final_point (Just _) sub = sub <> "."
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
-- Second: the rest is optional.
r <- PC.optionMaybe (PC.try point_sub)
case r of
Nothing -> pure lab
Just sub -> pure $ lab <> sub
where
point_sub :: Parser String String
point_sub = do
point <- string "."
sub <- defer \_ -> subdomain
pure $ point <> sub
-- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String
char_to_string p = do

View File

@ -59,15 +59,41 @@ main = do
log ""
logtest "label" label "example.org" id
logtest "label" label "1notgreat.x" id
logtest "label" label ".shouldntwork" id
logtest "label" label "a-b.great" id
logtest "label" label "-b.shouldfail" id
logtest "label" label "b-.shouldfail" id
logtest "label" label "toolg.org" id
logtest "label" label "too-long.org" id
logtest "label" label "too-long-shouldfail.org" id
logtest "label" label "" id
logtest "label" label "a.x" id
logtest "label" label "a2.org" id
logtest "label" label "a33.org" id
logtest "label" label "a444.org" id
logtest "label" label "a5555.org" id
logtest "label" label "a66666.org" id
logtest "label" label "a777777.org" id
logtest "label" label "a8888888.org" id
log ""
logtest "label" label "-" id
logtest "label" label "a-" id
log ""
logtest "subdomain" subdomain "example.org" id
logtest "subdomain" subdomain "" id
logtest "subdomain" subdomain "a.x" id
logtest "subdomain" subdomain "a2.org" id
logtest "subdomain" subdomain "a33.org" id
logtest "subdomain" subdomain "a444.org" id
logtest "subdomain" subdomain "a5555.org" id
logtest "subdomain" subdomain "a66666.org" id
logtest "subdomain" subdomain "a777777.org" id
logtest "subdomain" subdomain "a8888888.org" id
logtest "subdomain" subdomain "xblah.a.x" id
logtest "subdomain" subdomain "xblah.a2.org" id
logtest "subdomain" subdomain "xblah.a33.org" id
logtest "subdomain" subdomain "xblah.a444.org" id
logtest "subdomain" subdomain "xblah.a5555.org" id
logtest "subdomain" subdomain "xblah.a66666.org" id
logtest "subdomain" subdomain "xblah.a777777.org" id
logtest "subdomain" subdomain "xblah.a8888888.org" id
logtest "subdomain" subdomain "-" id
logtest "subdomain" subdomain "a-" id
-- log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
-- Just (Tuple x y) -> show x <> " " <> show y