Playing with parsers: reading domain labels is okay-ish.

beta
Philippe Pittoli 2023-07-23 03:29:26 +02:00
parent ff46b7937a
commit f60b1a7568
1 changed files with 44 additions and 49 deletions

View File

@ -3,18 +3,18 @@ module App.HomeInterface where
import Prelude
import Control.Alt ((<|>))
import Control.Alt ((<|>))
import Control.Lazy (defer)
import Data.Array (many)
import Data.Array (many, snoc, last)
import Data.Either (Either(..))
import Data.Foldable (fold)
import Data.Foldable (fold, foldl)
import Data.Maybe (Maybe(..))
import Data.Array.NonEmpty as NonEmpty
import Data.String.CodeUnits as CU
-- import Data.String (joinWith)
-- import Data.String.Regex as R
-- import Data.String.Regex.Flags as RF
-- import Data.Tuple (Tuple(..))
import Data.Tuple (Tuple(..))
import Effect.Aff.Class (class MonadAff)
import Effect (Effect)
import Halogen as H
@ -23,6 +23,7 @@ import Halogen.HTML as HH
import Halogen.HTML.Properties as HP
import Parsing
import Parsing.Combinators as PC
import Parsing.Combinators.Array (many1)
import Parsing (Parser, runParser)
import Parsing.String
import Parsing.String.Basic
@ -64,7 +65,7 @@ render state
[ Bulma.hero_danger "A simple input" "Nothing much to see"
, Bulma.section_small $
[ h1 "Examples of domain parsing in Purescript"
] <> test_domains [ "ex.net", "e-.net", "-x.net", "te.s-t.net", "example.com" ]
] <> test_domains [ "ex.net", "e-x.net", "e-.net", "-x.net", "truc-blah.example.com", "te.s-t.net", "example.com" ]
, Bulma.section_small [ render_stuff ]
]
where
@ -96,11 +97,11 @@ render state
-- tests_on_domain :: String -> _
tests_on_domain d
= [ text $ "domain: " <> d <> " [ldhstr, label, subdomain, domain]"
, p $ show $ runParser d ldhstr
, p $ show $ runParser d label
, p $ show $ runParser d subdomain
, p $ show $ runParser d domain
= [ Bulma.strong $ d <> " -> [ldhstr, label, subdomain, domain]"
, p $ d <> " : ldhstr : " <> (show $ runParser d ldhstr)
, p $ d <> " : label : " <> (show $ runParser d label)
-- , p $ show $ runParser d subdomain
--, p $ show $ runParser d domain
]
aye :: Parser String Char
@ -114,12 +115,13 @@ ayebee = do
parse_stuff :: Parser String Boolean
parse_stuff = do
void $ alphaNum
void $ (void $ many (alphaNum <|> char '-' <|> char '.')) <|> eof
void $ label
void $ eof
pure true
-- From RFC 1035: <domain> ::= <subdomain> | " "
domain :: Parser String String
domain = sub_eof <|> string " "
@ -129,6 +131,7 @@ sub_eof = do
eof
pure sub
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String
subdomain = PC.try label <|> defer \_ -> sub_point_label
@ -139,58 +142,50 @@ sub_point_label = do
lab <- label
pure $ sub <> point <> lab
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String String
label = PC.try let_then_str_then_alpha <|> PC.try let_then_alpha <|> let_to_string letter
label = PC.try let_then_str_then_alpha <|> char_to_string letter
where
let_then_str_then_alpha :: Parser String String
let_then_str_then_alpha = do
l <- letter
s <- ldhstr
a <- alphaNum
pure $ CU.singleton l <> s <> CU.singleton a
let_then_alpha :: Parser String String
let_then_alpha = do
l <- letter
a <- alphaNum
pure $ CU.singleton l <> CU.singleton a
s <- many1 let_dig_hyp
let last_char = CU.singleton (NonEmpty.last s)
case runParser last_char alphaNum of
Left _ -> fail $ "Label is wrong: last char is '" <> last_char <> "' which isn't an alphanum"
Right _ -> pure $ CU.singleton l <> CU.fromCharArray (foldl (\acc x -> snoc acc x) [] s)
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
ldhstr :: Parser String String
ldhstr = PC.try actual_ldhstr <|> just_ldh
ldhstr = PC.try ldh_then_str <|> just_ldh
where
actual_ldhstr :: Parser String String
actual_ldhstr = do
just_ldh :: Parser String String
just_ldh = char_to_string let_dig_hyp
ldh_then_str :: Parser String String
ldh_then_str = do
ldh <- let_dig_hyp
str <- defer \_ -> ldhstr
pure $ CU.singleton ldh <> str
just_ldh :: Parser String String
just_ldh = let_to_string let_dig_hyp
-- From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
-- Either a Letter, Digital or an Hyphenation character.
let_dig_hyp :: Parser String Char
let_dig_hyp = let_dig <|> char '-'
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: Parser String Char
let_dig = alphaNum
-- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String
char_to_string p = do
a <- p
pure $ CU.singleton a
-- Not used currently.
hyp_then_string :: Parser String String
hyp_then_string = do
a <- let_to_string let_dig_hyp
a <- char_to_string let_dig_hyp
b <- ldhstr
pure $ a <> b
-- Either a Letter, Digital or an Hyphenation character.
let_dig_hyp :: Parser String Char
let_dig_hyp = alphaNum <|> char '-'
-- | Converting a single letter parser to a String parser.
let_to_string :: Parser String Char -> Parser String String
let_to_string p = do
a <- p
pure $ CU.singleton a
-- From RFC 1035
-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
-- <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
-- <let-dig-hyp> ::= <let-dig> | "-"
-- <let-dig> ::= <letter> | <digit>
tested_domain :: String
tested_domain = "example.com"