Split code, implement more of DomainParser.

This commit is contained in:
Philippe Pittoli 2024-01-12 05:14:55 +01:00
parent 060467bcc4
commit ef6f49f624
4 changed files with 231 additions and 44 deletions

28
src/BaseFunctions.purs Normal file
View File

@ -0,0 +1,28 @@
module BaseFunctions where
import Prelude (between, (<>), (==))
import Data.Array as A
import Data.String.CodeUnits (singleton)
concat :: Char -> String -> String
concat c rest = singleton c <> rest
isDigit :: Char -> Boolean
isDigit = between '0' '9'
isLower :: Char -> Boolean
isLower = between 'a' 'z'
isUpper :: Char -> Boolean
isUpper = between 'A' 'Z'
isAlpha :: Char -> Boolean
isAlpha c = A.any (\f -> f c) [isLower, isUpper]
isAlphaNum :: Char -> Boolean
isAlphaNum c = A.any (\f -> f c) [isAlpha, isDigit]
isSpace :: Char -> Boolean
isSpace c = A.any (\v -> v == c) [' ', '\t', ' ', '\r', '\n']

147
src/DomainParser.purs Normal file
View File

@ -0,0 +1,147 @@
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
module DomainParser where
import Prelude
--import Prelude (bind, discard, pure, show, ($), (<>), (>))
import Data.Maybe (Maybe(..))
import Data.Array as A
import Data.String as S
import Data.Tuple (Tuple(..))
import Data.Array.NonEmpty as NonEmpty
import Data.String.CodeUnits as CU
import Control.Alt ((<|>))
import Control.Plus (empty)
import Parser
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
let_dig :: Parser Char
let_dig = alphanum
-- From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
-- Either a Letter, Digital or an Hyphenation character.
let_dig_hyp :: Parser Char
let_dig_hyp = let_dig <|> char '-'
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
ldh_str :: Parser (Array Char)
ldh_str = many1 let_dig_hyp
-- TODO: 63
label_maxsize :: Int
label_maxsize = 7
last_char :: String -> Maybe Char
last_char = A.last <<< CU.toCharArray
parse_last_char :: String -> Parser Char -> Boolean
parse_last_char s p = case last_char s of
Nothing -> false
Just c -> case parse p (CU.singleton c) of
Nothing -> false
_ -> true
try :: Parser String -> Parser String
try p = Parser p'
where p' str = case parse p str of
Nothing -> Just (Tuple "" str) -- FIXME: This is flawed: we cannot know if it worked!
Just x -> pure x
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser String
label = let_then_str_then_alpha <|> (CU.singleton <$> letter)
where
let_then_str_then_alpha :: Parser String
let_then_str_then_alpha = do
labelstr <- try do
l <- letter
s <- ldh_str
pure $ CU.singleton l <> CU.fromCharArray s
if (S.length labelstr > label_maxsize || not (parse_last_char labelstr let_dig))
then empty
else pure labelstr
-- Tuple whole_label last_char <- PC.try do
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
-- case runParser (CU.singleton last_char) let_dig of
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
-- then Nothing
-- else pure whole_label
-- let_then_str_then_alpha = do
-- Tuple whole_label last_char <- PC.try do
-- l <- letter
-- s <- ldhstr
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
-- case runParser (CU.singleton last_char) let_dig of
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
-- then fail $ "too big"
-- else pure whole_label
{-
import Control.Lazy (defer)
import Data.Array.NonEmpty as NonEmpty
import Data.Either (Either(..))
import Data.Maybe (Maybe(..))
import Data.String as S -- length
import Data.String.CodeUnits as CU
-- import Data.String.Regex as R
-- import Data.String.Regex.Flags as RF
import Data.Tuple (Tuple(..))
import Parsing.Combinators.Array (many1)
import Parsing.Combinators as PC
import Parsing (Parser, fail, runParser)
import Parsing.String.Basic (alphaNum, letter)
import Parsing.String (char, string, eof)
-- | From RFC 1035: <domain> ::= <subdomain> | " "
-- |
-- | Accepting an optional '.' at the end of the subdomain doesn't conform
-- | to the (prefered) syntax of a domain as described in RFC 1035.
-- | However, this last '.' character should be acceptable in most applications.
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
-- | has to be differenciated from a "relative" name (www).
domain :: Parser String String
domain = PC.try (string " ") <|> sub_eof
sub_eof :: Parser String String
sub_eof = do
sub <- subdomain
maybe_final_point <- PC.optionMaybe (char '.')
eof
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
if S.length parsed_domain > 255
then fail $ "domain length is > 255 bytes (" <> show (S.length parsed_domain) <> ")"
else pure parsed_domain
where
did_we_parsed_the_final_point Nothing sub = sub
did_we_parsed_the_final_point (Just _) sub = sub <> "."
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
subdomain :: Parser String String
subdomain = do
-- First: read a label. This is bare minimum for a subdomain.
lab <- label
-- Second: the rest is optional.
r <- PC.optionMaybe (PC.try point_sub)
case r of
Nothing -> pure lab
Just sub -> pure $ lab <> sub
where
point_sub :: Parser String String
point_sub = do
point <- string "."
sub <- defer \_ -> subdomain
pure $ point <> sub
-- | Converting a single letter parser to a String parser.
char_to_string :: Parser String Char -> Parser String String
char_to_string p = do
character <- p
pure $ CU.singleton character
-}

View File

@ -1,6 +1,7 @@
module Main where
import Parser
import DomainParser
import Prelude (Unit, discard, show, ($), (==), (<>))
@ -10,6 +11,8 @@ import Effect.Console (log)
import Data.Maybe (Maybe(..))
import Data.Tuple (Tuple(..))
import Data.String.CodeUnits (fromCharArray)
-- isf :: Parser Boolean
-- isf = (_ == 'f') <$> itemP
--
@ -35,27 +38,54 @@ import Data.Tuple (Tuple(..))
-- let toprint = if b then "FOUND IT" else "not found"
-- log $ toprint <> ", rest: " <> str
logtest :: forall a. String -> Parser a -> String -> (a -> String) -> Effect Unit
logtest fname p str r = do
log $ "(" <> fname <> ") parsing '" <> str <> "': "
<> case parse p str of
Just (Tuple x y) -> show (r x) <> " " <> show y
Nothing -> "failed"
id :: forall a. a -> a
id a = a
main :: Effect Unit
main = do
log "🍝"
logtest "ldh_str" ldh_str "a12B.fl" fromCharArray
logtest "ldh_str" ldh_str "1efg.x1" fromCharArray
logtest "ldh_str" ldh_str ".qjzleb" fromCharArray
logtest "ldh_str" ldh_str "a-b.b-a" fromCharArray
logtest "ldh_str" ldh_str "" fromCharArray
log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
Just (Tuple x y) -> show x <> " " <> show y
Nothing -> "failed"
log ""
log $ "parsing 'fic' in 'fiction' (string): " <> case parse (string "fic") "fiction" of
Just (Tuple x y) -> show x <> " " <> show y
Nothing -> "failed"
logtest "label" label "example.org" id
logtest "label" label "1notgreat.x" id
logtest "label" label ".shouldntwork" id
logtest "label" label "a-b.great" id
logtest "label" label "-b.shouldfail" id
logtest "label" label "b-.shouldfail" id
logtest "label" label "toolg.org" id
logtest "label" label "too-long.org" id
logtest "label" label "too-long-shouldfail.org" id
logtest "label" label "" id
log $ "parsing ident (all first alphanum) in 'ab123-blah' (ident): " <>
case parse ident "ab123-blah" of
Just (Tuple x y) -> show x <> " " <> show y
Nothing -> "failed"
log $ "parsing integer in '-19ab' (integer): " <>
case parse integer "-19ab" of
Just (Tuple x y) -> show x <> " " <> show y
Nothing -> "failed"
-- log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
-- Just (Tuple x y) -> show x <> " " <> show y
-- Nothing -> "failed"
--
-- log $ "parsing 'fic' in 'fiction' (string): " <> case parse (string "fic") "fiction" of
-- Just (Tuple x y) -> show x <> " " <> show y
-- Nothing -> "failed"
--
-- log $ "parsing ident (all first alphanum) in 'ab123-blah' (ident): " <>
-- case parse ident "ab123-blah" of
-- Just (Tuple x y) -> show x <> " " <> show y
-- Nothing -> "failed"
--
-- log $ "parsing integer in '-19ab' (integer): " <>
-- case parse integer "-19ab" of
-- Just (Tuple x y) -> show x <> " " <> show y
-- Nothing -> "failed"
-- JUST WORKS
-- isffound $ parse isf "fable"
@ -87,5 +117,3 @@ main = do
-- log $ "parsing '': " <> case parse ishi2 "" of
-- Just (Tuple x y) -> show x <> " " <> show y
-- Nothing -> "coudn't parse two letters"
log "end"

View File

@ -1,18 +1,18 @@
module Parser where
import Prelude
import Data.Int as Int
import Control.Alt (class Alt, (<|>))
import Control.Plus (class Plus, empty)
import Control.Alternative (class Alternative)
import Control.Lazy (class Lazy)
import Control.Plus (class Plus, empty)
import Data.Array as A
import Data.Int as Int
import Data.Maybe (Maybe(..))
import Data.String.CodeUnits (uncons, toCharArray, fromCharArray)
import Data.Tuple (Tuple(..))
import Data.String.CodeUnits (singleton, uncons, toCharArray, fromCharArray)
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
newtype Parser v = Parser (String -> Maybe (Tuple v String))
parse :: forall a. Parser a -> (String -> Maybe (Tuple a String))
@ -71,42 +71,24 @@ sat :: (Char -> Boolean) -> Parser Char
sat p = do x <- item
if p x then pure x else empty
isDigit :: Char -> Boolean
isDigit = between '0' '9'
digit :: Parser Char
digit = sat isDigit
isLower :: Char -> Boolean
isLower = between 'a' 'z'
lower :: Parser Char
lower = sat isLower
isUpper :: Char -> Boolean
isUpper = between 'A' 'Z'
upper :: Parser Char
upper = sat isUpper
isAlpha :: Char -> Boolean
isAlpha c = A.any (\f -> f c) [isLower, isUpper]
letter :: Parser Char
letter = sat isAlpha
isAlphaNum :: Char -> Boolean
isAlphaNum c = A.any (\f -> f c) [isAlpha, isDigit]
alphanum :: Parser Char
alphanum = sat isAlphaNum
char :: Char -> Parser Char
char x = sat (_ == x)
concat :: Char -> String -> String
concat c rest = singleton c <> rest
string :: String -> Parser String
string str = case A.uncons (toCharArray str) of
Nothing -> Parser \stream -> Just (Tuple "" stream)
@ -131,9 +113,6 @@ int = do _ <- char '-'
pure (-n)
<|> nat
isSpace :: Char -> Boolean
isSpace c = A.any (\v -> v == c) [' ', '\t', ' ', '\r', '\n']
space :: Parser Unit
space = do _ <- A.many (sat isSpace)
pure unit
@ -155,3 +134,8 @@ integer = token int
symbol :: String -> Parser String
symbol xs = token (string xs)
many1 :: forall a. Parser a -> Parser (Array a)
many1 p = do first <- p
rest <- A.many p
pure $ A.cons first rest