Split code, implement more of DomainParser.
This commit is contained in:
parent
060467bcc4
commit
ef6f49f624
28
src/BaseFunctions.purs
Normal file
28
src/BaseFunctions.purs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
module BaseFunctions where
|
||||||
|
|
||||||
|
import Prelude (between, (<>), (==))
|
||||||
|
|
||||||
|
import Data.Array as A
|
||||||
|
import Data.String.CodeUnits (singleton)
|
||||||
|
|
||||||
|
concat :: Char -> String -> String
|
||||||
|
concat c rest = singleton c <> rest
|
||||||
|
|
||||||
|
isDigit :: Char -> Boolean
|
||||||
|
isDigit = between '0' '9'
|
||||||
|
|
||||||
|
isLower :: Char -> Boolean
|
||||||
|
isLower = between 'a' 'z'
|
||||||
|
|
||||||
|
isUpper :: Char -> Boolean
|
||||||
|
isUpper = between 'A' 'Z'
|
||||||
|
|
||||||
|
isAlpha :: Char -> Boolean
|
||||||
|
isAlpha c = A.any (\f -> f c) [isLower, isUpper]
|
||||||
|
|
||||||
|
isAlphaNum :: Char -> Boolean
|
||||||
|
isAlphaNum c = A.any (\f -> f c) [isAlpha, isDigit]
|
||||||
|
|
||||||
|
isSpace :: Char -> Boolean
|
||||||
|
isSpace c = A.any (\v -> v == c) [' ', '\t', ' ', '\r', '\n']
|
||||||
|
|
147
src/DomainParser.purs
Normal file
147
src/DomainParser.purs
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
|
||||||
|
module DomainParser where
|
||||||
|
|
||||||
|
import Prelude
|
||||||
|
--import Prelude (bind, discard, pure, show, ($), (<>), (>))
|
||||||
|
|
||||||
|
import Data.Maybe (Maybe(..))
|
||||||
|
import Data.Array as A
|
||||||
|
import Data.String as S
|
||||||
|
import Data.Tuple (Tuple(..))
|
||||||
|
import Data.Array.NonEmpty as NonEmpty
|
||||||
|
import Data.String.CodeUnits as CU
|
||||||
|
import Control.Alt ((<|>))
|
||||||
|
import Control.Plus (empty)
|
||||||
|
|
||||||
|
import Parser
|
||||||
|
|
||||||
|
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||||
|
let_dig :: Parser Char
|
||||||
|
let_dig = alphanum
|
||||||
|
|
||||||
|
-- From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
|
||||||
|
-- Either a Letter, Digital or an Hyphenation character.
|
||||||
|
let_dig_hyp :: Parser Char
|
||||||
|
let_dig_hyp = let_dig <|> char '-'
|
||||||
|
|
||||||
|
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
||||||
|
ldh_str :: Parser (Array Char)
|
||||||
|
ldh_str = many1 let_dig_hyp
|
||||||
|
|
||||||
|
-- TODO: 63
|
||||||
|
label_maxsize :: Int
|
||||||
|
label_maxsize = 7
|
||||||
|
|
||||||
|
last_char :: String -> Maybe Char
|
||||||
|
last_char = A.last <<< CU.toCharArray
|
||||||
|
|
||||||
|
parse_last_char :: String -> Parser Char -> Boolean
|
||||||
|
parse_last_char s p = case last_char s of
|
||||||
|
Nothing -> false
|
||||||
|
Just c -> case parse p (CU.singleton c) of
|
||||||
|
Nothing -> false
|
||||||
|
_ -> true
|
||||||
|
|
||||||
|
try :: Parser String -> Parser String
|
||||||
|
try p = Parser p'
|
||||||
|
where p' str = case parse p str of
|
||||||
|
Nothing -> Just (Tuple "" str) -- FIXME: This is flawed: we cannot know if it worked!
|
||||||
|
Just x -> pure x
|
||||||
|
|
||||||
|
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||||
|
label :: Parser String
|
||||||
|
label = let_then_str_then_alpha <|> (CU.singleton <$> letter)
|
||||||
|
where
|
||||||
|
let_then_str_then_alpha :: Parser String
|
||||||
|
let_then_str_then_alpha = do
|
||||||
|
labelstr <- try do
|
||||||
|
l <- letter
|
||||||
|
s <- ldh_str
|
||||||
|
pure $ CU.singleton l <> CU.fromCharArray s
|
||||||
|
if (S.length labelstr > label_maxsize || not (parse_last_char labelstr let_dig))
|
||||||
|
then empty
|
||||||
|
else pure labelstr
|
||||||
|
|
||||||
|
|
||||||
|
-- Tuple whole_label last_char <- PC.try do
|
||||||
|
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
|
||||||
|
-- case runParser (CU.singleton last_char) let_dig of
|
||||||
|
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
|
||||||
|
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
|
||||||
|
-- then Nothing
|
||||||
|
-- else pure whole_label
|
||||||
|
|
||||||
|
-- let_then_str_then_alpha = do
|
||||||
|
-- Tuple whole_label last_char <- PC.try do
|
||||||
|
-- l <- letter
|
||||||
|
-- s <- ldhstr
|
||||||
|
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
|
||||||
|
-- case runParser (CU.singleton last_char) let_dig of
|
||||||
|
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
|
||||||
|
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
|
||||||
|
-- then fail $ "too big"
|
||||||
|
-- else pure whole_label
|
||||||
|
|
||||||
|
{-
|
||||||
|
import Control.Lazy (defer)
|
||||||
|
import Data.Array.NonEmpty as NonEmpty
|
||||||
|
import Data.Either (Either(..))
|
||||||
|
import Data.Maybe (Maybe(..))
|
||||||
|
import Data.String as S -- length
|
||||||
|
import Data.String.CodeUnits as CU
|
||||||
|
-- import Data.String.Regex as R
|
||||||
|
-- import Data.String.Regex.Flags as RF
|
||||||
|
import Data.Tuple (Tuple(..))
|
||||||
|
import Parsing.Combinators.Array (many1)
|
||||||
|
import Parsing.Combinators as PC
|
||||||
|
import Parsing (Parser, fail, runParser)
|
||||||
|
import Parsing.String.Basic (alphaNum, letter)
|
||||||
|
import Parsing.String (char, string, eof)
|
||||||
|
|
||||||
|
-- | From RFC 1035: <domain> ::= <subdomain> | " "
|
||||||
|
-- |
|
||||||
|
-- | Accepting an optional '.' at the end of the subdomain doesn't conform
|
||||||
|
-- | to the (prefered) syntax of a domain as described in RFC 1035.
|
||||||
|
-- | However, this last '.' character should be acceptable in most applications.
|
||||||
|
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
||||||
|
-- | has to be differenciated from a "relative" name (www).
|
||||||
|
domain :: Parser String String
|
||||||
|
domain = PC.try (string " ") <|> sub_eof
|
||||||
|
|
||||||
|
sub_eof :: Parser String String
|
||||||
|
sub_eof = do
|
||||||
|
sub <- subdomain
|
||||||
|
maybe_final_point <- PC.optionMaybe (char '.')
|
||||||
|
eof
|
||||||
|
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
|
||||||
|
if S.length parsed_domain > 255
|
||||||
|
then fail $ "domain length is > 255 bytes (" <> show (S.length parsed_domain) <> ")"
|
||||||
|
else pure parsed_domain
|
||||||
|
where
|
||||||
|
did_we_parsed_the_final_point Nothing sub = sub
|
||||||
|
did_we_parsed_the_final_point (Just _) sub = sub <> "."
|
||||||
|
|
||||||
|
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
||||||
|
subdomain :: Parser String String
|
||||||
|
subdomain = do
|
||||||
|
-- First: read a label. This is bare minimum for a subdomain.
|
||||||
|
lab <- label
|
||||||
|
-- Second: the rest is optional.
|
||||||
|
r <- PC.optionMaybe (PC.try point_sub)
|
||||||
|
case r of
|
||||||
|
Nothing -> pure lab
|
||||||
|
Just sub -> pure $ lab <> sub
|
||||||
|
|
||||||
|
where
|
||||||
|
point_sub :: Parser String String
|
||||||
|
point_sub = do
|
||||||
|
point <- string "."
|
||||||
|
sub <- defer \_ -> subdomain
|
||||||
|
pure $ point <> sub
|
||||||
|
|
||||||
|
-- | Converting a single letter parser to a String parser.
|
||||||
|
char_to_string :: Parser String Char -> Parser String String
|
||||||
|
char_to_string p = do
|
||||||
|
character <- p
|
||||||
|
pure $ CU.singleton character
|
||||||
|
-}
|
@ -1,6 +1,7 @@
|
|||||||
module Main where
|
module Main where
|
||||||
|
|
||||||
import Parser
|
import Parser
|
||||||
|
import DomainParser
|
||||||
|
|
||||||
import Prelude (Unit, discard, show, ($), (==), (<>))
|
import Prelude (Unit, discard, show, ($), (==), (<>))
|
||||||
|
|
||||||
@ -10,6 +11,8 @@ import Effect.Console (log)
|
|||||||
import Data.Maybe (Maybe(..))
|
import Data.Maybe (Maybe(..))
|
||||||
import Data.Tuple (Tuple(..))
|
import Data.Tuple (Tuple(..))
|
||||||
|
|
||||||
|
import Data.String.CodeUnits (fromCharArray)
|
||||||
|
|
||||||
-- isf :: Parser Boolean
|
-- isf :: Parser Boolean
|
||||||
-- isf = (_ == 'f') <$> itemP
|
-- isf = (_ == 'f') <$> itemP
|
||||||
--
|
--
|
||||||
@ -35,27 +38,54 @@ import Data.Tuple (Tuple(..))
|
|||||||
-- let toprint = if b then "FOUND IT" else "not found"
|
-- let toprint = if b then "FOUND IT" else "not found"
|
||||||
-- log $ toprint <> ", rest: " <> str
|
-- log $ toprint <> ", rest: " <> str
|
||||||
|
|
||||||
|
logtest :: forall a. String -> Parser a -> String -> (a -> String) -> Effect Unit
|
||||||
|
logtest fname p str r = do
|
||||||
|
log $ "(" <> fname <> ") parsing '" <> str <> "': "
|
||||||
|
<> case parse p str of
|
||||||
|
Just (Tuple x y) -> show (r x) <> " " <> show y
|
||||||
|
Nothing -> "failed"
|
||||||
|
|
||||||
|
id :: forall a. a -> a
|
||||||
|
id a = a
|
||||||
|
|
||||||
main :: Effect Unit
|
main :: Effect Unit
|
||||||
main = do
|
main = do
|
||||||
log "🍝"
|
logtest "ldh_str" ldh_str "a12B.fl" fromCharArray
|
||||||
|
logtest "ldh_str" ldh_str "1efg.x1" fromCharArray
|
||||||
|
logtest "ldh_str" ldh_str ".qjzleb" fromCharArray
|
||||||
|
logtest "ldh_str" ldh_str "a-b.b-a" fromCharArray
|
||||||
|
logtest "ldh_str" ldh_str "" fromCharArray
|
||||||
|
|
||||||
log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
|
log ""
|
||||||
Just (Tuple x y) -> show x <> " " <> show y
|
|
||||||
Nothing -> "failed"
|
|
||||||
|
|
||||||
log $ "parsing 'fic' in 'fiction' (string): " <> case parse (string "fic") "fiction" of
|
logtest "label" label "example.org" id
|
||||||
Just (Tuple x y) -> show x <> " " <> show y
|
logtest "label" label "1notgreat.x" id
|
||||||
Nothing -> "failed"
|
logtest "label" label ".shouldntwork" id
|
||||||
|
logtest "label" label "a-b.great" id
|
||||||
|
logtest "label" label "-b.shouldfail" id
|
||||||
|
logtest "label" label "b-.shouldfail" id
|
||||||
|
logtest "label" label "toolg.org" id
|
||||||
|
logtest "label" label "too-long.org" id
|
||||||
|
logtest "label" label "too-long-shouldfail.org" id
|
||||||
|
logtest "label" label "" id
|
||||||
|
|
||||||
log $ "parsing ident (all first alphanum) in 'ab123-blah' (ident): " <>
|
-- log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
|
||||||
case parse ident "ab123-blah" of
|
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||||
Just (Tuple x y) -> show x <> " " <> show y
|
-- Nothing -> "failed"
|
||||||
Nothing -> "failed"
|
--
|
||||||
|
-- log $ "parsing 'fic' in 'fiction' (string): " <> case parse (string "fic") "fiction" of
|
||||||
log $ "parsing integer in '-19ab' (integer): " <>
|
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||||
case parse integer "-19ab" of
|
-- Nothing -> "failed"
|
||||||
Just (Tuple x y) -> show x <> " " <> show y
|
--
|
||||||
Nothing -> "failed"
|
-- log $ "parsing ident (all first alphanum) in 'ab123-blah' (ident): " <>
|
||||||
|
-- case parse ident "ab123-blah" of
|
||||||
|
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||||
|
-- Nothing -> "failed"
|
||||||
|
--
|
||||||
|
-- log $ "parsing integer in '-19ab' (integer): " <>
|
||||||
|
-- case parse integer "-19ab" of
|
||||||
|
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||||
|
-- Nothing -> "failed"
|
||||||
|
|
||||||
-- JUST WORKS
|
-- JUST WORKS
|
||||||
-- isffound $ parse isf "fable"
|
-- isffound $ parse isf "fable"
|
||||||
@ -87,5 +117,3 @@ main = do
|
|||||||
-- log $ "parsing '': " <> case parse ishi2 "" of
|
-- log $ "parsing '': " <> case parse ishi2 "" of
|
||||||
-- Just (Tuple x y) -> show x <> " " <> show y
|
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||||
-- Nothing -> "coudn't parse two letters"
|
-- Nothing -> "coudn't parse two letters"
|
||||||
|
|
||||||
log "end"
|
|
||||||
|
@ -1,18 +1,18 @@
|
|||||||
module Parser where
|
module Parser where
|
||||||
|
|
||||||
import Prelude
|
import Prelude
|
||||||
|
|
||||||
import Data.Int as Int
|
|
||||||
|
|
||||||
import Control.Alt (class Alt, (<|>))
|
import Control.Alt (class Alt, (<|>))
|
||||||
import Control.Plus (class Plus, empty)
|
|
||||||
import Control.Alternative (class Alternative)
|
import Control.Alternative (class Alternative)
|
||||||
import Control.Lazy (class Lazy)
|
import Control.Lazy (class Lazy)
|
||||||
|
import Control.Plus (class Plus, empty)
|
||||||
|
|
||||||
import Data.Array as A
|
import Data.Array as A
|
||||||
|
import Data.Int as Int
|
||||||
import Data.Maybe (Maybe(..))
|
import Data.Maybe (Maybe(..))
|
||||||
|
import Data.String.CodeUnits (uncons, toCharArray, fromCharArray)
|
||||||
import Data.Tuple (Tuple(..))
|
import Data.Tuple (Tuple(..))
|
||||||
import Data.String.CodeUnits (singleton, uncons, toCharArray, fromCharArray)
|
|
||||||
|
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
||||||
|
|
||||||
newtype Parser v = Parser (String -> Maybe (Tuple v String))
|
newtype Parser v = Parser (String -> Maybe (Tuple v String))
|
||||||
parse :: forall a. Parser a -> (String -> Maybe (Tuple a String))
|
parse :: forall a. Parser a -> (String -> Maybe (Tuple a String))
|
||||||
@ -71,42 +71,24 @@ sat :: (Char -> Boolean) -> Parser Char
|
|||||||
sat p = do x <- item
|
sat p = do x <- item
|
||||||
if p x then pure x else empty
|
if p x then pure x else empty
|
||||||
|
|
||||||
isDigit :: Char -> Boolean
|
|
||||||
isDigit = between '0' '9'
|
|
||||||
|
|
||||||
digit :: Parser Char
|
digit :: Parser Char
|
||||||
digit = sat isDigit
|
digit = sat isDigit
|
||||||
|
|
||||||
isLower :: Char -> Boolean
|
|
||||||
isLower = between 'a' 'z'
|
|
||||||
|
|
||||||
lower :: Parser Char
|
lower :: Parser Char
|
||||||
lower = sat isLower
|
lower = sat isLower
|
||||||
|
|
||||||
isUpper :: Char -> Boolean
|
|
||||||
isUpper = between 'A' 'Z'
|
|
||||||
|
|
||||||
upper :: Parser Char
|
upper :: Parser Char
|
||||||
upper = sat isUpper
|
upper = sat isUpper
|
||||||
|
|
||||||
isAlpha :: Char -> Boolean
|
|
||||||
isAlpha c = A.any (\f -> f c) [isLower, isUpper]
|
|
||||||
|
|
||||||
letter :: Parser Char
|
letter :: Parser Char
|
||||||
letter = sat isAlpha
|
letter = sat isAlpha
|
||||||
|
|
||||||
isAlphaNum :: Char -> Boolean
|
|
||||||
isAlphaNum c = A.any (\f -> f c) [isAlpha, isDigit]
|
|
||||||
|
|
||||||
alphanum :: Parser Char
|
alphanum :: Parser Char
|
||||||
alphanum = sat isAlphaNum
|
alphanum = sat isAlphaNum
|
||||||
|
|
||||||
char :: Char -> Parser Char
|
char :: Char -> Parser Char
|
||||||
char x = sat (_ == x)
|
char x = sat (_ == x)
|
||||||
|
|
||||||
concat :: Char -> String -> String
|
|
||||||
concat c rest = singleton c <> rest
|
|
||||||
|
|
||||||
string :: String -> Parser String
|
string :: String -> Parser String
|
||||||
string str = case A.uncons (toCharArray str) of
|
string str = case A.uncons (toCharArray str) of
|
||||||
Nothing -> Parser \stream -> Just (Tuple "" stream)
|
Nothing -> Parser \stream -> Just (Tuple "" stream)
|
||||||
@ -131,9 +113,6 @@ int = do _ <- char '-'
|
|||||||
pure (-n)
|
pure (-n)
|
||||||
<|> nat
|
<|> nat
|
||||||
|
|
||||||
isSpace :: Char -> Boolean
|
|
||||||
isSpace c = A.any (\v -> v == c) [' ', '\t', ' ', '\r', '\n']
|
|
||||||
|
|
||||||
space :: Parser Unit
|
space :: Parser Unit
|
||||||
space = do _ <- A.many (sat isSpace)
|
space = do _ <- A.many (sat isSpace)
|
||||||
pure unit
|
pure unit
|
||||||
@ -155,3 +134,8 @@ integer = token int
|
|||||||
|
|
||||||
symbol :: String -> Parser String
|
symbol :: String -> Parser String
|
||||||
symbol xs = token (string xs)
|
symbol xs = token (string xs)
|
||||||
|
|
||||||
|
many1 :: forall a. Parser a -> Parser (Array a)
|
||||||
|
many1 p = do first <- p
|
||||||
|
rest <- A.many p
|
||||||
|
pure $ A.cons first rest
|
||||||
|
Loading…
Reference in New Issue
Block a user