Split code, implement more of DomainParser.
This commit is contained in:
parent
060467bcc4
commit
ef6f49f624
28
src/BaseFunctions.purs
Normal file
28
src/BaseFunctions.purs
Normal file
@ -0,0 +1,28 @@
|
||||
module BaseFunctions where
|
||||
|
||||
import Prelude (between, (<>), (==))
|
||||
|
||||
import Data.Array as A
|
||||
import Data.String.CodeUnits (singleton)
|
||||
|
||||
concat :: Char -> String -> String
|
||||
concat c rest = singleton c <> rest
|
||||
|
||||
isDigit :: Char -> Boolean
|
||||
isDigit = between '0' '9'
|
||||
|
||||
isLower :: Char -> Boolean
|
||||
isLower = between 'a' 'z'
|
||||
|
||||
isUpper :: Char -> Boolean
|
||||
isUpper = between 'A' 'Z'
|
||||
|
||||
isAlpha :: Char -> Boolean
|
||||
isAlpha c = A.any (\f -> f c) [isLower, isUpper]
|
||||
|
||||
isAlphaNum :: Char -> Boolean
|
||||
isAlphaNum c = A.any (\f -> f c) [isAlpha, isDigit]
|
||||
|
||||
isSpace :: Char -> Boolean
|
||||
isSpace c = A.any (\v -> v == c) [' ', '\t', ' ', '\r', '\n']
|
||||
|
147
src/DomainParser.purs
Normal file
147
src/DomainParser.purs
Normal file
@ -0,0 +1,147 @@
|
||||
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
|
||||
module DomainParser where
|
||||
|
||||
import Prelude
|
||||
--import Prelude (bind, discard, pure, show, ($), (<>), (>))
|
||||
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.Array as A
|
||||
import Data.String as S
|
||||
import Data.Tuple (Tuple(..))
|
||||
import Data.Array.NonEmpty as NonEmpty
|
||||
import Data.String.CodeUnits as CU
|
||||
import Control.Alt ((<|>))
|
||||
import Control.Plus (empty)
|
||||
|
||||
import Parser
|
||||
|
||||
-- From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||
let_dig :: Parser Char
|
||||
let_dig = alphanum
|
||||
|
||||
-- From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
|
||||
-- Either a Letter, Digital or an Hyphenation character.
|
||||
let_dig_hyp :: Parser Char
|
||||
let_dig_hyp = let_dig <|> char '-'
|
||||
|
||||
-- From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
||||
ldh_str :: Parser (Array Char)
|
||||
ldh_str = many1 let_dig_hyp
|
||||
|
||||
-- TODO: 63
|
||||
label_maxsize :: Int
|
||||
label_maxsize = 7
|
||||
|
||||
last_char :: String -> Maybe Char
|
||||
last_char = A.last <<< CU.toCharArray
|
||||
|
||||
parse_last_char :: String -> Parser Char -> Boolean
|
||||
parse_last_char s p = case last_char s of
|
||||
Nothing -> false
|
||||
Just c -> case parse p (CU.singleton c) of
|
||||
Nothing -> false
|
||||
_ -> true
|
||||
|
||||
try :: Parser String -> Parser String
|
||||
try p = Parser p'
|
||||
where p' str = case parse p str of
|
||||
Nothing -> Just (Tuple "" str) -- FIXME: This is flawed: we cannot know if it worked!
|
||||
Just x -> pure x
|
||||
|
||||
-- From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||
label :: Parser String
|
||||
label = let_then_str_then_alpha <|> (CU.singleton <$> letter)
|
||||
where
|
||||
let_then_str_then_alpha :: Parser String
|
||||
let_then_str_then_alpha = do
|
||||
labelstr <- try do
|
||||
l <- letter
|
||||
s <- ldh_str
|
||||
pure $ CU.singleton l <> CU.fromCharArray s
|
||||
if (S.length labelstr > label_maxsize || not (parse_last_char labelstr let_dig))
|
||||
then empty
|
||||
else pure labelstr
|
||||
|
||||
|
||||
-- Tuple whole_label last_char <- PC.try do
|
||||
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
|
||||
-- case runParser (CU.singleton last_char) let_dig of
|
||||
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
|
||||
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
|
||||
-- then Nothing
|
||||
-- else pure whole_label
|
||||
|
||||
-- let_then_str_then_alpha = do
|
||||
-- Tuple whole_label last_char <- PC.try do
|
||||
-- l <- letter
|
||||
-- s <- ldhstr
|
||||
-- pure $ Tuple (CU.singleton l <> (CU.fromCharArray $ NonEmpty.toArray s)) (NonEmpty.last s)
|
||||
-- case runParser (CU.singleton last_char) let_dig of
|
||||
-- Left _ -> fail $ "Label is wrong: last char is '" <> (CU.singleton last_char) <> "' which isn't an alphanum"
|
||||
-- Right _ -> if S.length whole_label > 63 -- Remember: we already did read a letter (l).
|
||||
-- then fail $ "too big"
|
||||
-- else pure whole_label
|
||||
|
||||
{-
|
||||
import Control.Lazy (defer)
|
||||
import Data.Array.NonEmpty as NonEmpty
|
||||
import Data.Either (Either(..))
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.String as S -- length
|
||||
import Data.String.CodeUnits as CU
|
||||
-- import Data.String.Regex as R
|
||||
-- import Data.String.Regex.Flags as RF
|
||||
import Data.Tuple (Tuple(..))
|
||||
import Parsing.Combinators.Array (many1)
|
||||
import Parsing.Combinators as PC
|
||||
import Parsing (Parser, fail, runParser)
|
||||
import Parsing.String.Basic (alphaNum, letter)
|
||||
import Parsing.String (char, string, eof)
|
||||
|
||||
-- | From RFC 1035: <domain> ::= <subdomain> | " "
|
||||
-- |
|
||||
-- | Accepting an optional '.' at the end of the subdomain doesn't conform
|
||||
-- | to the (prefered) syntax of a domain as described in RFC 1035.
|
||||
-- | However, this last '.' character should be acceptable in most applications.
|
||||
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
||||
-- | has to be differenciated from a "relative" name (www).
|
||||
domain :: Parser String String
|
||||
domain = PC.try (string " ") <|> sub_eof
|
||||
|
||||
sub_eof :: Parser String String
|
||||
sub_eof = do
|
||||
sub <- subdomain
|
||||
maybe_final_point <- PC.optionMaybe (char '.')
|
||||
eof
|
||||
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
|
||||
if S.length parsed_domain > 255
|
||||
then fail $ "domain length is > 255 bytes (" <> show (S.length parsed_domain) <> ")"
|
||||
else pure parsed_domain
|
||||
where
|
||||
did_we_parsed_the_final_point Nothing sub = sub
|
||||
did_we_parsed_the_final_point (Just _) sub = sub <> "."
|
||||
|
||||
-- From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
||||
subdomain :: Parser String String
|
||||
subdomain = do
|
||||
-- First: read a label. This is bare minimum for a subdomain.
|
||||
lab <- label
|
||||
-- Second: the rest is optional.
|
||||
r <- PC.optionMaybe (PC.try point_sub)
|
||||
case r of
|
||||
Nothing -> pure lab
|
||||
Just sub -> pure $ lab <> sub
|
||||
|
||||
where
|
||||
point_sub :: Parser String String
|
||||
point_sub = do
|
||||
point <- string "."
|
||||
sub <- defer \_ -> subdomain
|
||||
pure $ point <> sub
|
||||
|
||||
-- | Converting a single letter parser to a String parser.
|
||||
char_to_string :: Parser String Char -> Parser String String
|
||||
char_to_string p = do
|
||||
character <- p
|
||||
pure $ CU.singleton character
|
||||
-}
|
@ -1,6 +1,7 @@
|
||||
module Main where
|
||||
|
||||
import Parser
|
||||
import DomainParser
|
||||
|
||||
import Prelude (Unit, discard, show, ($), (==), (<>))
|
||||
|
||||
@ -10,6 +11,8 @@ import Effect.Console (log)
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.Tuple (Tuple(..))
|
||||
|
||||
import Data.String.CodeUnits (fromCharArray)
|
||||
|
||||
-- isf :: Parser Boolean
|
||||
-- isf = (_ == 'f') <$> itemP
|
||||
--
|
||||
@ -35,27 +38,54 @@ import Data.Tuple (Tuple(..))
|
||||
-- let toprint = if b then "FOUND IT" else "not found"
|
||||
-- log $ toprint <> ", rest: " <> str
|
||||
|
||||
logtest :: forall a. String -> Parser a -> String -> (a -> String) -> Effect Unit
|
||||
logtest fname p str r = do
|
||||
log $ "(" <> fname <> ") parsing '" <> str <> "': "
|
||||
<> case parse p str of
|
||||
Just (Tuple x y) -> show (r x) <> " " <> show y
|
||||
Nothing -> "failed"
|
||||
|
||||
id :: forall a. a -> a
|
||||
id a = a
|
||||
|
||||
main :: Effect Unit
|
||||
main = do
|
||||
log "🍝"
|
||||
logtest "ldh_str" ldh_str "a12B.fl" fromCharArray
|
||||
logtest "ldh_str" ldh_str "1efg.x1" fromCharArray
|
||||
logtest "ldh_str" ldh_str ".qjzleb" fromCharArray
|
||||
logtest "ldh_str" ldh_str "a-b.b-a" fromCharArray
|
||||
logtest "ldh_str" ldh_str "" fromCharArray
|
||||
|
||||
log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
|
||||
Just (Tuple x y) -> show x <> " " <> show y
|
||||
Nothing -> "failed"
|
||||
log ""
|
||||
|
||||
log $ "parsing 'fic' in 'fiction' (string): " <> case parse (string "fic") "fiction" of
|
||||
Just (Tuple x y) -> show x <> " " <> show y
|
||||
Nothing -> "failed"
|
||||
logtest "label" label "example.org" id
|
||||
logtest "label" label "1notgreat.x" id
|
||||
logtest "label" label ".shouldntwork" id
|
||||
logtest "label" label "a-b.great" id
|
||||
logtest "label" label "-b.shouldfail" id
|
||||
logtest "label" label "b-.shouldfail" id
|
||||
logtest "label" label "toolg.org" id
|
||||
logtest "label" label "too-long.org" id
|
||||
logtest "label" label "too-long-shouldfail.org" id
|
||||
logtest "label" label "" id
|
||||
|
||||
log $ "parsing ident (all first alphanum) in 'ab123-blah' (ident): " <>
|
||||
case parse ident "ab123-blah" of
|
||||
Just (Tuple x y) -> show x <> " " <> show y
|
||||
Nothing -> "failed"
|
||||
|
||||
log $ "parsing integer in '-19ab' (integer): " <>
|
||||
case parse integer "-19ab" of
|
||||
Just (Tuple x y) -> show x <> " " <> show y
|
||||
Nothing -> "failed"
|
||||
-- log $ "parsing the first 'f' in 'fiction' (sat): " <> case parse (sat (\x -> x == 'f')) "fiction" of
|
||||
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||
-- Nothing -> "failed"
|
||||
--
|
||||
-- log $ "parsing 'fic' in 'fiction' (string): " <> case parse (string "fic") "fiction" of
|
||||
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||
-- Nothing -> "failed"
|
||||
--
|
||||
-- log $ "parsing ident (all first alphanum) in 'ab123-blah' (ident): " <>
|
||||
-- case parse ident "ab123-blah" of
|
||||
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||
-- Nothing -> "failed"
|
||||
--
|
||||
-- log $ "parsing integer in '-19ab' (integer): " <>
|
||||
-- case parse integer "-19ab" of
|
||||
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||
-- Nothing -> "failed"
|
||||
|
||||
-- JUST WORKS
|
||||
-- isffound $ parse isf "fable"
|
||||
@ -87,5 +117,3 @@ main = do
|
||||
-- log $ "parsing '': " <> case parse ishi2 "" of
|
||||
-- Just (Tuple x y) -> show x <> " " <> show y
|
||||
-- Nothing -> "coudn't parse two letters"
|
||||
|
||||
log "end"
|
||||
|
@ -1,18 +1,18 @@
|
||||
module Parser where
|
||||
|
||||
import Prelude
|
||||
|
||||
import Data.Int as Int
|
||||
|
||||
import Control.Alt (class Alt, (<|>))
|
||||
import Control.Plus (class Plus, empty)
|
||||
import Control.Alternative (class Alternative)
|
||||
import Control.Lazy (class Lazy)
|
||||
import Control.Plus (class Plus, empty)
|
||||
|
||||
import Data.Array as A
|
||||
import Data.Int as Int
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.String.CodeUnits (uncons, toCharArray, fromCharArray)
|
||||
import Data.Tuple (Tuple(..))
|
||||
import Data.String.CodeUnits (singleton, uncons, toCharArray, fromCharArray)
|
||||
|
||||
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
||||
|
||||
newtype Parser v = Parser (String -> Maybe (Tuple v String))
|
||||
parse :: forall a. Parser a -> (String -> Maybe (Tuple a String))
|
||||
@ -71,42 +71,24 @@ sat :: (Char -> Boolean) -> Parser Char
|
||||
sat p = do x <- item
|
||||
if p x then pure x else empty
|
||||
|
||||
isDigit :: Char -> Boolean
|
||||
isDigit = between '0' '9'
|
||||
|
||||
digit :: Parser Char
|
||||
digit = sat isDigit
|
||||
|
||||
isLower :: Char -> Boolean
|
||||
isLower = between 'a' 'z'
|
||||
|
||||
lower :: Parser Char
|
||||
lower = sat isLower
|
||||
|
||||
isUpper :: Char -> Boolean
|
||||
isUpper = between 'A' 'Z'
|
||||
|
||||
upper :: Parser Char
|
||||
upper = sat isUpper
|
||||
|
||||
isAlpha :: Char -> Boolean
|
||||
isAlpha c = A.any (\f -> f c) [isLower, isUpper]
|
||||
|
||||
letter :: Parser Char
|
||||
letter = sat isAlpha
|
||||
|
||||
isAlphaNum :: Char -> Boolean
|
||||
isAlphaNum c = A.any (\f -> f c) [isAlpha, isDigit]
|
||||
|
||||
alphanum :: Parser Char
|
||||
alphanum = sat isAlphaNum
|
||||
|
||||
char :: Char -> Parser Char
|
||||
char x = sat (_ == x)
|
||||
|
||||
concat :: Char -> String -> String
|
||||
concat c rest = singleton c <> rest
|
||||
|
||||
string :: String -> Parser String
|
||||
string str = case A.uncons (toCharArray str) of
|
||||
Nothing -> Parser \stream -> Just (Tuple "" stream)
|
||||
@ -131,9 +113,6 @@ int = do _ <- char '-'
|
||||
pure (-n)
|
||||
<|> nat
|
||||
|
||||
isSpace :: Char -> Boolean
|
||||
isSpace c = A.any (\v -> v == c) [' ', '\t', ' ', '\r', '\n']
|
||||
|
||||
space :: Parser Unit
|
||||
space = do _ <- A.many (sat isSpace)
|
||||
pure unit
|
||||
@ -155,3 +134,8 @@ integer = token int
|
||||
|
||||
symbol :: String -> Parser String
|
||||
symbol xs = token (string xs)
|
||||
|
||||
many1 :: forall a. Parser a -> Parser (Array a)
|
||||
many1 p = do first <- p
|
||||
rest <- A.many p
|
||||
pure $ A.cons first rest
|
||||
|
Loading…
Reference in New Issue
Block a user