To continue...
This commit is contained in:
parent
c4c624a2b4
commit
0702ba184e
@ -1,31 +1,36 @@
|
|||||||
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
|
-- | `DomainParser` is a simple parser for domain names as described in RFC 1035.
|
||||||
module DomainParser where
|
module DomainParser where
|
||||||
|
|
||||||
import Prelude (class Monoid, bind, mempty, not, pure, ($), (&&), (*>), (<<<), (<>), (==), (>))
|
import Prelude (bind, not, pure, ($), (&&), (*>), (<<<), (<>), (==), (>))
|
||||||
|
|
||||||
import Control.Lazy (defer)
|
import Control.Lazy (defer)
|
||||||
import Data.Maybe (Maybe(..))
|
import Data.Maybe (Maybe(..))
|
||||||
|
import Data.Either (Either(..))
|
||||||
import Data.Array as A
|
import Data.Array as A
|
||||||
import Data.String as S
|
import Data.String as S
|
||||||
import Data.Tuple (Tuple(..))
|
|
||||||
-- import Data.Array.NonEmpty as NonEmpty
|
|
||||||
import Data.String.CodeUnits as CU
|
import Data.String.CodeUnits as CU
|
||||||
import Control.Alt ((<|>))
|
import Control.Alt ((<|>))
|
||||||
import Control.Plus (empty)
|
import Control.Plus (empty)
|
||||||
|
|
||||||
import Parser (Parser(..), alphanum, char, letter, many1, parse, string)
|
data DomainError
|
||||||
|
= SubdomainTooLarge Position Size
|
||||||
|
| DomainTooLarge Size
|
||||||
|
| InvalidCharacter Position
|
||||||
|
| EOFExpected
|
||||||
|
|
||||||
|
import Parser (Parser(..), success, fail, failError, alphanum, char, letter, many1, parse, string)
|
||||||
|
|
||||||
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
|
-- | From RFC 1035: <let-dig> ::= <letter> | <digit>
|
||||||
let_dig :: Parser Char
|
let_dig :: forall e. Parser e Char
|
||||||
let_dig = alphanum
|
let_dig = alphanum
|
||||||
|
|
||||||
-- | From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
|
-- | From RFC 1035: <let-dig-hyp> ::= <let-dig> | "-"
|
||||||
-- | Either a Letter, Digital or an Hyphenation character.
|
-- | Either a Letter, Digital or an Hyphenation character.
|
||||||
let_dig_hyp :: Parser Char
|
let_dig_hyp :: forall e. Parser e Char
|
||||||
let_dig_hyp = let_dig <|> char '-'
|
let_dig_hyp = let_dig <|> char '-'
|
||||||
|
|
||||||
-- | From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
-- | From RFC 1035: <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
|
||||||
ldh_str :: Parser (Array Char)
|
ldh_str :: forall e. Parser e (Array Char)
|
||||||
ldh_str = many1 let_dig_hyp
|
ldh_str = many1 let_dig_hyp
|
||||||
|
|
||||||
-- TODO: 63
|
-- TODO: 63
|
||||||
@ -39,11 +44,11 @@ max_domain_length = 15
|
|||||||
last_char :: String -> Maybe Char
|
last_char :: String -> Maybe Char
|
||||||
last_char = A.last <<< CU.toCharArray
|
last_char = A.last <<< CU.toCharArray
|
||||||
|
|
||||||
parse_last_char :: String -> Parser Char -> Boolean
|
parse_last_char :: forall e. String -> Parser e Char -> Boolean
|
||||||
parse_last_char s p = case last_char s of
|
parse_last_char s p = case last_char s of
|
||||||
Nothing -> false
|
Nothing -> false
|
||||||
Just c -> case parse p (CU.singleton c) of
|
Just c -> case parse p (CU.singleton c) of
|
||||||
Nothing -> false
|
Left _ -> false
|
||||||
_ -> true
|
_ -> true
|
||||||
|
|
||||||
-- | FIXME: This is flawed.
|
-- | FIXME: This is flawed.
|
||||||
@ -51,14 +56,17 @@ parse_last_char s p = case last_char s of
|
|||||||
-- | the code will "continue to work" but without what's been parsed.
|
-- | the code will "continue to work" but without what's been parsed.
|
||||||
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
|
-- | This may sound reasonable but it prevents knowing if a problem actually occured!
|
||||||
-- | We cannot do a `try parser <|> alternative` since it will always work!
|
-- | We cannot do a `try parser <|> alternative` since it will always work!
|
||||||
try :: forall a. Monoid a => Parser a -> Parser a
|
try :: forall e a. Parser e a -> Parser e a
|
||||||
try p = Parser p'
|
try p = Parser p'
|
||||||
where p' str = case parse p str of
|
where p' str = case parse p str of
|
||||||
Nothing -> Just (Tuple mempty str) -- FIXME! Need a better base structure.
|
Left { pos: _, input: _, error: e } -> case e of
|
||||||
Just x -> pure x
|
Nothing -> fail 0 str
|
||||||
|
error -> failError 0 str error
|
||||||
|
right -> right
|
||||||
|
-- Right { pos: pos, input: input, result: value } -> success pos input value
|
||||||
|
|
||||||
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
|
||||||
label :: Parser String
|
label :: forall e. Parser e String
|
||||||
label = do
|
label = do
|
||||||
l <- letter
|
l <- letter
|
||||||
s <- try ldh_str
|
s <- try ldh_str
|
||||||
@ -66,11 +74,11 @@ label = do
|
|||||||
if (S.length labelstr > label_maxsize)
|
if (S.length labelstr > label_maxsize)
|
||||||
then empty
|
then empty
|
||||||
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
|
else if (S.length labelstr > 1 && not (parse_last_char labelstr let_dig))
|
||||||
then empty
|
then empty -- TODO: error management.
|
||||||
else pure labelstr
|
else pure labelstr
|
||||||
|
|
||||||
-- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
-- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
|
||||||
subdomain :: Parser String
|
subdomain :: forall e. Parser e String
|
||||||
subdomain = do
|
subdomain = do
|
||||||
-- First: read a label. This is bare minimum for a subdomain.
|
-- First: read a label. This is bare minimum for a subdomain.
|
||||||
lab <- label
|
lab <- label
|
||||||
@ -84,21 +92,21 @@ subdomain = do
|
|||||||
|
|
||||||
-- | Test for the end-of-file (no more input).
|
-- | Test for the end-of-file (no more input).
|
||||||
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
|
-- | If not EOF the parser fails (Nothing), otherwise it provides an empty string.
|
||||||
eof :: Parser String
|
eof :: forall e. Parser e String
|
||||||
eof = Parser \str -> case S.length str of
|
eof = Parser \str -> case S.length str of
|
||||||
0 -> Just (Tuple "" str)
|
0 -> success 0 str ""
|
||||||
_ -> Nothing -- this means an error
|
_ -> failError 0 str (Just EOFExpected)
|
||||||
|
|
||||||
-- | Test for the domain to be a list of subdomains then an end-of-file.
|
-- | Test for the domain to be a list of subdomains then an end-of-file.
|
||||||
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
|
-- | Said otherwise, the input must only contain a domain (with or without a final dot '.').
|
||||||
sub_eof :: Parser String
|
sub_eof :: forall e. Parser e String
|
||||||
sub_eof = do
|
sub_eof = do
|
||||||
sub <- subdomain
|
sub <- subdomain
|
||||||
maybe_final_point <- char '.' <|> pure ' '
|
maybe_final_point <- try char '.'
|
||||||
_ <- eof -- In case there is still some input, it fails.
|
_ <- eof -- In case there is still some input, it fails.
|
||||||
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
|
let parsed_domain = did_we_parsed_the_final_point maybe_final_point sub
|
||||||
if S.length parsed_domain > max_domain_length
|
if S.length parsed_domain > max_domain_length
|
||||||
then empty -- TODO: error management
|
then empty -- TODO: error management.
|
||||||
else pure parsed_domain
|
else pure parsed_domain
|
||||||
where
|
where
|
||||||
did_we_parsed_the_final_point '.' sub = sub <> "."
|
did_we_parsed_the_final_point '.' sub = sub <> "."
|
||||||
@ -111,5 +119,5 @@ sub_eof = do
|
|||||||
-- | However, this last '.' character should be acceptable in most applications.
|
-- | However, this last '.' character should be acceptable in most applications.
|
||||||
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
-- | In some cases, a fully qualified domain name (FQDN) such as `example.com.`
|
||||||
-- | has to be differenciated from a "relative" name (www).
|
-- | has to be differenciated from a "relative" name (www).
|
||||||
domain :: Parser String
|
domain :: forall e. Parser e String
|
||||||
domain = (string " " *> eof) <|> sub_eof
|
domain = (string " " *> eof) <|> sub_eof
|
||||||
|
@ -38,11 +38,12 @@ import Data.String.CodeUnits (fromCharArray)
|
|||||||
-- let toprint = if b then "FOUND IT" else "not found"
|
-- let toprint = if b then "FOUND IT" else "not found"
|
||||||
-- log $ toprint <> ", rest: " <> str
|
-- log $ toprint <> ", rest: " <> str
|
||||||
|
|
||||||
logtest :: forall a. String -> Parser a -> String -> (a -> String) -> Effect Unit
|
logtest :: forall e v. String -> Parser e v -> String -> (v -> String) -> Effect Unit
|
||||||
logtest fname p str r = do
|
logtest fname (Parse p) str r = do
|
||||||
log $ "(" <> fname <> ") parsing '" <> str <> "': "
|
log $ "(" <> fname <> ") parsing '" <> str <> "': "
|
||||||
<> case parse p str of
|
<> case p str of
|
||||||
Just (Tuple x y) -> show (r x) <> " " <> show y
|
Left { pos: pos, input: input, error: error } -> "failed: " <> error
|
||||||
|
Right { pos: pos, input: input, result: value } -> show (r x) <> " " <> show y
|
||||||
Nothing -> "failed"
|
Nothing -> "failed"
|
||||||
|
|
||||||
id :: forall a. a -> a
|
id :: forall a. a -> a
|
||||||
|
@ -15,15 +15,11 @@ import Data.String.CodeUnits (uncons, toCharArray, fromCharArray)
|
|||||||
|
|
||||||
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
import BaseFunctions (concat, isAlpha, isAlphaNum, isDigit, isLower, isSpace, isUpper)
|
||||||
|
|
||||||
type Input = String
|
type Input = (Tuple Position String)
|
||||||
type Size = Int
|
type Size = Int
|
||||||
type Position = Int
|
type Position = Int
|
||||||
data DomainError
|
type Error e = { pos :: Position, input :: String, error :: Maybe e }
|
||||||
= SubdomainTooLarge Position Size
|
type Value v = { pos :: Position, input :: String, result :: v }
|
||||||
| DomainTooLarge Size
|
|
||||||
| InvalidCharacter Position
|
|
||||||
type Error e = { pos :: Position, input :: Input, error :: Maybe e }
|
|
||||||
type Value v = { pos :: Position, input :: Input, result :: v }
|
|
||||||
type Result e v = Either (Error e) (Value v)
|
type Result e v = Either (Error e) (Value v)
|
||||||
|
|
||||||
newtype Parser e v = Parser (Input -> Result e v)
|
newtype Parser e v = Parser (Input -> Result e v)
|
||||||
|
Loading…
Reference in New Issue
Block a user