Accepting a final "." at the end of a subdomain (both RFC1035 and Modern Parser).

This commit is contained in:
Philippe Pittoli 2024-01-20 01:33:42 +01:00
parent 634aad96b7
commit 6a78e863c1
3 changed files with 24 additions and 35 deletions

View file

@ -19,7 +19,7 @@ import GenericParser.Parser (Parser(..)
, failureError , failureError
, current_position , current_position
, char, letter, parse, string , char, letter, parse, string
, try, tryMaybe) , tryMaybe)
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ] -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
-- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`). -- | In practice, the first character can be an underscore (for example, see `_dmarc.example.com`).
@ -53,16 +53,18 @@ label = do
_ -> true _ -> true
-- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label> -- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
-- | For implementation details, this accepts a final dot "." as a suffix.
subdomain :: Parser DomainError String subdomain :: Parser DomainError String
subdomain = do subdomain = do
-- First: read a label. This is bare minimum for a subdomain. -- First: read a label. This is bare minimum for a subdomain.
lab <- label lab <- label
upperlabels <- try do point <- tryMaybe $ char '.'
_ <- char '.' case point of
sub <- defer \_ -> subdomain
pure sub
case upperlabels of
Nothing -> pure lab Nothing -> pure lab
Just _ -> do
upperlabels <- tryMaybe $ defer \_ -> subdomain
case upperlabels of
Nothing -> pure $ lab <> "."
Just l -> pure $ lab <> "." <> l Just l -> pure $ lab <> "." <> l
-- | Test for the domain to be a list of subdomains then an end-of-file. -- | Test for the domain to be a list of subdomains then an end-of-file.

View file

@ -13,13 +13,13 @@ import Data.String as S
import Data.String.CodeUnits as CU import Data.String.CodeUnits as CU
-- Import all common functions between RFC1035 and modern domain parsing. -- Import all common functions between RFC1035 and modern domain parsing.
import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig, let_dig_hyp, max_domain_length, max_label_length, Size) import GenericParser.DomainParser.Common (DomainError(..), eof, ldh_str, let_dig, max_domain_length, max_label_length)
import GenericParser.Parser (Parser(..) import GenericParser.Parser (Parser(..)
, success, failureError , failureError
, current_position , current_position
, alphanum, char, letter, many1, parse, string , char, letter, parse, string
, try, tryMaybe) , tryMaybe)
-- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ] -- | From RFC 1035: <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
label :: Parser DomainError String label :: Parser DomainError String
@ -51,16 +51,18 @@ label = do
_ -> true _ -> true
-- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label> -- | From RFC 1035: <subdomain> ::= <label> | <subdomain> "." <label>
-- | For implementation details, this accepts a final dot "." as a suffix.
subdomain :: Parser DomainError String subdomain :: Parser DomainError String
subdomain = do subdomain = do
-- First: read a label. This is bare minimum for a subdomain. -- First: read a label. This is bare minimum for a subdomain.
lab <- label lab <- label
upperlabels <- try do point <- tryMaybe $ char '.'
_ <- char '.' case point of
sub <- defer \_ -> subdomain
pure sub
case upperlabels of
Nothing -> pure lab Nothing -> pure lab
Just _ -> do
upperlabels <- tryMaybe $ defer \_ -> subdomain
case upperlabels of
Nothing -> pure $ lab <> "."
Just l -> pure $ lab <> "." <> l Just l -> pure $ lab <> "." <> l
-- | Test for the domain to be a list of subdomains then an end-of-file. -- | Test for the domain to be a list of subdomains then an end-of-file.
@ -68,16 +70,11 @@ subdomain = do
sub_eof :: Parser DomainError String sub_eof :: Parser DomainError String
sub_eof = do sub_eof = do
sub <- subdomain sub <- subdomain
maybe_final_point <- tryMaybe $ char '.'
_ <- eof -- In case there is still some input, it fails. _ <- eof -- In case there is still some input, it fails.
pos <- current_position pos <- current_position
let parsed_domain = did_we_parse_the_final_point maybe_final_point sub if S.length sub > max_domain_length
if S.length parsed_domain > max_domain_length then Parser \_ -> failureError pos (Just <<< DomainTooLarge $ S.length sub)
then Parser \_ -> failureError pos (Just <<< DomainTooLarge $ S.length parsed_domain) else pure sub
else pure parsed_domain
where
did_we_parse_the_final_point Nothing sub = sub
did_we_parse_the_final_point _ sub = sub <> "."
-- | From RFC 1035: <domain> ::= <subdomain> | " " -- | From RFC 1035: <domain> ::= <subdomain> | " "
-- | -- |

View file

@ -45,19 +45,9 @@ main = do
"a.x", "a.x",
"a2.org", "a2.org",
"a33.org", "a33.org",
"a444.org",
"a5555.org",
"a66666.org",
"a777777.org",
"a8888888.org",
"xblah.a.x", "xblah.a.x",
"xblah.a2.org", "xblah.a2.org",
"xblah.a33.org", "xblah.a33.org",
"xblah.a444.org",
"xblah.a5555.org",
"xblah.a66666.org",
"xblah.a777777.org",
"xblah.a8888888.org",
"_dmarc.example.com" "_dmarc.example.com"
] ]
test_series "ldh_str" ldh_str fromCharArray showerror domains test_series "ldh_str" ldh_str fromCharArray showerror domains