Provide a solution for "the last char" problem.

This commit is contained in:
Philippe Pittoli 2024-01-25 07:19:25 +01:00
parent 540b1c1962
commit b3abe0241f
4 changed files with 36 additions and 29 deletions

View File

@ -12,21 +12,28 @@ import Data.String.CodeUnits as CU
import GenericParser.DomainParser.Common (DomainError)
import GenericParser.DomainParser (sub_eof)
import GenericParser.Parser (Parser(..)
, char , digit , letter, many1
, current_input, failureError, parse)
, char , digit , letter, item
, current_input, failureError, parse, rollback, until)
data EmailError
= InvalidCharacter
| InvalidDomain (Maybe DomainError)
-- | TODO: For now, `login_part` only checks that the first character is a letter,
-- | the rest can be any letter, digit, '-' or '.', including the last character.
-- | TODO: For now, `login_part` only checks that
-- | (a) the first character is a letter,
-- | (b) the last character is either a letter or a digit.
-- | The rest can be any letter, digit, '-' or '.'.
login_part :: Parser EmailError String
login_part = do firstchar <- letter
rest <- many1 (letter <|> digit <|> char '-' <|> char '.')
pure $ CU.fromCharArray $ [firstchar] <> rest
rest <- until end (letter <|> digit <|> char '-' <|> char '.')
lastchar <- letter <|> digit
pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar]
where
end :: forall e. Parser e Char
end = do c <- item
_ <- char '@'
pure c
-- | TODO: `email` checks but doesn't consume the domain part of the email address.
email :: Parser EmailError String
email = do login <- login_part
_ <- char '@'
@ -34,4 +41,6 @@ email = do login <- login_part
case parse sub_eof input of
Left {error, position} ->
Parser \_ -> failureError position (Just $ InvalidDomain error)
Right {result} -> pure $ login <> "@" <> result
Right {result, suffix} -> do
_ <- rollback suffix
pure $ login <> "@" <> result

View File

@ -10,9 +10,8 @@ import Data.String.CodeUnits as CU
import GenericParser.Parser (Parser(..)
, failureError
, current_position
, tryMaybe
, many1
, current_position, current_input
, many1, rollback
, sat, char, nat)
import GenericParser.BaseFunctions (repeat, isHexaDecimal)
@ -42,12 +41,10 @@ ipv6_chunk' = do chunk <- ipv6_chunk
_ <- char ':'
pure chunk
-- | `ipv6_chunk''` is `ipv6_chunk` with a following *optional* ':' character.
-- | This last character is dropped and the result of `ipv6_chunk` is propagated.
-- | `ipv6_chunk''` is `ipv6_chunk` with a prefix ':' character.
ipv6_chunk'' :: Parser IPv6Error String
ipv6_chunk'' = do chunk <- ipv6_chunk
_ <- tryMaybe $ char ':'
pure chunk
ipv6_chunk'' = do _ <- char ':'
ipv6_chunk
ipv6_full :: Parser IPv6Error String
ipv6_full = do chunks <- many1 ipv6_chunk'
@ -59,20 +56,19 @@ ipv6_full = do chunks <- many1 ipv6_chunk'
GT -> Parser \_ -> failureError pos (Just TooManyChunks)
-- | `ipv6_shortened` parses a shortened representation of an IPv6 address.
-- |
-- | TODO: `ipv6_shortened` allows an invalid following ':' character.
ipv6_shortened :: Parser IPv6Error String
ipv6_shortened =
do chunks_part1 <- many1 ipv6_chunk'
pos <- current_position
input <- current_input
_ <- char ':'
_ <- rollback input
chunks_part2 <- many1 ipv6_chunk''
let part1 = A.fold (A.intersperse ":" (chunks_part1))
part2 = A.fold (A.intersperse ":" (chunks_part2))
nb_zero_filling = 8 - (A.length chunks_part1 + A.length chunks_part2)
filling = A.fold (A.intersperse ":" $ repeat nb_zero_filling "0000")
if nb_zero_filling < 1
then Parser \_ -> failureError pos (Just IPv6UnrelevantShortRepresentation)
then Parser \_ -> failureError input.position (Just IPv6UnrelevantShortRepresentation)
else pure $ A.fold (A.intersperse ":" [part1, filling, part2])
-- | TODO: accept IPv6 addresses between brackets ([ipv6]).
@ -97,11 +93,10 @@ ipv4_byte' = do number <- ipv4_byte
_ <- char '.'
pure number
-- | `ipv4_byte''` is `ipv4_byte` with an optional leading '.'.
-- | `ipv4_byte''` is `ipv4_byte` with a prefix '.'.
ipv4_byte'' :: Parser IPv4Error Int
ipv4_byte'' = do number <- ipv4_byte
_ <- tryMaybe $ char '.'
pure number
ipv4_byte'' = do _ <- char '.'
ipv4_byte
ipv4_generic4bytes :: Parser IPv4Error String
ipv4_generic4bytes =
@ -112,20 +107,19 @@ ipv4_generic4bytes =
pure $ A.fold (A.intersperse "." $ map show [b1,b2,b3,b4])
-- | `ipv4_shortened` parses a short representation of an IPv4 address, such as '127..1'.
-- |
-- | TODO: `ipv4_shortened` allows an invalid following '.' character.
ipv4_shortened :: Parser IPv4Error String
ipv4_shortened =
do chunks_part1 <- many1 ipv4_byte'
pos <- current_position
input <- current_input
_ <- char '.'
_ <- rollback input
chunks_part2 <- many1 ipv4_byte''
let part1 = A.fold (A.intersperse "." (map show chunks_part1))
part2 = A.fold (A.intersperse "." (map show chunks_part2))
nb_zero_filling = 4 - (A.length chunks_part1 + A.length chunks_part2)
filling = A.fold (A.intersperse "." $ repeat nb_zero_filling "0")
if nb_zero_filling < 1
then Parser \_ -> failureError pos (Just IPv4UnrelevantShortRepresentation)
then Parser \_ -> failureError input.position (Just IPv4UnrelevantShortRepresentation)
else pure $ A.fold (A.intersperse "." [part1, filling, part2])
ipv4 :: Parser IPv4Error String

View File

@ -218,7 +218,8 @@ many1 p = do first <- p
rest <- A.many p
pure $ A.cons first rest
-- | TODO: `until`
-- | `until` parses the input until an ending parser succeed.
-- | Arguments are the end-parser then the parser to read the input.
until :: forall e v. Parser e v -> Parser e v -> Parser e (Array v)
until parser_end p = do
input <- current_input

View File

@ -114,6 +114,7 @@ main = do
test_series "ModernDomains.domain" ModernDomains.domain id showerror domains
log ""
test_ipv4 "10..1."
test_ipv4 "10..1"
test_ipv4 "1..2"
test_ipv4 "1.2.3.4"
@ -125,6 +126,7 @@ main = do
log ""
test_ipv6 "2001:0"
test_ipv6 "2001::0"
test_ipv6 "2001::1:"
test_ipv6 "2001:0db8:0000:0000:0000:8a2e:0370:7334:30:1035:3"
test_ipv6 "2001:0db8:0000:0000:0000:8a2e:0370:7334"
test_ipv6 "2001:0db8::8a2e:0370:7334"
@ -133,3 +135,4 @@ main = do
test_email "guy@example.com"
test_email "guy.name@example.com"
test_email "well-look-at-this-domain@.com"
test_email "guy-@example.com"