From b3abe0241fafcd5d259edf29b2b4e883bb98fdf0 Mon Sep 17 00:00:00 2001 From: Philippe Pittoli Date: Thu, 25 Jan 2024 07:19:25 +0100 Subject: [PATCH] Provide a solution for "the last char" problem. --- src/GenericParser/EmailAddress.purs | 25 ++++++++++++++------- src/GenericParser/IPAddress.purs | 34 ++++++++++++----------------- src/GenericParser/Parser.purs | 3 ++- test/Main.purs | 3 +++ 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/src/GenericParser/EmailAddress.purs b/src/GenericParser/EmailAddress.purs index 98ccdf9..a16c520 100644 --- a/src/GenericParser/EmailAddress.purs +++ b/src/GenericParser/EmailAddress.purs @@ -12,21 +12,28 @@ import Data.String.CodeUnits as CU import GenericParser.DomainParser.Common (DomainError) import GenericParser.DomainParser (sub_eof) import GenericParser.Parser (Parser(..) - , char , digit , letter, many1 - , current_input, failureError, parse) + , char , digit , letter, item + , current_input, failureError, parse, rollback, until) data EmailError = InvalidCharacter | InvalidDomain (Maybe DomainError) --- | TODO: For now, `login_part` only checks that the first character is a letter, --- | the rest can be any letter, digit, '-' or '.', including the last character. +-- | TODO: For now, `login_part` only checks that +-- | (a) the first character is a letter, +-- | (b) the last character is either a letter or a digit. +-- | The rest can be any letter, digit, '-' or '.'. login_part :: Parser EmailError String login_part = do firstchar <- letter - rest <- many1 (letter <|> digit <|> char '-' <|> char '.') - pure $ CU.fromCharArray $ [firstchar] <> rest + rest <- until end (letter <|> digit <|> char '-' <|> char '.') + lastchar <- letter <|> digit + pure $ CU.fromCharArray $ [firstchar] <> rest <> [lastchar] + where + end :: forall e. Parser e Char + end = do c <- item + _ <- char '@' + pure c --- | TODO: `email` checks but doesn't consume the domain part of the email address. email :: Parser EmailError String email = do login <- login_part _ <- char '@' @@ -34,4 +41,6 @@ email = do login <- login_part case parse sub_eof input of Left {error, position} -> Parser \_ -> failureError position (Just $ InvalidDomain error) - Right {result} -> pure $ login <> "@" <> result + Right {result, suffix} -> do + _ <- rollback suffix + pure $ login <> "@" <> result diff --git a/src/GenericParser/IPAddress.purs b/src/GenericParser/IPAddress.purs index 8b75750..e2ba103 100644 --- a/src/GenericParser/IPAddress.purs +++ b/src/GenericParser/IPAddress.purs @@ -10,9 +10,8 @@ import Data.String.CodeUnits as CU import GenericParser.Parser (Parser(..) , failureError - , current_position - , tryMaybe - , many1 + , current_position, current_input + , many1, rollback , sat, char, nat) import GenericParser.BaseFunctions (repeat, isHexaDecimal) @@ -42,12 +41,10 @@ ipv6_chunk' = do chunk <- ipv6_chunk _ <- char ':' pure chunk --- | `ipv6_chunk''` is `ipv6_chunk` with a following *optional* ':' character. --- | This last character is dropped and the result of `ipv6_chunk` is propagated. +-- | `ipv6_chunk''` is `ipv6_chunk` with a prefix ':' character. ipv6_chunk'' :: Parser IPv6Error String -ipv6_chunk'' = do chunk <- ipv6_chunk - _ <- tryMaybe $ char ':' - pure chunk +ipv6_chunk'' = do _ <- char ':' + ipv6_chunk ipv6_full :: Parser IPv6Error String ipv6_full = do chunks <- many1 ipv6_chunk' @@ -59,20 +56,19 @@ ipv6_full = do chunks <- many1 ipv6_chunk' GT -> Parser \_ -> failureError pos (Just TooManyChunks) -- | `ipv6_shortened` parses a shortened representation of an IPv6 address. --- | --- | TODO: `ipv6_shortened` allows an invalid following ':' character. ipv6_shortened :: Parser IPv6Error String ipv6_shortened = do chunks_part1 <- many1 ipv6_chunk' - pos <- current_position + input <- current_input _ <- char ':' + _ <- rollback input chunks_part2 <- many1 ipv6_chunk'' let part1 = A.fold (A.intersperse ":" (chunks_part1)) part2 = A.fold (A.intersperse ":" (chunks_part2)) nb_zero_filling = 8 - (A.length chunks_part1 + A.length chunks_part2) filling = A.fold (A.intersperse ":" $ repeat nb_zero_filling "0000") if nb_zero_filling < 1 - then Parser \_ -> failureError pos (Just IPv6UnrelevantShortRepresentation) + then Parser \_ -> failureError input.position (Just IPv6UnrelevantShortRepresentation) else pure $ A.fold (A.intersperse ":" [part1, filling, part2]) -- | TODO: accept IPv6 addresses between brackets ([ipv6]). @@ -97,11 +93,10 @@ ipv4_byte' = do number <- ipv4_byte _ <- char '.' pure number --- | `ipv4_byte''` is `ipv4_byte` with an optional leading '.'. +-- | `ipv4_byte''` is `ipv4_byte` with a prefix '.'. ipv4_byte'' :: Parser IPv4Error Int -ipv4_byte'' = do number <- ipv4_byte - _ <- tryMaybe $ char '.' - pure number +ipv4_byte'' = do _ <- char '.' + ipv4_byte ipv4_generic4bytes :: Parser IPv4Error String ipv4_generic4bytes = @@ -112,20 +107,19 @@ ipv4_generic4bytes = pure $ A.fold (A.intersperse "." $ map show [b1,b2,b3,b4]) -- | `ipv4_shortened` parses a short representation of an IPv4 address, such as '127..1'. --- | --- | TODO: `ipv4_shortened` allows an invalid following '.' character. ipv4_shortened :: Parser IPv4Error String ipv4_shortened = do chunks_part1 <- many1 ipv4_byte' - pos <- current_position + input <- current_input _ <- char '.' + _ <- rollback input chunks_part2 <- many1 ipv4_byte'' let part1 = A.fold (A.intersperse "." (map show chunks_part1)) part2 = A.fold (A.intersperse "." (map show chunks_part2)) nb_zero_filling = 4 - (A.length chunks_part1 + A.length chunks_part2) filling = A.fold (A.intersperse "." $ repeat nb_zero_filling "0") if nb_zero_filling < 1 - then Parser \_ -> failureError pos (Just IPv4UnrelevantShortRepresentation) + then Parser \_ -> failureError input.position (Just IPv4UnrelevantShortRepresentation) else pure $ A.fold (A.intersperse "." [part1, filling, part2]) ipv4 :: Parser IPv4Error String diff --git a/src/GenericParser/Parser.purs b/src/GenericParser/Parser.purs index f7a8779..37773c3 100644 --- a/src/GenericParser/Parser.purs +++ b/src/GenericParser/Parser.purs @@ -218,7 +218,8 @@ many1 p = do first <- p rest <- A.many p pure $ A.cons first rest --- | TODO: `until` +-- | `until` parses the input until an ending parser succeed. +-- | Arguments are the end-parser then the parser to read the input. until :: forall e v. Parser e v -> Parser e v -> Parser e (Array v) until parser_end p = do input <- current_input diff --git a/test/Main.purs b/test/Main.purs index 24bd43a..c242003 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -114,6 +114,7 @@ main = do test_series "ModernDomains.domain" ModernDomains.domain id showerror domains log "" + test_ipv4 "10..1." test_ipv4 "10..1" test_ipv4 "1..2" test_ipv4 "1.2.3.4" @@ -125,6 +126,7 @@ main = do log "" test_ipv6 "2001:0" test_ipv6 "2001::0" + test_ipv6 "2001::1:" test_ipv6 "2001:0db8:0000:0000:0000:8a2e:0370:7334:30:1035:3" test_ipv6 "2001:0db8:0000:0000:0000:8a2e:0370:7334" test_ipv6 "2001:0db8::8a2e:0370:7334" @@ -133,3 +135,4 @@ main = do test_email "guy@example.com" test_email "guy.name@example.com" test_email "well-look-at-this-domain@.com" + test_email "guy-@example.com"