From 12487502eea6f4da28b7c2890968a9f29fafa641 Mon Sep 17 00:00:00 2001 From: Karchnu Date: Thu, 3 Dec 2020 02:43:09 +0100 Subject: [PATCH] Initial commit. --- .gitignore | 4 + assets/children.gui | 7 + assets/imbricated_rectangles.gui | 16 + assets/property_binding.gui | 6 + assets/property_binding_through_children.gui | 7 + assets/rectangle_with_dynamic_anchors.gui | 43 + assets/rectangle_with_event.gui | 7 + assets/rectangle_with_simple_event.gui | 7 + assets/simple_declarations.gui | 4 + assets/simple_rectangle.gui | 5 + assets/syntax.gui | 36 + assets/text_label.gui | 9 + src/tokenizer.zig | 1921 ++++++++++++++++++ 13 files changed, 2072 insertions(+) create mode 100644 .gitignore create mode 100644 assets/children.gui create mode 100644 assets/imbricated_rectangles.gui create mode 100644 assets/property_binding.gui create mode 100644 assets/property_binding_through_children.gui create mode 100644 assets/rectangle_with_dynamic_anchors.gui create mode 100644 assets/rectangle_with_event.gui create mode 100644 assets/rectangle_with_simple_event.gui create mode 100644 assets/simple_declarations.gui create mode 100644 assets/simple_rectangle.gui create mode 100644 assets/syntax.gui create mode 100644 assets/text_label.gui create mode 100644 src/tokenizer.zig diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..379defe --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +old/ +bin/ +lib/ +src/zig-cache diff --git a/assets/children.gui b/assets/children.gui new file mode 100644 index 0000000..2e127b3 --- /dev/null +++ b/assets/children.gui @@ -0,0 +1,7 @@ +Object { + property string thing: "i has the thing" + + Object { + property string thing: "i has the other thing" + } +} diff --git a/assets/imbricated_rectangles.gui b/assets/imbricated_rectangles.gui new file mode 100644 index 0000000..c97401e --- /dev/null +++ b/assets/imbricated_rectangles.gui @@ -0,0 +1,16 @@ +Rectangle { + color: "red" + width: 300 + height: 200 + + Rectangle { + color: "blue" + width: 100 + height: 100 + + anchors { + top: 50 + left: 50 + } + } +} diff --git a/assets/property_binding.gui b/assets/property_binding.gui new file mode 100644 index 0000000..a78d1fb --- /dev/null +++ b/assets/property_binding.gui @@ -0,0 +1,6 @@ +Object (the-object) { + property string thing: "i has the thing" + property string thing-alt: thing + property string thing-alt-2: self.thing + property string thing-alt-3: the-object.thing +} diff --git a/assets/property_binding_through_children.gui b/assets/property_binding_through_children.gui new file mode 100644 index 0000000..15a5e92 --- /dev/null +++ b/assets/property_binding_through_children.gui @@ -0,0 +1,7 @@ +Object { + property string thing: "i has the thing" + + Object { + property string thing: parent.thing + } +} diff --git a/assets/rectangle_with_dynamic_anchors.gui b/assets/rectangle_with_dynamic_anchors.gui new file mode 100644 index 0000000..c5201b0 --- /dev/null +++ b/assets/rectangle_with_dynamic_anchors.gui @@ -0,0 +1,43 @@ +Rectangle { + color: "red" + width: 300 + height: 200 + + Rectangle { + color: "black" + width: 50 + height: 50 + + anchors.centerIn: parent + } + Rectangle { + color: "blue" + width: 50 + height: 50 + + anchors { + top: parent.top + left: parent.left + } + } + Rectangle { + color: "yellow" + width: 50 + height: 50 + + anchors { + bottom: parent.bottom + right: parent.right + } + } + Rectangle { + color: "green" + width: 50 + height: 50 + + anchors { + horizontalCenter: parent.horizontalCenter + bottom: parent.bottom + } + } +} diff --git 
a/assets/rectangle_with_event.gui b/assets/rectangle_with_event.gui new file mode 100644 index 0000000..748b12e --- /dev/null +++ b/assets/rectangle_with_event.gui @@ -0,0 +1,7 @@ +Rectangle { + id: clickable + color: "blue" + width: 300 + height: 300 + onClick: emit ["hello, there", "i has events"] +} diff --git a/assets/rectangle_with_simple_event.gui b/assets/rectangle_with_simple_event.gui new file mode 100644 index 0000000..2538dce --- /dev/null +++ b/assets/rectangle_with_simple_event.gui @@ -0,0 +1,7 @@ +Rectangle { + id: clickable + color: "blue" + width: 300 + height: 300 + onClick: emit! +} diff --git a/assets/simple_declarations.gui b/assets/simple_declarations.gui new file mode 100644 index 0000000..948923c --- /dev/null +++ b/assets/simple_declarations.gui @@ -0,0 +1,4 @@ +Object { + property string a: "A" + property string b: "B" +} diff --git a/assets/simple_rectangle.gui b/assets/simple_rectangle.gui new file mode 100644 index 0000000..ae46de5 --- /dev/null +++ b/assets/simple_rectangle.gui @@ -0,0 +1,5 @@ +Rectangle { + color: "red" + width: 300 + height: 200 +} diff --git a/assets/syntax.gui b/assets/syntax.gui new file mode 100644 index 0000000..696b29a --- /dev/null +++ b/assets/syntax.gui @@ -0,0 +1,36 @@ +Object (id) { + property String property-string-lalala: "value" + property Number property-number+lalala??: 12.4 + property (String|Number) lalala: 42 + + # Sets a whole hash as attribute. + property hash property-hash: { + a: b + } + + ### [[[ UNCERTAIN FEATURE STARTS HERE + # Sets specific properties in a predefined hash. + property-hash-overload { + a: b + c: 12 + } + # Sets a single property in a predefined hash. + property-hash-overload-alt.a: b + ### ]]] UNCERTAIN FEATURE ENDS HERE + + Object (id-child-1) { + property string property-child: "value" + + Object (id-child-1-1) {} + } + + Object (id-child-2) { + property string property-child: "value-alt" + + Object {} + } + + "string-child, probably ignored" + + 12 # Integer child, probably ignored +} diff --git a/assets/text_label.gui b/assets/text_label.gui new file mode 100644 index 0000000..24c84d6 --- /dev/null +++ b/assets/text_label.gui @@ -0,0 +1,9 @@ +Text { + text: "Hello, there!" 
+ font { + family: SansSerif + pixelSize: 15 + } + width: 200 + height: 200 +} diff --git a/src/tokenizer.zig b/src/tokenizer.zig new file mode 100644 index 0000000..7b3c536 --- /dev/null +++ b/src/tokenizer.zig @@ -0,0 +1,1921 @@ +const std = @import("std"); +const mem = std.mem; + +pub const Token = struct { + id: Id, + loc: Loc, + + pub const Loc = struct { + start: usize, + end: usize, + }; + + pub const keywords = std.ComptimeStringMap(Id, .{ + .{ "property", .Keyword_property }, + .{ "false", .Keyword_false }, + .{ "null", .Keyword_null }, + .{ "true", .Keyword_true }, + .{ "undefined", .Keyword_undefined }, + + .{ "text", .Keyword_text }, + .{ "pixel-size", .Keyword_pixel_size }, + .{ "family", .Keyword_family }, + .{ "height", .Keyword_height }, + + }); + + pub fn getKeyword(bytes: []const u8) ?Id { + return keywords.get(bytes); + } + + pub const Id = enum { + Invalid, + Invalid_ampersands, + Identifier, + StringLiteral, + MultilineStringLiteralLine, + CharLiteral, + Eof, + Builtin, + Bang, + Pipe, + PipePipe, + PipeEqual, + Equal, + EqualEqual, + EqualAngleBracketRight, + BangEqual, + LParen, + RParen, + Semicolon, + Percent, + PercentEqual, + LBrace, + RBrace, + LBracket, + RBracket, + Period, + PeriodAsterisk, + Ellipsis2, + Ellipsis3, + Caret, + CaretEqual, + Plus, + PlusPlus, + PlusEqual, + PlusPercent, + PlusPercentEqual, + Minus, + MinusEqual, + MinusPercent, + MinusPercentEqual, + Asterisk, + AsteriskEqual, + AsteriskAsterisk, + AsteriskPercent, + AsteriskPercentEqual, + Arrow, + Colon, + Slash, + SlashEqual, + Comma, + Ampersand, + AmpersandEqual, + QuestionMark, + AngleBracketLeft, + AngleBracketLeftEqual, + AngleBracketAngleBracketLeft, + AngleBracketAngleBracketLeftEqual, + AngleBracketRight, + AngleBracketRightEqual, + AngleBracketAngleBracketRight, + AngleBracketAngleBracketRightEqual, + Tilde, + IntegerLiteral, + FloatLiteral, + LineComment, + DocComment, + ContainerDocComment, + ShebangLine, + + Keyword_property, + Keyword_false, + Keyword_null, + Keyword_true, + Keyword_undefined, + + Keyword_text, + Keyword_pixel_size, + Keyword_family, + Keyword_height, + + pub fn symbol(id: Id) []const u8 { + return switch (id) { + .Invalid => "Invalid", + .Invalid_ampersands => "&&", + .Identifier => "Identifier", + .StringLiteral => "StringLiteral", + .MultilineStringLiteralLine => "MultilineStringLiteralLine", + .CharLiteral => "CharLiteral", + .Eof => "Eof", + .Builtin => "Builtin", + .IntegerLiteral => "IntegerLiteral", + .FloatLiteral => "FloatLiteral", + .LineComment => "LineComment", + .DocComment => "DocComment", + .ContainerDocComment => "ContainerDocComment", + .ShebangLine => "ShebangLine", + + .Bang => "!", + .Pipe => "|", + .PipePipe => "||", + .PipeEqual => "|=", + .Equal => "=", + .EqualEqual => "==", + .EqualAngleBracketRight => "=>", + .BangEqual => "!=", + .LParen => "(", + .RParen => ")", + .Semicolon => ";", + .Percent => "%", + .PercentEqual => "%=", + .LBrace => "{", + .RBrace => "}", + .LBracket => "[", + .RBracket => "]", + .Period => ".", + .PeriodAsterisk => ".*", + .Ellipsis2 => "..", + .Ellipsis3 => "...", + .Caret => "^", + .CaretEqual => "^=", + .Plus => "+", + .PlusPlus => "++", + .PlusEqual => "+=", + .PlusPercent => "+%", + .PlusPercentEqual => "+%=", + .Minus => "-", + .MinusEqual => "-=", + .MinusPercent => "-%", + .MinusPercentEqual => "-%=", + .Asterisk => "*", + .AsteriskEqual => "*=", + .AsteriskAsterisk => "**", + .AsteriskPercent => "*%", + .AsteriskPercentEqual => "*%=", + .Arrow => "->", + .Colon => ":", + .Slash => "/", + .SlashEqual 
=> "/=", + .Comma => ",", + .Ampersand => "&", + .AmpersandEqual => "&=", + .QuestionMark => "?", + .AngleBracketLeft => "<", + .AngleBracketLeftEqual => "<=", + .AngleBracketAngleBracketLeft => "<<", + .AngleBracketAngleBracketLeftEqual => "<<=", + .AngleBracketRight => ">", + .AngleBracketRightEqual => ">=", + .AngleBracketAngleBracketRight => ">>", + .AngleBracketAngleBracketRightEqual => ">>=", + .Tilde => "~", + + .Keyword_property => "property", + .Keyword_and => "and", + .Keyword_false => "false", + .Keyword_null => "null", + .Keyword_true => "true", + .Keyword_undefined => "undefined", + + .Keyword_text => "text", + .Keyword_pixel_size => "pixel-size", + .Keyword_family => "family", + .Keyword_height => "height", + + }; + } + }; +}; + +pub const Tokenizer = struct { + buffer: []const u8, + index: usize, + pending_invalid_token: ?Token, + + /// For debugging purposes + pub fn dump(self: *Tokenizer, token: *const Token) void { + std.debug.warn("{} \"{}\"\n", .{ @tagName(token.id), self.buffer[token.start..token.end] }); + } + + pub fn init(buffer: []const u8) Tokenizer { + // Skip the UTF-8 BOM if present + const src_start = if (mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else @as(usize, 0); + return Tokenizer{ + .buffer = buffer, + .index = src_start, + .pending_invalid_token = null, + }; + } + + const State = enum { + start, + identifier, + builtin, + string_literal, + string_literal_backslash, + multiline_string_literal_line, + char_literal, + char_literal_backslash, + char_literal_hex_escape, + char_literal_unicode_escape_saw_u, + char_literal_unicode_escape, + char_literal_unicode_invalid, + char_literal_unicode, + char_literal_end, + backslash, + equal, + bang, + pipe, + minus, + minus_percent, + asterisk, + asterisk_percent, + slash, + line_comment_start, + line_comment, + doc_comment_start, + doc_comment, + container_doc_comment, + zero, + int_literal_dec, + int_literal_dec_no_underscore, + int_literal_bin, + int_literal_bin_no_underscore, + int_literal_oct, + int_literal_oct_no_underscore, + int_literal_hex, + int_literal_hex_no_underscore, + num_dot_dec, + num_dot_hex, + float_fraction_dec, + float_fraction_dec_no_underscore, + float_fraction_hex, + float_fraction_hex_no_underscore, + float_exponent_unsigned, + float_exponent_num, + float_exponent_num_no_underscore, + ampersand, + caret, + percent, + plus, + plus_percent, + angle_bracket_left, + angle_bracket_angle_bracket_left, + angle_bracket_right, + angle_bracket_angle_bracket_right, + period, + period_2, + saw_at_sign, + }; + + fn isIdentifierChar(char: u8) bool { + return std.ascii.isAlNum(char) or char == '_'; + } + + pub fn next(self: *Tokenizer) Token { + if (self.pending_invalid_token) |token| { + self.pending_invalid_token = null; + return token; + } + + const start_index = self.index; + var state: State = .start; + var result = Token{ + .id = .Eof, + .loc = .{ + .start = self.index, + .end = undefined, + }, + }; + + var seen_escape_digits: usize = undefined; + var remaining_code_units: usize = undefined; + + while (self.index < self.buffer.len) : (self.index += 1) { + const c = self.buffer[self.index]; + + switch (state) { + + .start => switch (c) { + ' ', '\n', '\t', '\r' => { + result.loc.start = self.index + 1; + }, + '"' => { + state = .string_literal; + result.id = .StringLiteral; + }, + '\'' => { + state = .char_literal; + }, + 'a'...'z', 'A'...'Z', '_' => { + state = .identifier; + result.id = .Identifier; + }, + '@' => { + state = .saw_at_sign; + }, + '=' => { + state = .equal; + }, + '!' 
=> { + state = .bang; + }, + '|' => { + state = .pipe; + }, + '(' => { + result.id = .LParen; + self.index += 1; + break; + }, + ')' => { + result.id = .RParen; + self.index += 1; + break; + }, + '[' => { + result.id = .LBracket; + self.index += 1; + break; + }, + ']' => { + result.id = .RBracket; + self.index += 1; + break; + }, + ';' => { + result.id = .Semicolon; + self.index += 1; + break; + }, + ',' => { + result.id = .Comma; + self.index += 1; + break; + }, + '?' => { + result.id = .QuestionMark; + self.index += 1; + break; + }, + ':' => { + result.id = .Colon; + self.index += 1; + break; + }, + '%' => { + state = .percent; + }, + '*' => { + state = .asterisk; + }, + '+' => { + state = .plus; + }, + '<' => { + state = .angle_bracket_left; + }, + '>' => { + state = .angle_bracket_right; + }, + '^' => { + state = .caret; + }, + '\\' => { + state = .backslash; + result.id = .MultilineStringLiteralLine; + }, + '{' => { + result.id = .LBrace; + self.index += 1; + break; + }, + '}' => { + result.id = .RBrace; + self.index += 1; + break; + }, + '~' => { + result.id = .Tilde; + self.index += 1; + break; + }, + '.' => { + state = .period; + }, + '-' => { + state = .minus; + }, + + '#' => { + state = .line_comment_start; + result.id = .LineComment; + }, + + '/' => { + state = .slash; + }, + '&' => { + state = .ampersand; + }, + '0' => { + state = .zero; + result.id = .IntegerLiteral; + }, + '1'...'9' => { + state = .int_literal_dec; + result.id = .IntegerLiteral; + }, + else => { + result.id = .Invalid; + self.index += 1; + break; + }, + }, + + .saw_at_sign => switch (c) { + '"' => { + result.id = .Identifier; + state = .string_literal; + }, + else => { + // reinterpret as a builtin + self.index -= 1; + state = .builtin; + result.id = .Builtin; + }, + }, + + .ampersand => switch (c) { + '&' => { + result.id = .Invalid_ampersands; + self.index += 1; + break; + }, + '=' => { + result.id = .AmpersandEqual; + self.index += 1; + break; + }, + else => { + result.id = .Ampersand; + break; + }, + }, + + .asterisk => switch (c) { + '=' => { + result.id = .AsteriskEqual; + self.index += 1; + break; + }, + '*' => { + result.id = .AsteriskAsterisk; + self.index += 1; + break; + }, + '%' => { + state = .asterisk_percent; + }, + else => { + result.id = .Asterisk; + break; + }, + }, + + .asterisk_percent => switch (c) { + '=' => { + result.id = .AsteriskPercentEqual; + self.index += 1; + break; + }, + else => { + result.id = .AsteriskPercent; + break; + }, + }, + + .percent => switch (c) { + '=' => { + result.id = .PercentEqual; + self.index += 1; + break; + }, + else => { + result.id = .Percent; + break; + }, + }, + + .plus => switch (c) { + '=' => { + result.id = .PlusEqual; + self.index += 1; + break; + }, + '+' => { + result.id = .PlusPlus; + self.index += 1; + break; + }, + '%' => { + state = .plus_percent; + }, + else => { + result.id = .Plus; + break; + }, + }, + + .plus_percent => switch (c) { + '=' => { + result.id = .PlusPercentEqual; + self.index += 1; + break; + }, + else => { + result.id = .PlusPercent; + break; + }, + }, + + .caret => switch (c) { + '=' => { + result.id = .CaretEqual; + self.index += 1; + break; + }, + else => { + result.id = .Caret; + break; + }, + }, + + .identifier => switch (c) { + 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, + else => { + if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| { + result.id = id; + } + break; + }, + }, + + .builtin => switch (c) { + 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, + else => break, + }, + + .backslash => switch (c) 
{ + '\\' => { + state = .multiline_string_literal_line; + }, + else => break, + }, + + .string_literal => switch (c) { + '\\' => { + state = .string_literal_backslash; + }, + '"' => { + self.index += 1; + break; + }, + '\n', '\r' => break, // Look for this error later. + else => self.checkLiteralCharacter(), + }, + + .string_literal_backslash => switch (c) { + '\n', '\r' => break, // Look for this error later. + else => { + state = .string_literal; + }, + }, + + .char_literal => switch (c) { + '\\' => { + state = .char_literal_backslash; + }, + '\'', 0x80...0xbf, 0xf8...0xff => { + result.id = .Invalid; + break; + }, + 0xc0...0xdf => { // 110xxxxx + remaining_code_units = 1; + state = .char_literal_unicode; + }, + 0xe0...0xef => { // 1110xxxx + remaining_code_units = 2; + state = .char_literal_unicode; + }, + 0xf0...0xf7 => { // 11110xxx + remaining_code_units = 3; + state = .char_literal_unicode; + }, + else => { + state = .char_literal_end; + }, + }, + + .char_literal_backslash => switch (c) { + '\n' => { + result.id = .Invalid; + break; + }, + 'x' => { + state = .char_literal_hex_escape; + seen_escape_digits = 0; + }, + 'u' => { + state = .char_literal_unicode_escape_saw_u; + }, + else => { + state = .char_literal_end; + }, + }, + + .char_literal_hex_escape => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + seen_escape_digits += 1; + if (seen_escape_digits == 2) { + state = .char_literal_end; + } + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .char_literal_unicode_escape_saw_u => switch (c) { + '{' => { + state = .char_literal_unicode_escape; + seen_escape_digits = 0; + }, + else => { + result.id = .Invalid; + state = .char_literal_unicode_invalid; + }, + }, + + .char_literal_unicode_escape => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + seen_escape_digits += 1; + }, + '}' => { + if (seen_escape_digits == 0) { + result.id = .Invalid; + state = .char_literal_unicode_invalid; + } else { + state = .char_literal_end; + } + }, + else => { + result.id = .Invalid; + state = .char_literal_unicode_invalid; + }, + }, + + .char_literal_unicode_invalid => switch (c) { + // Keep consuming characters until an obvious stopping point. + // This consolidates e.g. 
`u{0ab1Q}` into a single invalid token + // instead of creating the tokens `u{0ab1`, `Q`, `}` + '0'...'9', 'a'...'z', 'A'...'Z', '}' => {}, + else => break, + }, + + .char_literal_end => switch (c) { + '\'' => { + result.id = .CharLiteral; + self.index += 1; + break; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .char_literal_unicode => switch (c) { + 0x80...0xbf => { + remaining_code_units -= 1; + if (remaining_code_units == 0) { + state = .char_literal_end; + } + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .multiline_string_literal_line => switch (c) { + '\n' => { + self.index += 1; + break; + }, + '\t' => {}, + else => self.checkLiteralCharacter(), + }, + + .bang => switch (c) { + '=' => { + result.id = .BangEqual; + self.index += 1; + break; + }, + else => { + result.id = .Bang; + break; + }, + }, + + .pipe => switch (c) { + '=' => { + result.id = .PipeEqual; + self.index += 1; + break; + }, + '|' => { + result.id = .PipePipe; + self.index += 1; + break; + }, + else => { + result.id = .Pipe; + break; + }, + }, + + .equal => switch (c) { + '=' => { + result.id = .EqualEqual; + self.index += 1; + break; + }, + '>' => { + result.id = .EqualAngleBracketRight; + self.index += 1; + break; + }, + else => { + result.id = .Equal; + break; + }, + }, + + .minus => switch (c) { + '>' => { + result.id = .Arrow; + self.index += 1; + break; + }, + '=' => { + result.id = .MinusEqual; + self.index += 1; + break; + }, + '%' => { + state = .minus_percent; + }, + else => { + result.id = .Minus; + break; + }, + }, + + .minus_percent => switch (c) { + '=' => { + result.id = .MinusPercentEqual; + self.index += 1; + break; + }, + else => { + result.id = .MinusPercent; + break; + }, + }, + + .angle_bracket_left => switch (c) { + '<' => { + state = .angle_bracket_angle_bracket_left; + }, + '=' => { + result.id = .AngleBracketLeftEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketLeft; + break; + }, + }, + + .angle_bracket_angle_bracket_left => switch (c) { + '=' => { + result.id = .AngleBracketAngleBracketLeftEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketAngleBracketLeft; + break; + }, + }, + + .angle_bracket_right => switch (c) { + '>' => { + state = .angle_bracket_angle_bracket_right; + }, + '=' => { + result.id = .AngleBracketRightEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketRight; + break; + }, + }, + + .angle_bracket_angle_bracket_right => switch (c) { + '=' => { + result.id = .AngleBracketAngleBracketRightEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketAngleBracketRight; + break; + }, + }, + + .period => switch (c) { + '.' => { + state = .period_2; + }, + '*' => { + result.id = .PeriodAsterisk; + self.index += 1; + break; + }, + else => { + result.id = .Period; + break; + }, + }, + + .period_2 => switch (c) { + '.' => { + result.id = .Ellipsis3; + self.index += 1; + break; + }, + else => { + result.id = .Ellipsis2; + break; + }, + }, + + .slash => switch (c) { + '/' => { + state = .line_comment_start; + result.id = .LineComment; + }, + '=' => { + result.id = .SlashEqual; + self.index += 1; + break; + }, + else => { + result.id = .Slash; + break; + }, + }, + + .line_comment_start => switch (c) { + '/' => { + state = .doc_comment_start; + }, + '!' 
=> { + result.id = .ContainerDocComment; + state = .container_doc_comment; + }, + '\n' => break, + '\t', '\r' => state = .line_comment, + else => { + state = .line_comment; + self.checkLiteralCharacter(); + }, + }, + + .doc_comment_start => switch (c) { + '/' => { + state = .line_comment; + }, + '\n' => { + result.id = .DocComment; + break; + }, + '\t', '\r' => { + state = .doc_comment; + result.id = .DocComment; + }, + else => { + state = .doc_comment; + result.id = .DocComment; + self.checkLiteralCharacter(); + }, + }, + + .line_comment, .doc_comment, .container_doc_comment => switch (c) { + '\n' => break, + '\t', '\r' => {}, + else => self.checkLiteralCharacter(), + }, + + .zero => switch (c) { + 'b' => { + state = .int_literal_bin_no_underscore; + }, + 'o' => { + state = .int_literal_oct_no_underscore; + }, + 'x' => { + state = .int_literal_hex_no_underscore; + }, + '0'...'9', '_', '.', 'e', 'E' => { + // reinterpret as a decimal number + self.index -= 1; + state = .int_literal_dec; + }, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .int_literal_bin_no_underscore => switch (c) { + '0'...'1' => { + state = .int_literal_bin; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .int_literal_bin => switch (c) { + '_' => { + state = .int_literal_bin_no_underscore; + }, + '0'...'1' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .int_literal_oct_no_underscore => switch (c) { + '0'...'7' => { + state = .int_literal_oct; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .int_literal_oct => switch (c) { + '_' => { + state = .int_literal_oct_no_underscore; + }, + '0'...'7' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .int_literal_dec_no_underscore => switch (c) { + '0'...'9' => { + state = .int_literal_dec; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .int_literal_dec => switch (c) { + '_' => { + state = .int_literal_dec_no_underscore; + }, + '.' => { + state = .num_dot_dec; + result.id = .FloatLiteral; + }, + 'e', 'E' => { + state = .float_exponent_unsigned; + result.id = .FloatLiteral; + }, + '0'...'9' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .int_literal_hex_no_underscore => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + state = .int_literal_hex; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .int_literal_hex => switch (c) { + '_' => { + state = .int_literal_hex_no_underscore; + }, + '.' => { + state = .num_dot_hex; + result.id = .FloatLiteral; + }, + 'p', 'P' => { + state = .float_exponent_unsigned; + result.id = .FloatLiteral; + }, + '0'...'9', 'a'...'f', 'A'...'F' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .num_dot_dec => switch (c) { + '.' => { + self.index -= 1; + state = .start; + break; + }, + 'e', 'E' => { + state = .float_exponent_unsigned; + }, + '0'...'9' => { + result.id = .FloatLiteral; + state = .float_fraction_dec; + }, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .num_dot_hex => switch (c) { + '.' 
=> { + self.index -= 1; + state = .start; + break; + }, + 'p', 'P' => { + state = .float_exponent_unsigned; + }, + '0'...'9', 'a'...'f', 'A'...'F' => { + result.id = .FloatLiteral; + state = .float_fraction_hex; + }, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .float_fraction_dec_no_underscore => switch (c) { + '0'...'9' => { + state = .float_fraction_dec; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .float_fraction_dec => switch (c) { + '_' => { + state = .float_fraction_dec_no_underscore; + }, + 'e', 'E' => { + state = .float_exponent_unsigned; + }, + '0'...'9' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .float_fraction_hex_no_underscore => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + state = .float_fraction_hex; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .float_fraction_hex => switch (c) { + '_' => { + state = .float_fraction_hex_no_underscore; + }, + 'p', 'P' => { + state = .float_exponent_unsigned; + }, + '0'...'9', 'a'...'f', 'A'...'F' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + .float_exponent_unsigned => switch (c) { + '+', '-' => { + state = .float_exponent_num_no_underscore; + }, + else => { + // reinterpret as a normal exponent number + self.index -= 1; + state = .float_exponent_num_no_underscore; + }, + }, + + .float_exponent_num_no_underscore => switch (c) { + '0'...'9' => { + state = .float_exponent_num; + }, + else => { + result.id = .Invalid; + break; + }, + }, + + .float_exponent_num => switch (c) { + '_' => { + state = .float_exponent_num_no_underscore; + }, + '0'...'9' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = .Invalid; + } + break; + }, + }, + + } + } else if (self.index == self.buffer.len) { + switch (state) { + + .start, + .int_literal_dec, + .int_literal_bin, + .int_literal_oct, + .int_literal_hex, + .num_dot_dec, + .num_dot_hex, + .float_fraction_dec, + .float_fraction_hex, + .float_exponent_num, + .string_literal, // find this error later + .multiline_string_literal_line, + .builtin, + => {}, + + .identifier => { + if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| { + result.id = id; + } + }, + + .line_comment, .line_comment_start => { + result.id = .LineComment; + }, + .doc_comment, .doc_comment_start => { + result.id = .DocComment; + }, + .container_doc_comment => { + result.id = .ContainerDocComment; + }, + + + // Invalid states at the end of the buffer. 
+ .int_literal_dec_no_underscore, + .int_literal_bin_no_underscore, + .int_literal_oct_no_underscore, + .int_literal_hex_no_underscore, + .float_fraction_dec_no_underscore, + .float_fraction_hex_no_underscore, + .float_exponent_num_no_underscore, + .float_exponent_unsigned, + .saw_at_sign, + .backslash, + .char_literal, + .char_literal_backslash, + .char_literal_hex_escape, + .char_literal_unicode_escape_saw_u, + .char_literal_unicode_escape, + .char_literal_unicode_invalid, + .char_literal_end, + .char_literal_unicode, + .string_literal_backslash, + => { + result.id = .Invalid; + }, + + .equal => { + result.id = .Equal; + }, + .bang => { + result.id = .Bang; + }, + .minus => { + result.id = .Minus; + }, + .slash => { + result.id = .Slash; + }, + .zero => { + result.id = .IntegerLiteral; + }, + .ampersand => { + result.id = .Ampersand; + }, + .period => { + result.id = .Period; + }, + .period_2 => { + result.id = .Ellipsis2; + }, + .pipe => { + result.id = .Pipe; + }, + .angle_bracket_angle_bracket_right => { + result.id = .AngleBracketAngleBracketRight; + }, + .angle_bracket_right => { + result.id = .AngleBracketRight; + }, + .angle_bracket_angle_bracket_left => { + result.id = .AngleBracketAngleBracketLeft; + }, + .angle_bracket_left => { + result.id = .AngleBracketLeft; + }, + .plus_percent => { + result.id = .PlusPercent; + }, + .plus => { + result.id = .Plus; + }, + .percent => { + result.id = .Percent; + }, + .caret => { + result.id = .Caret; + }, + .asterisk_percent => { + result.id = .AsteriskPercent; + }, + .asterisk => { + result.id = .Asterisk; + }, + .minus_percent => { + result.id = .MinusPercent; + }, + } + } + + if (result.id == .Eof) { + if (self.pending_invalid_token) |token| { + self.pending_invalid_token = null; + return token; + } + } + + result.loc.end = self.index; + return result; + } + + fn checkLiteralCharacter(self: *Tokenizer) void { + if (self.pending_invalid_token != null) return; + const invalid_length = self.getInvalidCharacterLength(); + if (invalid_length == 0) return; + self.pending_invalid_token = .{ + .id = .Invalid, + .loc = .{ + .start = self.index, + .end = self.index + invalid_length, + }, + }; + } + + fn getInvalidCharacterLength(self: *Tokenizer) u3 { + const c0 = self.buffer[self.index]; + if (c0 < 0x80) { + if (c0 < 0x20 or c0 == 0x7f) { + // ascii control codes are never allowed + // (note that \n was checked before we got here) + return 1; + } + // looks fine to me. + return 0; + } else { + // check utf8-encoded character. + const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1; + if (self.index + length > self.buffer.len) { + return @intCast(u3, self.buffer.len - self.index); + } + const bytes = self.buffer[self.index .. 
self.index + length]; + switch (length) { + 2 => { + const value = std.unicode.utf8Decode2(bytes) catch return length; + if (value == 0x85) return length; // U+0085 (NEL) + }, + 3 => { + const value = std.unicode.utf8Decode3(bytes) catch return length; + if (value == 0x2028) return length; // U+2028 (LS) + if (value == 0x2029) return length; // U+2029 (PS) + }, + 4 => { + _ = std.unicode.utf8Decode4(bytes) catch return length; + }, + else => unreachable, + } + self.index += length - 1; + return 0; + } + } +}; + +test "tokenizer" { + testTokenize("property", &[_]Token.Id{.Keyword_property}); +} + +//test "tokenizer - unknown length pointer and then c pointer" { +// testTokenize( +// \\[*]u8 +// \\[*c]u8 +// , &[_]Token.Id{ +// .LBracket, +// .Asterisk, +// .RBracket, +// .Identifier, +// .LBracket, +// .Asterisk, +// .Identifier, +// .RBracket, +// .Identifier, +// }); +//} + +//test "tokenizer - char literal with hex escape" { +// testTokenize( +// \\'\x1b' +// , &[_]Token.Id{.CharLiteral}); +// testTokenize( +// \\'\x1' +// , &[_]Token.Id{ .Invalid, .Invalid }); +//} + +//test "tokenizer - char literal with unicode escapes" { +// // Valid unicode escapes +// testTokenize( +// \\'\u{3}' +// , &[_]Token.Id{.CharLiteral}); +// testTokenize( +// \\'\u{01}' +// , &[_]Token.Id{.CharLiteral}); +// testTokenize( +// \\'\u{2a}' +// , &[_]Token.Id{.CharLiteral}); +// testTokenize( +// \\'\u{3f9}' +// , &[_]Token.Id{.CharLiteral}); +// testTokenize( +// \\'\u{6E09aBc1523}' +// , &[_]Token.Id{.CharLiteral}); +// testTokenize( +// \\"\u{440}" +// , &[_]Token.Id{.StringLiteral}); + +// // Invalid unicode escapes +// testTokenize( +// \\'\u' +// , &[_]Token.Id{.Invalid}); +// testTokenize( +// \\'\u{{' +// , &[_]Token.Id{ .Invalid, .Invalid }); +// testTokenize( +// \\'\u{}' +// , &[_]Token.Id{ .Invalid, .Invalid }); +// testTokenize( +// \\'\u{s}' +// , &[_]Token.Id{ .Invalid, .Invalid }); +// testTokenize( +// \\'\u{2z}' +// , &[_]Token.Id{ .Invalid, .Invalid }); +// testTokenize( +// \\'\u{4a' +// , &[_]Token.Id{.Invalid}); + +// // Test old-style unicode literals +// testTokenize( +// \\'\u0333' +// , &[_]Token.Id{ .Invalid, .Invalid }); +// testTokenize( +// \\'\U0333' +// , &[_]Token.Id{ .Invalid, .IntegerLiteral, .Invalid }); +//} + +//test "tokenizer - char literal with unicode code point" { +// testTokenize( +// \\'💩' +// , &[_]Token.Id{.CharLiteral}); +//} + +test "tokenizer - float literal e exponent" { + testTokenize("a = 4.94065645841246544177e-324;\n", &[_]Token.Id{ + .Identifier, + .Equal, + .FloatLiteral, + .Semicolon, + }); +} + +test "tokenizer - float literal p exponent" { + testTokenize("a = 0x1.a827999fcef32p+1022;\n", &[_]Token.Id{ + .Identifier, + .Equal, + .FloatLiteral, + .Semicolon, + }); +} + +test "tokenizer - chars" { + testTokenize("'c'", &[_]Token.Id{.CharLiteral}); +} + +test "tokenizer - comments" { + testTokenize("#", &[_]Token.Id{.LineComment}); + testTokenize("//", &[_]Token.Id{.LineComment}); +} + +test "tokenizer - invalid token characters" { + testTokenize("`", &[_]Token.Id{.Invalid}); + testTokenize("'c", &[_]Token.Id{.Invalid}); + testTokenize("'", &[_]Token.Id{.Invalid}); + testTokenize("''", &[_]Token.Id{ .Invalid, .Invalid }); +} + +//test "tokenizer - invalid literal/comment characters" { +// testTokenize("\"\x00\"", &[_]Token.Id{ +// .StringLiteral, +// .Invalid, +// }); +// testTokenize("//\x00", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\x1f", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\x7f", 
&[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +//} + +//test "tokenizer - utf8" { +// testTokenize("//\xc2\x80", &[_]Token.Id{.LineComment}); +// testTokenize("//\xf4\x8f\xbf\xbf", &[_]Token.Id{.LineComment}); +//} + +//test "tokenizer - invalid utf8" { +// testTokenize("//\x80", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xbf", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xf8", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xff", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xc2\xc0", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xe0", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xf0", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xf0\x90\x80\xc0", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +//} + +//test "tokenizer - illegal unicode codepoints" { +// // unicode newline characters.U+0085, U+2028, U+2029 +// testTokenize("//\xc2\x84", &[_]Token.Id{.LineComment}); +// testTokenize("//\xc2\x85", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xc2\x86", &[_]Token.Id{.LineComment}); +// testTokenize("//\xe2\x80\xa7", &[_]Token.Id{.LineComment}); +// testTokenize("//\xe2\x80\xa8", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xe2\x80\xa9", &[_]Token.Id{ +// .LineComment, +// .Invalid, +// }); +// testTokenize("//\xe2\x80\xaa", &[_]Token.Id{.LineComment}); +//} + +//test "tokenizer - string identifier and builtin fns" { +// testTokenize( +// \\const @"if" = @import("std"); +// , &[_]Token.Id{ +// .Keyword_const, +// .Identifier, +// .Equal, +// .Builtin, +// .LParen, +// .StringLiteral, +// .RParen, +// .Semicolon, +// }); +//} + +//test "tokenizer - multiline string literal with literal tab" { +// testTokenize( +// \\\\foo bar +// , &[_]Token.Id{ +// .MultilineStringLiteralLine, +// }); +//} + +test "tokenizer - comments with literal tab" { + testTokenize( + \\//foo bar + \\//!foo bar + \\///foo bar + \\// foo + \\/// foo + \\/// /foo + , &[_]Token.Id{ + .LineComment, + .ContainerDocComment, + .DocComment, + .LineComment, + .DocComment, + .DocComment, + }); +} + +//test "tokenizer - pipe and then invalid" { +// testTokenize("||=", &[_]Token.Id{ +// .PipePipe, +// .Equal, +// }); +//} + +//test "tokenizer - line comment and doc comment" { +// testTokenize("//", &[_]Token.Id{.LineComment}); +// testTokenize("// a / b", &[_]Token.Id{.LineComment}); +// testTokenize("// /", &[_]Token.Id{.LineComment}); +// testTokenize("/// a", &[_]Token.Id{.DocComment}); +// testTokenize("///", &[_]Token.Id{.DocComment}); +// testTokenize("////", &[_]Token.Id{.LineComment}); +// testTokenize("//!", &[_]Token.Id{.ContainerDocComment}); +// testTokenize("//!!", &[_]Token.Id{.ContainerDocComment}); +//} + +//test "tokenizer - line comment followed by identifier" { +// testTokenize( +// \\ Unexpected, +// \\ // another +// \\ Another, +// , &[_]Token.Id{ +// .Identifier, +// .Comma, +// .LineComment, +// .Identifier, +// .Comma, +// }); +//} + +//test "tokenizer - UTF-8 BOM is recognized and skipped" { +// testTokenize("\xEF\xBB\xBFa;\n", &[_]Token.Id{ +// .Identifier, +// .Semicolon, +// }); +//} + +//test "correctly parse pointer assignment" { +// testTokenize("b.*=3;\n", &[_]Token.Id{ +// .Identifier, +// .PeriodAsterisk, +// .Equal, +// .IntegerLiteral, +// .Semicolon, +// }); +//} + +//test "tokenizer - number literals decimal" { +// 
testTokenize("0", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("1", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("2", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("3", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("4", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("5", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("6", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("7", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("8", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("9", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier }); + +// testTokenize("0_0", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0001", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral}); + +// testTokenize("00_", &[_]Token.Id{.Invalid}); +// testTokenize("0_0_", &[_]Token.Id{.Invalid}); +// testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma }); + +// testTokenize("1.", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0.0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1.0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("10.0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0e0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1e0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1e100", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1.e100", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus }); + +// testTokenize("1e", &[_]Token.Id{.Invalid}); +// testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral }); +// testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma }); +// testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral }); +// testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus }); +// testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.0e", &[_]Token.Id{.Invalid}); +// testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma }); +// testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier }); +// 
testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus }); +//} + +//test "tokenizer - number literals binary" { +// testTokenize("0b0", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0b1", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier }); + +// testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period }); +// testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral }); + +// testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b1_", &[_]Token.Id{.Invalid}); +// testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b0_1_", &[_]Token.Id{.Invalid}); +// testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma }); +//} + +//test "tokenizer - number literals octal" { +// testTokenize("0o0", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o1", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o2", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o3", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o4", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o5", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o6", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o7", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral }); +// testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier }); + +// testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral}); +// 
testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period }); +// testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral }); + +// testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o1_", &[_]Token.Id{.Invalid}); +// testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o0_1_", &[_]Token.Id{.Invalid}); +// testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma }); +//} + +//test "tokenizer - number literals hexadeciaml" { +// testTokenize("0x0", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x1", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x2", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x3", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x4", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x5", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x6", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x7", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x8", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x9", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xa", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xb", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xc", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xd", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xe", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xf", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xA", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xB", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xC", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xD", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xE", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0xF", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier }); + +// testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral}); + +// testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x1_", &[_]Token.Id{.Invalid}); +// testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0_1_", &[_]Token.Id{.Invalid}); +// testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma }); + +// testTokenize("0x1.", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xF.", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral}); +// 
testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral }); + +// testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral}); +// testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral}); + +// testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral}); +// testTokenize("0x1p", &[_]Token.Id{.Invalid}); +// testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0.p", &[_]Token.Id{.Invalid}); +// testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral }); +// testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral }); +// testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0.0_", &[_]Token.Id{.Invalid}); +// testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier }); +// testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier }); +// testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof }); +//} + +fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { + var tokenizer = Tokenizer.init(source); + for (expected_tokens) |expected_token_id| { + const token = tokenizer.next(); + if (token.id != expected_token_id) { + std.debug.panic("expected {}, found {}\n", .{ @tagName(expected_token_id), @tagName(token.id) }); + } + } + const last_token = tokenizer.next(); + std.testing.expect(last_token.id == .Eof); +}