// guid/src/tokenizer.zig
const std = @import("std");
const mem = std.mem;
pub const Token = struct {
id: Id,
loc: Loc,
pub const Loc = struct {
start: usize,
end: usize,
};
pub const keywords = std.ComptimeStringMap(Id, .{
.{ "property", .Keyword_property },
.{ "false", .Keyword_false },
.{ "null", .Keyword_null },
.{ "true", .Keyword_true },
.{ "undefined", .Keyword_undefined },
.{ "text", .Keyword_text },
.{ "pixel-size", .Keyword_pixel_size },
.{ "family", .Keyword_family },
.{ "height", .Keyword_height },
});
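// Note: keyword strings containing '-' (such as "pixel-size") sit in the map
// but are never produced by next(), since '-' ends an identifier in the
// scanner below.
/// Returns the keyword Id for `bytes`, or null if it is not a keyword.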
pub fn getKeyword(bytes: []const u8) ?Id {
return keywords.get(bytes);
}
pub const Id = enum {
Invalid,
Invalid_ampersands,
Identifier,
StringLiteral,
MultilineStringLiteralLine,
CharLiteral,
Eof,
Builtin,
Bang,
Pipe,
PipePipe,
PipeEqual,
Equal,
EqualEqual,
EqualAngleBracketRight,
BangEqual,
LParen,
RParen,
Semicolon,
Percent,
PercentEqual,
LBrace,
RBrace,
LBracket,
RBracket,
Period,
PeriodAsterisk,
Ellipsis2,
Ellipsis3,
Caret,
CaretEqual,
Plus,
PlusPlus,
PlusEqual,
PlusPercent,
PlusPercentEqual,
Minus,
MinusEqual,
MinusPercent,
MinusPercentEqual,
Asterisk,
AsteriskEqual,
AsteriskAsterisk,
AsteriskPercent,
AsteriskPercentEqual,
Arrow,
Colon,
Slash,
SlashEqual,
Comma,
Ampersand,
AmpersandEqual,
QuestionMark,
AngleBracketLeft,
AngleBracketLeftEqual,
AngleBracketAngleBracketLeft,
AngleBracketAngleBracketLeftEqual,
AngleBracketRight,
AngleBracketRightEqual,
AngleBracketAngleBracketRight,
AngleBracketAngleBracketRightEqual,
Tilde,
IntegerLiteral,
FloatLiteral,
LineComment,
DocComment,
ContainerDocComment,
ShebangLine,
Keyword_property,
Keyword_false,
Keyword_null,
Keyword_true,
Keyword_undefined,
Keyword_text,
Keyword_pixel_size,
Keyword_family,
Keyword_height,
pub fn symbol(id: Id) []const u8 {
return switch (id) {
.Invalid => "Invalid",
.Invalid_ampersands => "&&",
.Identifier => "Identifier",
.StringLiteral => "StringLiteral",
.MultilineStringLiteralLine => "MultilineStringLiteralLine",
.CharLiteral => "CharLiteral",
.Eof => "Eof",
.Builtin => "Builtin",
.IntegerLiteral => "IntegerLiteral",
.FloatLiteral => "FloatLiteral",
.LineComment => "LineComment",
.DocComment => "DocComment",
.ContainerDocComment => "ContainerDocComment",
.ShebangLine => "ShebangLine",
.Bang => "!",
.Pipe => "|",
.PipePipe => "||",
.PipeEqual => "|=",
.Equal => "=",
.EqualEqual => "==",
.EqualAngleBracketRight => "=>",
.BangEqual => "!=",
.LParen => "(",
.RParen => ")",
.Semicolon => ";",
.Percent => "%",
.PercentEqual => "%=",
.LBrace => "{",
.RBrace => "}",
.LBracket => "[",
.RBracket => "]",
.Period => ".",
.PeriodAsterisk => ".*",
.Ellipsis2 => "..",
.Ellipsis3 => "...",
.Caret => "^",
.CaretEqual => "^=",
.Plus => "+",
.PlusPlus => "++",
.PlusEqual => "+=",
.PlusPercent => "+%",
.PlusPercentEqual => "+%=",
.Minus => "-",
.MinusEqual => "-=",
.MinusPercent => "-%",
.MinusPercentEqual => "-%=",
.Asterisk => "*",
.AsteriskEqual => "*=",
.AsteriskAsterisk => "**",
.AsteriskPercent => "*%",
.AsteriskPercentEqual => "*%=",
.Arrow => "->",
.Colon => ":",
.Slash => "/",
.SlashEqual => "/=",
.Comma => ",",
.Ampersand => "&",
.AmpersandEqual => "&=",
.QuestionMark => "?",
.AngleBracketLeft => "<",
.AngleBracketLeftEqual => "<=",
.AngleBracketAngleBracketLeft => "<<",
.AngleBracketAngleBracketLeftEqual => "<<=",
.AngleBracketRight => ">",
.AngleBracketRightEqual => ">=",
.AngleBracketAngleBracketRight => ">>",
.AngleBracketAngleBracketRightEqual => ">>=",
.Tilde => "~",
.Keyword_property => "property",
.Keyword_false => "false",
.Keyword_null => "null",
.Keyword_true => "true",
.Keyword_undefined => "undefined",
.Keyword_text => "text",
.Keyword_pixel_size => "pixel-size",
.Keyword_family => "family",
.Keyword_height => "height",
};
}
};
};
pub const Tokenizer = struct {
buffer: []const u8,
index: usize,
pending_invalid_token: ?Token,
/// For debugging purposes
pub fn dump(self: *Tokenizer, token: *const Token) void {
std.debug.warn("{} \"{}\"\n", .{ @tagName(token.id), self.buffer[token.loc.start..token.loc.end] });
}
pub fn init(buffer: []const u8) Tokenizer {
// Skip the UTF-8 BOM if present
const src_start = if (mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else @as(usize, 0);
return Tokenizer{
.buffer = buffer,
.index = src_start,
.pending_invalid_token = null,
};
}
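// One state per partially scanned construct; next() below runs this state
// machine one byte at a time.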
const State = enum {
start,
identifier,
builtin,
string_literal,
string_literal_backslash,
multiline_string_literal_line,
char_literal,
char_literal_backslash,
char_literal_hex_escape,
char_literal_unicode_escape_saw_u,
char_literal_unicode_escape,
char_literal_unicode_invalid,
char_literal_unicode,
char_literal_end,
backslash,
equal,
bang,
pipe,
minus,
minus_percent,
asterisk,
asterisk_percent,
slash,
line_comment_start,
line_comment,
doc_comment_start,
doc_comment,
container_doc_comment,
zero,
int_literal_dec,
int_literal_dec_no_underscore,
int_literal_bin,
int_literal_bin_no_underscore,
int_literal_oct,
int_literal_oct_no_underscore,
int_literal_hex,
int_literal_hex_no_underscore,
num_dot_dec,
num_dot_hex,
float_fraction_dec,
float_fraction_dec_no_underscore,
float_fraction_hex,
float_fraction_hex_no_underscore,
float_exponent_unsigned,
float_exponent_num,
float_exponent_num_no_underscore,
ampersand,
caret,
percent,
plus,
plus_percent,
angle_bracket_left,
angle_bracket_angle_bracket_left,
angle_bracket_right,
angle_bracket_angle_bracket_right,
period,
period_2,
saw_at_sign,
};
fn isIdentifierChar(char: u8) bool {
return std.ascii.isAlNum(char) or char == '_';
}
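/// Scans and returns the next token, returning an .Eof token once the buffer
/// is exhausted. Invalid bytes found inside string, character, or comment text
/// are queued and returned as a separate .Invalid token on a later call.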
pub fn next(self: *Tokenizer) Token {
if (self.pending_invalid_token) |token| {
self.pending_invalid_token = null;
return token;
}
const start_index = self.index;
var state: State = .start;
var result = Token{
.id = .Eof,
.loc = .{
.start = self.index,
.end = undefined,
},
};
var seen_escape_digits: usize = undefined;
var remaining_code_units: usize = undefined;
while (self.index < self.buffer.len) : (self.index += 1) {
const c = self.buffer[self.index];
switch (state) {
.start => switch (c) {
' ', '\n', '\t', '\r' => {
result.loc.start = self.index + 1;
},
'"' => {
state = .string_literal;
result.id = .StringLiteral;
},
'\'' => {
state = .char_literal;
},
'a'...'z', 'A'...'Z', '_' => {
state = .identifier;
result.id = .Identifier;
},
'@' => {
state = .saw_at_sign;
},
'=' => {
state = .equal;
},
'!' => {
state = .bang;
},
'|' => {
state = .pipe;
},
'(' => {
result.id = .LParen;
self.index += 1;
break;
},
')' => {
result.id = .RParen;
self.index += 1;
break;
},
'[' => {
result.id = .LBracket;
self.index += 1;
break;
},
']' => {
result.id = .RBracket;
self.index += 1;
break;
},
';' => {
result.id = .Semicolon;
self.index += 1;
break;
},
',' => {
result.id = .Comma;
self.index += 1;
break;
},
'?' => {
result.id = .QuestionMark;
self.index += 1;
break;
},
':' => {
result.id = .Colon;
self.index += 1;
break;
},
'%' => {
state = .percent;
},
'*' => {
state = .asterisk;
},
'+' => {
state = .plus;
},
'<' => {
state = .angle_bracket_left;
},
'>' => {
state = .angle_bracket_right;
},
'^' => {
state = .caret;
},
'\\' => {
state = .backslash;
result.id = .MultilineStringLiteralLine;
},
'{' => {
result.id = .LBrace;
self.index += 1;
break;
},
'}' => {
result.id = .RBrace;
self.index += 1;
break;
},
'~' => {
result.id = .Tilde;
self.index += 1;
break;
},
'.' => {
state = .period;
},
'-' => {
state = .minus;
},
'#' => {
state = .line_comment_start;
result.id = .LineComment;
},
'/' => {
state = .slash;
},
'&' => {
state = .ampersand;
},
'0' => {
state = .zero;
result.id = .IntegerLiteral;
},
'1'...'9' => {
state = .int_literal_dec;
result.id = .IntegerLiteral;
},
else => {
result.id = .Invalid;
self.index += 1;
break;
},
},
.saw_at_sign => switch (c) {
'"' => {
result.id = .Identifier;
state = .string_literal;
},
else => {
// reinterpret as a builtin
self.index -= 1;
state = .builtin;
result.id = .Builtin;
},
},
.ampersand => switch (c) {
'&' => {
result.id = .Invalid_ampersands;
self.index += 1;
break;
},
'=' => {
result.id = .AmpersandEqual;
self.index += 1;
break;
},
else => {
result.id = .Ampersand;
break;
},
},
.asterisk => switch (c) {
'=' => {
result.id = .AsteriskEqual;
self.index += 1;
break;
},
'*' => {
result.id = .AsteriskAsterisk;
self.index += 1;
break;
},
'%' => {
state = .asterisk_percent;
},
else => {
result.id = .Asterisk;
break;
},
},
.asterisk_percent => switch (c) {
'=' => {
result.id = .AsteriskPercentEqual;
self.index += 1;
break;
},
else => {
result.id = .AsteriskPercent;
break;
},
},
.percent => switch (c) {
'=' => {
result.id = .PercentEqual;
self.index += 1;
break;
},
else => {
result.id = .Percent;
break;
},
},
.plus => switch (c) {
'=' => {
result.id = .PlusEqual;
self.index += 1;
break;
},
'+' => {
result.id = .PlusPlus;
self.index += 1;
break;
},
'%' => {
state = .plus_percent;
},
else => {
result.id = .Plus;
break;
},
},
.plus_percent => switch (c) {
'=' => {
result.id = .PlusPercentEqual;
self.index += 1;
break;
},
else => {
result.id = .PlusPercent;
break;
},
},
.caret => switch (c) {
'=' => {
result.id = .CaretEqual;
self.index += 1;
break;
},
else => {
result.id = .Caret;
break;
},
},
.identifier => switch (c) {
'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
else => {
if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| {
result.id = id;
}
break;
},
},
.builtin => switch (c) {
'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
else => break,
},
.backslash => switch (c) {
'\\' => {
state = .multiline_string_literal_line;
},
else => break,
},
.string_literal => switch (c) {
'\\' => {
state = .string_literal_backslash;
},
'"' => {
self.index += 1;
break;
},
'\n', '\r' => break, // Look for this error later.
else => self.checkLiteralCharacter(),
},
.string_literal_backslash => switch (c) {
'\n', '\r' => break, // Look for this error later.
else => {
state = .string_literal;
},
},
.char_literal => switch (c) {
'\\' => {
state = .char_literal_backslash;
},
'\'', 0x80...0xbf, 0xf8...0xff => {
result.id = .Invalid;
break;
},
0xc0...0xdf => { // 110xxxxx
remaining_code_units = 1;
state = .char_literal_unicode;
},
0xe0...0xef => { // 1110xxxx
remaining_code_units = 2;
state = .char_literal_unicode;
},
0xf0...0xf7 => { // 11110xxx
remaining_code_units = 3;
state = .char_literal_unicode;
},
else => {
state = .char_literal_end;
},
},
.char_literal_backslash => switch (c) {
'\n' => {
result.id = .Invalid;
break;
},
'x' => {
state = .char_literal_hex_escape;
seen_escape_digits = 0;
},
'u' => {
state = .char_literal_unicode_escape_saw_u;
},
else => {
state = .char_literal_end;
},
},
.char_literal_hex_escape => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
seen_escape_digits += 1;
if (seen_escape_digits == 2) {
state = .char_literal_end;
}
},
else => {
result.id = .Invalid;
break;
},
},
.char_literal_unicode_escape_saw_u => switch (c) {
'{' => {
state = .char_literal_unicode_escape;
seen_escape_digits = 0;
},
else => {
result.id = .Invalid;
state = .char_literal_unicode_invalid;
},
},
.char_literal_unicode_escape => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
seen_escape_digits += 1;
},
'}' => {
if (seen_escape_digits == 0) {
result.id = .Invalid;
state = .char_literal_unicode_invalid;
} else {
state = .char_literal_end;
}
},
else => {
result.id = .Invalid;
state = .char_literal_unicode_invalid;
},
},
.char_literal_unicode_invalid => switch (c) {
// Keep consuming characters until an obvious stopping point.
// This consolidates e.g. `u{0ab1Q}` into a single invalid token
// instead of creating the tokens `u{0ab1`, `Q`, `}`
'0'...'9', 'a'...'z', 'A'...'Z', '}' => {},
else => break,
},
.char_literal_end => switch (c) {
'\'' => {
result.id = .CharLiteral;
self.index += 1;
break;
},
else => {
result.id = .Invalid;
break;
},
},
.char_literal_unicode => switch (c) {
0x80...0xbf => {
remaining_code_units -= 1;
if (remaining_code_units == 0) {
state = .char_literal_end;
}
},
else => {
result.id = .Invalid;
break;
},
},
.multiline_string_literal_line => switch (c) {
'\n' => {
self.index += 1;
break;
},
'\t' => {},
else => self.checkLiteralCharacter(),
},
.bang => switch (c) {
'=' => {
result.id = .BangEqual;
self.index += 1;
break;
},
else => {
result.id = .Bang;
break;
},
},
.pipe => switch (c) {
'=' => {
result.id = .PipeEqual;
self.index += 1;
break;
},
'|' => {
result.id = .PipePipe;
self.index += 1;
break;
},
else => {
result.id = .Pipe;
break;
},
},
.equal => switch (c) {
'=' => {
result.id = .EqualEqual;
self.index += 1;
break;
},
'>' => {
result.id = .EqualAngleBracketRight;
self.index += 1;
break;
},
else => {
result.id = .Equal;
break;
},
},
.minus => switch (c) {
'>' => {
result.id = .Arrow;
self.index += 1;
break;
},
'=' => {
result.id = .MinusEqual;
self.index += 1;
break;
},
'%' => {
state = .minus_percent;
},
else => {
result.id = .Minus;
break;
},
},
.minus_percent => switch (c) {
'=' => {
result.id = .MinusPercentEqual;
self.index += 1;
break;
},
else => {
result.id = .MinusPercent;
break;
},
},
.angle_bracket_left => switch (c) {
'<' => {
state = .angle_bracket_angle_bracket_left;
},
'=' => {
result.id = .AngleBracketLeftEqual;
self.index += 1;
break;
},
else => {
result.id = .AngleBracketLeft;
break;
},
},
.angle_bracket_angle_bracket_left => switch (c) {
'=' => {
result.id = .AngleBracketAngleBracketLeftEqual;
self.index += 1;
break;
},
else => {
result.id = .AngleBracketAngleBracketLeft;
break;
},
},
.angle_bracket_right => switch (c) {
'>' => {
state = .angle_bracket_angle_bracket_right;
},
'=' => {
result.id = .AngleBracketRightEqual;
self.index += 1;
break;
},
else => {
result.id = .AngleBracketRight;
break;
},
},
.angle_bracket_angle_bracket_right => switch (c) {
'=' => {
result.id = .AngleBracketAngleBracketRightEqual;
self.index += 1;
break;
},
else => {
result.id = .AngleBracketAngleBracketRight;
break;
},
},
.period => switch (c) {
'.' => {
state = .period_2;
},
'*' => {
result.id = .PeriodAsterisk;
self.index += 1;
break;
},
else => {
result.id = .Period;
break;
},
},
.period_2 => switch (c) {
'.' => {
result.id = .Ellipsis3;
self.index += 1;
break;
},
else => {
result.id = .Ellipsis2;
break;
},
},
.slash => switch (c) {
'/' => {
state = .line_comment_start;
result.id = .LineComment;
},
'=' => {
result.id = .SlashEqual;
self.index += 1;
break;
},
else => {
result.id = .Slash;
break;
},
},
.line_comment_start => switch (c) {
'/' => {
state = .doc_comment_start;
},
'!' => {
result.id = .ContainerDocComment;
state = .container_doc_comment;
},
'\n' => break,
'\t', '\r' => state = .line_comment,
else => {
state = .line_comment;
self.checkLiteralCharacter();
},
},
.doc_comment_start => switch (c) {
'/' => {
state = .line_comment;
},
'\n' => {
result.id = .DocComment;
break;
},
'\t', '\r' => {
state = .doc_comment;
result.id = .DocComment;
},
else => {
state = .doc_comment;
result.id = .DocComment;
self.checkLiteralCharacter();
},
},
.line_comment, .doc_comment, .container_doc_comment => switch (c) {
'\n' => break,
'\t', '\r' => {},
else => self.checkLiteralCharacter(),
},
.zero => switch (c) {
'b' => {
state = .int_literal_bin_no_underscore;
},
'o' => {
state = .int_literal_oct_no_underscore;
},
'x' => {
state = .int_literal_hex_no_underscore;
},
'0'...'9', '_', '.', 'e', 'E' => {
// reinterpret as a decimal number
self.index -= 1;
state = .int_literal_dec;
},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.int_literal_bin_no_underscore => switch (c) {
'0'...'1' => {
state = .int_literal_bin;
},
else => {
result.id = .Invalid;
break;
},
},
.int_literal_bin => switch (c) {
'_' => {
state = .int_literal_bin_no_underscore;
},
'0'...'1' => {},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.int_literal_oct_no_underscore => switch (c) {
'0'...'7' => {
state = .int_literal_oct;
},
else => {
result.id = .Invalid;
break;
},
},
.int_literal_oct => switch (c) {
'_' => {
state = .int_literal_oct_no_underscore;
},
'0'...'7' => {},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.int_literal_dec_no_underscore => switch (c) {
'0'...'9' => {
state = .int_literal_dec;
},
else => {
result.id = .Invalid;
break;
},
},
.int_literal_dec => switch (c) {
'_' => {
state = .int_literal_dec_no_underscore;
},
'.' => {
state = .num_dot_dec;
result.id = .FloatLiteral;
},
'e', 'E' => {
state = .float_exponent_unsigned;
result.id = .FloatLiteral;
},
'0'...'9' => {},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.int_literal_hex_no_underscore => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
state = .int_literal_hex;
},
else => {
result.id = .Invalid;
break;
},
},
.int_literal_hex => switch (c) {
'_' => {
state = .int_literal_hex_no_underscore;
},
'.' => {
state = .num_dot_hex;
result.id = .FloatLiteral;
},
'p', 'P' => {
state = .float_exponent_unsigned;
result.id = .FloatLiteral;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.num_dot_dec => switch (c) {
'.' => {
self.index -= 1;
state = .start;
break;
},
'e', 'E' => {
state = .float_exponent_unsigned;
},
'0'...'9' => {
result.id = .FloatLiteral;
state = .float_fraction_dec;
},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.num_dot_hex => switch (c) {
'.' => {
self.index -= 1;
state = .start;
break;
},
'p', 'P' => {
state = .float_exponent_unsigned;
},
'0'...'9', 'a'...'f', 'A'...'F' => {
result.id = .FloatLiteral;
state = .float_fraction_hex;
},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.float_fraction_dec_no_underscore => switch (c) {
'0'...'9' => {
state = .float_fraction_dec;
},
else => {
result.id = .Invalid;
break;
},
},
.float_fraction_dec => switch (c) {
'_' => {
state = .float_fraction_dec_no_underscore;
},
'e', 'E' => {
state = .float_exponent_unsigned;
},
'0'...'9' => {},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.float_fraction_hex_no_underscore => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
state = .float_fraction_hex;
},
else => {
result.id = .Invalid;
break;
},
},
.float_fraction_hex => switch (c) {
'_' => {
state = .float_fraction_hex_no_underscore;
},
'p', 'P' => {
state = .float_exponent_unsigned;
},
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
.float_exponent_unsigned => switch (c) {
'+', '-' => {
state = .float_exponent_num_no_underscore;
},
else => {
// reinterpret as a normal exponent number
self.index -= 1;
state = .float_exponent_num_no_underscore;
},
},
.float_exponent_num_no_underscore => switch (c) {
'0'...'9' => {
state = .float_exponent_num;
},
else => {
result.id = .Invalid;
break;
},
},
.float_exponent_num => switch (c) {
'_' => {
state = .float_exponent_num_no_underscore;
},
'0'...'9' => {},
else => {
if (isIdentifierChar(c)) {
result.id = .Invalid;
}
break;
},
},
}
} else if (self.index == self.buffer.len) {
switch (state) {
.start,
.int_literal_dec,
.int_literal_bin,
.int_literal_oct,
.int_literal_hex,
.num_dot_dec,
.num_dot_hex,
.float_fraction_dec,
.float_fraction_hex,
.float_exponent_num,
.string_literal, // find this error later
.multiline_string_literal_line,
.builtin,
=> {},
.identifier => {
if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| {
result.id = id;
}
},
.line_comment, .line_comment_start => {
result.id = .LineComment;
},
.doc_comment, .doc_comment_start => {
result.id = .DocComment;
},
.container_doc_comment => {
result.id = .ContainerDocComment;
},
// Invalid states at the end of the buffer.
.int_literal_dec_no_underscore,
.int_literal_bin_no_underscore,
.int_literal_oct_no_underscore,
.int_literal_hex_no_underscore,
.float_fraction_dec_no_underscore,
.float_fraction_hex_no_underscore,
.float_exponent_num_no_underscore,
.float_exponent_unsigned,
.saw_at_sign,
.backslash,
.char_literal,
.char_literal_backslash,
.char_literal_hex_escape,
.char_literal_unicode_escape_saw_u,
.char_literal_unicode_escape,
.char_literal_unicode_invalid,
.char_literal_end,
.char_literal_unicode,
.string_literal_backslash,
=> {
result.id = .Invalid;
},
.equal => {
result.id = .Equal;
},
.bang => {
result.id = .Bang;
},
.minus => {
result.id = .Minus;
},
.slash => {
result.id = .Slash;
},
.zero => {
result.id = .IntegerLiteral;
},
.ampersand => {
result.id = .Ampersand;
},
.period => {
result.id = .Period;
},
.period_2 => {
result.id = .Ellipsis2;
},
.pipe => {
result.id = .Pipe;
},
.angle_bracket_angle_bracket_right => {
result.id = .AngleBracketAngleBracketRight;
},
.angle_bracket_right => {
result.id = .AngleBracketRight;
},
.angle_bracket_angle_bracket_left => {
result.id = .AngleBracketAngleBracketLeft;
},
.angle_bracket_left => {
result.id = .AngleBracketLeft;
},
.plus_percent => {
result.id = .PlusPercent;
},
.plus => {
result.id = .Plus;
},
.percent => {
result.id = .Percent;
},
.caret => {
result.id = .Caret;
},
.asterisk_percent => {
result.id = .AsteriskPercent;
},
.asterisk => {
result.id = .Asterisk;
},
.minus_percent => {
result.id = .MinusPercent;
},
}
}
if (result.id == .Eof) {
if (self.pending_invalid_token) |token| {
self.pending_invalid_token = null;
return token;
}
}
result.loc.end = self.index;
return result;
}
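/// Queues a pending .Invalid token for the byte at the current index if it is
/// not allowed inside literal or comment text. At most one pending token is kept.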
fn checkLiteralCharacter(self: *Tokenizer) void {
if (self.pending_invalid_token != null) return;
const invalid_length = self.getInvalidCharacterLength();
if (invalid_length == 0) return;
self.pending_invalid_token = .{
.id = .Invalid,
.loc = .{
.start = self.index,
.end = self.index + invalid_length,
},
};
}
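/// Returns the length in bytes of an invalid character at the current index,
/// or 0 if the character is acceptable. Valid multi-byte UTF-8 sequences are
/// skipped by advancing the index past their continuation bytes.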
fn getInvalidCharacterLength(self: *Tokenizer) u3 {
const c0 = self.buffer[self.index];
if (c0 < 0x80) {
if (c0 < 0x20 or c0 == 0x7f) {
// ascii control codes are never allowed
// (note that \n was checked before we got here)
return 1;
}
// looks fine to me.
return 0;
} else {
// check utf8-encoded character.
const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1;
if (self.index + length > self.buffer.len) {
return @intCast(u3, self.buffer.len - self.index);
}
const bytes = self.buffer[self.index .. self.index + length];
switch (length) {
2 => {
const value = std.unicode.utf8Decode2(bytes) catch return length;
if (value == 0x85) return length; // U+0085 (NEL)
},
3 => {
const value = std.unicode.utf8Decode3(bytes) catch return length;
if (value == 0x2028) return length; // U+2028 (LS)
if (value == 0x2029) return length; // U+2029 (PS)
},
4 => {
_ = std.unicode.utf8Decode4(bytes) catch return length;
},
else => unreachable,
}
self.index += length - 1;
return 0;
}
}
};
test "tokenizer" {
testTokenize("property", &[_]Token.Id{.Keyword_property});
}
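// A few extra checks in the style of the test above (added sketches; they use
// only tokens and keywords defined in this file).
test "tokenizer - keywords" {
    testTokenize("true false null undefined", &[_]Token.Id{
        .Keyword_true,
        .Keyword_false,
        .Keyword_null,
        .Keyword_undefined,
    });
    testTokenize("text family height", &[_]Token.Id{
        .Keyword_text,
        .Keyword_family,
        .Keyword_height,
    });
}
test "Token.Id.symbol" {
    std.testing.expect(mem.eql(u8, Token.Id.symbol(.Keyword_property), "property"));
    std.testing.expect(mem.eql(u8, Token.Id.symbol(.PlusPercentEqual), "+%="));
}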
//test "tokenizer - unknown length pointer and then c pointer" {
// testTokenize(
// \\[*]u8
// \\[*c]u8
// , &[_]Token.Id{
// .LBracket,
// .Asterisk,
// .RBracket,
// .Identifier,
// .LBracket,
// .Asterisk,
// .Identifier,
// .RBracket,
// .Identifier,
// });
//}
//test "tokenizer - char literal with hex escape" {
// testTokenize(
// \\'\x1b'
// , &[_]Token.Id{.CharLiteral});
// testTokenize(
// \\'\x1'
// , &[_]Token.Id{ .Invalid, .Invalid });
//}
//test "tokenizer - char literal with unicode escapes" {
// // Valid unicode escapes
// testTokenize(
// \\'\u{3}'
// , &[_]Token.Id{.CharLiteral});
// testTokenize(
// \\'\u{01}'
// , &[_]Token.Id{.CharLiteral});
// testTokenize(
// \\'\u{2a}'
// , &[_]Token.Id{.CharLiteral});
// testTokenize(
// \\'\u{3f9}'
// , &[_]Token.Id{.CharLiteral});
// testTokenize(
// \\'\u{6E09aBc1523}'
// , &[_]Token.Id{.CharLiteral});
// testTokenize(
// \\"\u{440}"
// , &[_]Token.Id{.StringLiteral});
// // Invalid unicode escapes
// testTokenize(
// \\'\u'
// , &[_]Token.Id{.Invalid});
// testTokenize(
// \\'\u{{'
// , &[_]Token.Id{ .Invalid, .Invalid });
// testTokenize(
// \\'\u{}'
// , &[_]Token.Id{ .Invalid, .Invalid });
// testTokenize(
// \\'\u{s}'
// , &[_]Token.Id{ .Invalid, .Invalid });
// testTokenize(
// \\'\u{2z}'
// , &[_]Token.Id{ .Invalid, .Invalid });
// testTokenize(
// \\'\u{4a'
// , &[_]Token.Id{.Invalid});
// // Test old-style unicode literals
// testTokenize(
// \\'\u0333'
// , &[_]Token.Id{ .Invalid, .Invalid });
// testTokenize(
// \\'\U0333'
// , &[_]Token.Id{ .Invalid, .IntegerLiteral, .Invalid });
//}
//test "tokenizer - char literal with unicode code point" {
// testTokenize(
// \\'💩'
// , &[_]Token.Id{.CharLiteral});
//}
test "tokenizer - float literal e exponent" {
testTokenize("a = 4.94065645841246544177e-324;\n", &[_]Token.Id{
.Identifier,
.Equal,
.FloatLiteral,
.Semicolon,
});
}
test "tokenizer - float literal p exponent" {
testTokenize("a = 0x1.a827999fcef32p+1022;\n", &[_]Token.Id{
.Identifier,
.Equal,
.FloatLiteral,
.Semicolon,
});
}
test "tokenizer - chars" {
testTokenize("'c'", &[_]Token.Id{.CharLiteral});
}
test "tokenizer - comments" {
testTokenize("#", &[_]Token.Id{.LineComment});
testTokenize("//", &[_]Token.Id{.LineComment});
}
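// Added sketch covering the '#' comment form handled in the start state.
test "tokenizer - hash comments" {
    testTokenize("# foo", &[_]Token.Id{.LineComment});
    testTokenize("# foo\nheight", &[_]Token.Id{ .LineComment, .Keyword_height });
}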
test "tokenizer - invalid token characters" {
testTokenize("`", &[_]Token.Id{.Invalid});
testTokenize("'c", &[_]Token.Id{.Invalid});
testTokenize("'", &[_]Token.Id{.Invalid});
testTokenize("''", &[_]Token.Id{ .Invalid, .Invalid });
}
//test "tokenizer - invalid literal/comment characters" {
// testTokenize("\"\x00\"", &[_]Token.Id{
// .StringLiteral,
// .Invalid,
// });
// testTokenize("//\x00", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\x1f", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\x7f", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
//}
//test "tokenizer - utf8" {
// testTokenize("//\xc2\x80", &[_]Token.Id{.LineComment});
// testTokenize("//\xf4\x8f\xbf\xbf", &[_]Token.Id{.LineComment});
//}
//test "tokenizer - invalid utf8" {
// testTokenize("//\x80", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xbf", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xf8", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xff", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xc2\xc0", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xe0", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xf0", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xf0\x90\x80\xc0", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
//}
//test "tokenizer - illegal unicode codepoints" {
// // Unicode newline characters: U+0085, U+2028, U+2029
// testTokenize("//\xc2\x84", &[_]Token.Id{.LineComment});
// testTokenize("//\xc2\x85", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xc2\x86", &[_]Token.Id{.LineComment});
// testTokenize("//\xe2\x80\xa7", &[_]Token.Id{.LineComment});
// testTokenize("//\xe2\x80\xa8", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xe2\x80\xa9", &[_]Token.Id{
// .LineComment,
// .Invalid,
// });
// testTokenize("//\xe2\x80\xaa", &[_]Token.Id{.LineComment});
//}
//test "tokenizer - string identifier and builtin fns" {
// testTokenize(
// \\const @"if" = @import("std");
// , &[_]Token.Id{
// .Keyword_const,
// .Identifier,
// .Equal,
// .Builtin,
// .LParen,
// .StringLiteral,
// .RParen,
// .Semicolon,
// });
//}
//test "tokenizer - multiline string literal with literal tab" {
// testTokenize(
// \\\\foo bar
// , &[_]Token.Id{
// .MultilineStringLiteralLine,
// });
//}
test "tokenizer - comments with literal tab" {
testTokenize(
\\//foo bar
\\//!foo bar
\\///foo bar
\\// foo
\\/// foo
\\/// /foo
, &[_]Token.Id{
.LineComment,
.ContainerDocComment,
.DocComment,
.LineComment,
.DocComment,
.DocComment,
});
}
//test "tokenizer - pipe and then invalid" {
// testTokenize("||=", &[_]Token.Id{
// .PipePipe,
// .Equal,
// });
//}
//test "tokenizer - line comment and doc comment" {
// testTokenize("//", &[_]Token.Id{.LineComment});
// testTokenize("// a / b", &[_]Token.Id{.LineComment});
// testTokenize("// /", &[_]Token.Id{.LineComment});
// testTokenize("/// a", &[_]Token.Id{.DocComment});
// testTokenize("///", &[_]Token.Id{.DocComment});
// testTokenize("////", &[_]Token.Id{.LineComment});
// testTokenize("//!", &[_]Token.Id{.ContainerDocComment});
// testTokenize("//!!", &[_]Token.Id{.ContainerDocComment});
//}
//test "tokenizer - line comment followed by identifier" {
// testTokenize(
// \\ Unexpected,
// \\ // another
// \\ Another,
// , &[_]Token.Id{
// .Identifier,
// .Comma,
// .LineComment,
// .Identifier,
// .Comma,
// });
//}
//test "tokenizer - UTF-8 BOM is recognized and skipped" {
// testTokenize("\xEF\xBB\xBFa;\n", &[_]Token.Id{
// .Identifier,
// .Semicolon,
// });
//}
//test "correctly parse pointer assignment" {
// testTokenize("b.*=3;\n", &[_]Token.Id{
// .Identifier,
// .PeriodAsterisk,
// .Equal,
// .IntegerLiteral,
// .Semicolon,
// });
//}
//test "tokenizer - number literals decimal" {
// testTokenize("0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("1", &[_]Token.Id{.IntegerLiteral});
// testTokenize("2", &[_]Token.Id{.IntegerLiteral});
// testTokenize("3", &[_]Token.Id{.IntegerLiteral});
// testTokenize("4", &[_]Token.Id{.IntegerLiteral});
// testTokenize("5", &[_]Token.Id{.IntegerLiteral});
// testTokenize("6", &[_]Token.Id{.IntegerLiteral});
// testTokenize("7", &[_]Token.Id{.IntegerLiteral});
// testTokenize("8", &[_]Token.Id{.IntegerLiteral});
// testTokenize("9", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0_0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0001", &[_]Token.Id{.IntegerLiteral});
// testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral});
// testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("00_", &[_]Token.Id{.Invalid});
// testTokenize("0_0_", &[_]Token.Id{.Invalid});
// testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma });
// testTokenize("1.", &[_]Token.Id{.FloatLiteral});
// testTokenize("0.0", &[_]Token.Id{.FloatLiteral});
// testTokenize("1.0", &[_]Token.Id{.FloatLiteral});
// testTokenize("10.0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0e0", &[_]Token.Id{.FloatLiteral});
// testTokenize("1e0", &[_]Token.Id{.FloatLiteral});
// testTokenize("1e100", &[_]Token.Id{.FloatLiteral});
// testTokenize("1.e100", &[_]Token.Id{.FloatLiteral});
// testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral});
// testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral});
// testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral});
// testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral});
// testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus });
// testTokenize("1e", &[_]Token.Id{.Invalid});
// testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral });
// testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma });
// testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
// testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus });
// testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.0e", &[_]Token.Id{.Invalid});
// testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma });
// testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus });
//}
//test "tokenizer - number literals binary" {
// testTokenize("0b0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0b1", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
// testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
// testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b1_", &[_]Token.Id{.Invalid});
// testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b0_1_", &[_]Token.Id{.Invalid});
// testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma });
//}
//test "tokenizer - number literals octal" {
// testTokenize("0o0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o1", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o2", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o3", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o4", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o5", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o6", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o7", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
// testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
// testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
// testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o1_", &[_]Token.Id{.Invalid});
// testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o0_1_", &[_]Token.Id{.Invalid});
// testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
//}
//test "tokenizer - number literals hexadeciaml" {
// testTokenize("0x0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x1", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x2", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x3", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x4", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x5", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x6", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x7", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x8", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x9", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xa", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xb", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xc", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xd", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xe", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xf", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xA", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xB", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xC", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x1_", &[_]Token.Id{.Invalid});
// testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
// testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
// testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xF.", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral });
// testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral});
// testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral});
// testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});
// testTokenize("0x1p", &[_]Token.Id{.Invalid});
// testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0.p", &[_]Token.Id{.Invalid});
// testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
// testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral });
// testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
// testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier });
// testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier });
// testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof });
//}
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
var tokenizer = Tokenizer.init(source);
for (expected_tokens) |expected_token_id| {
const token = tokenizer.next();
if (token.id != expected_token_id) {
std.debug.panic("expected {}, found {}\n", .{ @tagName(expected_token_id), @tagName(token.id) });
}
}
const last_token = tokenizer.next();
std.testing.expect(last_token.id == .Eof);
}
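// Added usage sketches: driving the tokenizer directly instead of going
// through testTokenize, and mapping Token.loc back into the source buffer.
test "tokenizer - manual iteration" {
    var tokenizer = Tokenizer.init("height = 10;");
    std.testing.expect(tokenizer.next().id == .Keyword_height);
    std.testing.expect(tokenizer.next().id == .Equal);
    std.testing.expect(tokenizer.next().id == .IntegerLiteral);
    std.testing.expect(tokenizer.next().id == .Semicolon);
    std.testing.expect(tokenizer.next().id == .Eof);
}
test "tokenizer - token locations" {
    const source = "family;";
    var tokenizer = Tokenizer.init(source);
    const token = tokenizer.next();
    std.testing.expect(token.id == .Keyword_family);
    std.testing.expect(mem.eql(u8, source[token.loc.start..token.loc.end], "family"));
}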