1922 lines
65 KiB
Zig
1922 lines
65 KiB
Zig
const std = @import("std");
|
|
const mem = std.mem;
|
|
|
|
/// A single lexical token: its classification (`id`) and the byte range of
/// the source buffer it covers (`loc`).
pub const Token = struct {
    /// What kind of token this is.
    id: Id,
    /// Where the token sits in the source buffer.
    loc: Loc,

    /// Byte offsets into the source buffer. `Tokenizer.next` fills these so
    /// that `buffer[start..end]` is the token text.
    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    /// Compile-time map from keyword spelling to its token id.
    ///
    /// NOTE(review): `Tokenizer.next` only scans `[A-Za-z0-9_]` as identifier
    /// characters, so the "pixel-size" entry looks unreachable through the
    /// identifier path (the '-' ends the identifier first) — confirm.
    pub const keywords = std.ComptimeStringMap(Id, .{
        .{ "property", .Keyword_property },
        .{ "false", .Keyword_false },
        .{ "null", .Keyword_null },
        .{ "true", .Keyword_true },
        .{ "undefined", .Keyword_undefined },

        .{ "text", .Keyword_text },
        .{ "pixel-size", .Keyword_pixel_size },
        .{ "family", .Keyword_family },
        .{ "height", .Keyword_height },
    });

    /// Returns the keyword id for `bytes`, or `null` when `bytes` is not an
    /// exact keyword spelling.
    pub fn getKeyword(bytes: []const u8) ?Id {
        return keywords.get(bytes);
    }

    /// Every token classification the tokenizer can report.
    pub const Id = enum {
        Invalid,
        Invalid_ampersands,
        Identifier,
        StringLiteral,
        MultilineStringLiteralLine,
        CharLiteral,
        Eof,
        Builtin,
        Bang,
        Pipe,
        PipePipe,
        PipeEqual,
        Equal,
        EqualEqual,
        EqualAngleBracketRight,
        BangEqual,
        LParen,
        RParen,
        Semicolon,
        Percent,
        PercentEqual,
        LBrace,
        RBrace,
        LBracket,
        RBracket,
        Period,
        PeriodAsterisk,
        Ellipsis2,
        Ellipsis3,
        Caret,
        CaretEqual,
        Plus,
        PlusPlus,
        PlusEqual,
        PlusPercent,
        PlusPercentEqual,
        Minus,
        MinusEqual,
        MinusPercent,
        MinusPercentEqual,
        Asterisk,
        AsteriskEqual,
        AsteriskAsterisk,
        AsteriskPercent,
        AsteriskPercentEqual,
        Arrow,
        Colon,
        Slash,
        SlashEqual,
        Comma,
        Ampersand,
        AmpersandEqual,
        QuestionMark,
        AngleBracketLeft,
        AngleBracketLeftEqual,
        AngleBracketAngleBracketLeft,
        AngleBracketAngleBracketLeftEqual,
        AngleBracketRight,
        AngleBracketRightEqual,
        AngleBracketAngleBracketRight,
        AngleBracketAngleBracketRightEqual,
        Tilde,
        IntegerLiteral,
        FloatLiteral,
        LineComment,
        DocComment,
        ContainerDocComment,
        ShebangLine,

        Keyword_property,
        Keyword_false,
        Keyword_null,
        Keyword_true,
        Keyword_undefined,

        Keyword_text,
        Keyword_pixel_size,
        Keyword_family,
        Keyword_height,

        /// Returns a printable name for `id`: the literal spelling for
        /// operators, punctuation and keywords, and the tag name for token
        /// classes that have no fixed spelling.
        ///
        /// The switch is exhaustive on purpose (no `else`), so adding a tag
        /// to `Id` without a prong here is a compile error.
        pub fn symbol(id: Id) []const u8 {
            return switch (id) {
                .Invalid => "Invalid",
                .Invalid_ampersands => "&&",
                .Identifier => "Identifier",
                .StringLiteral => "StringLiteral",
                .MultilineStringLiteralLine => "MultilineStringLiteralLine",
                .CharLiteral => "CharLiteral",
                .Eof => "Eof",
                .Builtin => "Builtin",
                .IntegerLiteral => "IntegerLiteral",
                .FloatLiteral => "FloatLiteral",
                .LineComment => "LineComment",
                .DocComment => "DocComment",
                .ContainerDocComment => "ContainerDocComment",
                .ShebangLine => "ShebangLine",

                .Bang => "!",
                .Pipe => "|",
                .PipePipe => "||",
                .PipeEqual => "|=",
                .Equal => "=",
                .EqualEqual => "==",
                .EqualAngleBracketRight => "=>",
                .BangEqual => "!=",
                .LParen => "(",
                .RParen => ")",
                .Semicolon => ";",
                .Percent => "%",
                .PercentEqual => "%=",
                .LBrace => "{",
                .RBrace => "}",
                .LBracket => "[",
                .RBracket => "]",
                .Period => ".",
                .PeriodAsterisk => ".*",
                .Ellipsis2 => "..",
                .Ellipsis3 => "...",
                .Caret => "^",
                .CaretEqual => "^=",
                .Plus => "+",
                .PlusPlus => "++",
                .PlusEqual => "+=",
                .PlusPercent => "+%",
                .PlusPercentEqual => "+%=",
                .Minus => "-",
                .MinusEqual => "-=",
                .MinusPercent => "-%",
                .MinusPercentEqual => "-%=",
                .Asterisk => "*",
                .AsteriskEqual => "*=",
                .AsteriskAsterisk => "**",
                .AsteriskPercent => "*%",
                .AsteriskPercentEqual => "*%=",
                .Arrow => "->",
                .Colon => ":",
                .Slash => "/",
                .SlashEqual => "/=",
                .Comma => ",",
                .Ampersand => "&",
                .AmpersandEqual => "&=",
                .QuestionMark => "?",
                .AngleBracketLeft => "<",
                .AngleBracketLeftEqual => "<=",
                .AngleBracketAngleBracketLeft => "<<",
                .AngleBracketAngleBracketLeftEqual => "<<=",
                .AngleBracketRight => ">",
                .AngleBracketRightEqual => ">=",
                .AngleBracketAngleBracketRight => ">>",
                .AngleBracketAngleBracketRightEqual => ">>=",
                .Tilde => "~",

                // Only tags that exist in `Id` may appear here; a prong for a
                // non-existent `.Keyword_and` used to break compilation.
                .Keyword_property => "property",
                .Keyword_false => "false",
                .Keyword_null => "null",
                .Keyword_true => "true",
                .Keyword_undefined => "undefined",

                .Keyword_text => "text",
                .Keyword_pixel_size => "pixel-size",
                .Keyword_family => "family",
                .Keyword_height => "height",
            };
        }
    };
};
|
|
|
|
pub const Tokenizer = struct {
|
|
buffer: []const u8,
|
|
index: usize,
|
|
pending_invalid_token: ?Token,
|
|
|
|
/// For debugging purposes: prints the token's tag name followed by the source
/// text it covers (in double quotes) through `std.debug.warn`.
pub fn dump(self: *Tokenizer, token: *const Token) void {
    // `Token` keeps its byte range in `loc`; it has no `start`/`end` fields
    // of its own.
    const text = self.buffer[token.loc.start..token.loc.end];
    std.debug.warn("{} \"{}\"\n", .{ @tagName(token.id), text });
}
|
|
|
|
/// Creates a tokenizer over `buffer`, positioned just past a leading UTF-8
/// byte-order mark when one is present, otherwise at offset 0.
pub fn init(buffer: []const u8) Tokenizer {
    const utf8_bom = "\xEF\xBB\xBF";

    var first_index: usize = 0;
    if (mem.startsWith(u8, buffer, utf8_bom)) {
        // The BOM is not part of the token stream; start after it.
        first_index = utf8_bom.len;
    }

    return Tokenizer{
        .pending_invalid_token = null,
        .index = first_index,
        .buffer = buffer,
    };
}
|
|
|
|
/// States of the scanner inside `next`. Each call to `next` begins in
/// `.start`; every other state records what has been consumed so far for the
/// token currently being scanned.
const State = enum {
    // Initial state, identifiers and builtins.
    start,
    identifier,
    builtin,

    // String literals.
    string_literal,
    string_literal_backslash,
    multiline_string_literal_line,

    // Character literals, including escapes and raw multi-byte UTF-8.
    char_literal,
    char_literal_backslash,
    char_literal_hex_escape,
    char_literal_unicode_escape_saw_u,
    char_literal_unicode_escape,
    char_literal_unicode_invalid,
    char_literal_unicode,
    char_literal_end,

    // Operator prefixes that may grow into longer operators.
    backslash,
    equal,
    bang,
    pipe,
    minus,
    minus_percent,
    asterisk,
    asterisk_percent,
    slash,

    // Comments.
    line_comment_start,
    line_comment,
    doc_comment_start,
    doc_comment,
    container_doc_comment,

    // Integer literals. The `*_no_underscore` variants are the states in
    // which a digit is required next.
    zero,
    int_literal_dec,
    int_literal_dec_no_underscore,
    int_literal_bin,
    int_literal_bin_no_underscore,
    int_literal_oct,
    int_literal_oct_no_underscore,
    int_literal_hex,
    int_literal_hex_no_underscore,

    // Float literals.
    num_dot_dec,
    num_dot_hex,
    float_fraction_dec,
    float_fraction_dec_no_underscore,
    float_fraction_hex,
    float_fraction_hex_no_underscore,
    float_exponent_unsigned,
    float_exponent_num,
    float_exponent_num_no_underscore,

    // More operator prefixes.
    ampersand,
    caret,
    percent,
    plus,
    plus_percent,
    angle_bracket_left,
    angle_bracket_angle_bracket_left,
    angle_bracket_right,
    angle_bracket_angle_bracket_right,
    period,
    period_2,
    saw_at_sign,
};
|
|
|
|
/// Returns true when `char` is an ASCII letter, an ASCII digit or '_'.
///
/// Spelled out as a switch over the same character classes the `.identifier`
/// state of `next` uses, so it does not depend on `std.ascii.isAlNum`, which
/// was renamed in later standard-library releases.
fn isIdentifierChar(char: u8) bool {
    return switch (char) {
        'a'...'z', 'A'...'Z', '0'...'9', '_' => true,
        else => false,
    };
}
|
|
|
|
/// Scans and returns the next token from `buffer`, advancing `index` past it.
///
/// * An `Invalid` token queued by `checkLiteralCharacter` during an earlier
///   call is handed out first.
/// * Whitespace (' ', '\n', '\t', '\r') before a token is skipped.
/// * When the buffer is exhausted with nothing scanned, the token id is `.Eof`.
/// * `loc.start`/`loc.end` are byte offsets such that
///   `buffer[loc.start..loc.end]` is the token text.
pub fn next(self: *Tokenizer) Token {
    if (self.pending_invalid_token) |token| {
        self.pending_invalid_token = null;
        return token;
    }

    var state: State = .start;
    var result = Token{
        .id = .Eof,
        .loc = .{
            .start = self.index,
            .end = undefined,
        },
    };

    // Only meaningful while scanning a character literal; each is assigned
    // before the state that reads it is entered.
    var seen_escape_digits: usize = undefined;
    var remaining_code_units: usize = undefined;

    while (self.index < self.buffer.len) : (self.index += 1) {
        const c = self.buffer[self.index];

        switch (state) {
            .start => switch (c) {
                // Leading whitespace: the token starts after it.
                ' ', '\n', '\t', '\r' => result.loc.start = self.index + 1,
                '"' => {
                    state = .string_literal;
                    result.id = .StringLiteral;
                },
                '\'' => state = .char_literal,
                'a'...'z', 'A'...'Z', '_' => {
                    state = .identifier;
                    result.id = .Identifier;
                },
                '@' => state = .saw_at_sign,
                '=' => state = .equal,
                '!' => state = .bang,
                '|' => state = .pipe,
                '(' => {
                    result.id = .LParen;
                    self.index += 1;
                    break;
                },
                ')' => {
                    result.id = .RParen;
                    self.index += 1;
                    break;
                },
                '[' => {
                    result.id = .LBracket;
                    self.index += 1;
                    break;
                },
                ']' => {
                    result.id = .RBracket;
                    self.index += 1;
                    break;
                },
                ';' => {
                    result.id = .Semicolon;
                    self.index += 1;
                    break;
                },
                ',' => {
                    result.id = .Comma;
                    self.index += 1;
                    break;
                },
                '?' => {
                    result.id = .QuestionMark;
                    self.index += 1;
                    break;
                },
                ':' => {
                    result.id = .Colon;
                    self.index += 1;
                    break;
                },
                '%' => state = .percent,
                '*' => state = .asterisk,
                '+' => state = .plus,
                '<' => state = .angle_bracket_left,
                '>' => state = .angle_bracket_right,
                '^' => state = .caret,
                '\\' => {
                    state = .backslash;
                    result.id = .MultilineStringLiteralLine;
                },
                '{' => {
                    result.id = .LBrace;
                    self.index += 1;
                    break;
                },
                '}' => {
                    result.id = .RBrace;
                    self.index += 1;
                    break;
                },
                '~' => {
                    result.id = .Tilde;
                    self.index += 1;
                    break;
                },
                '.' => state = .period,
                '-' => state = .minus,

                // '#' opens a comment and shares the comment states with "//".
                '#' => {
                    state = .line_comment_start;
                    result.id = .LineComment;
                },

                '/' => state = .slash,
                '&' => state = .ampersand,
                '0' => {
                    state = .zero;
                    result.id = .IntegerLiteral;
                },
                '1'...'9' => {
                    state = .int_literal_dec;
                    result.id = .IntegerLiteral;
                },
                else => {
                    result.id = .Invalid;
                    self.index += 1;
                    break;
                },
            },

            .saw_at_sign => switch (c) {
                '"' => {
                    result.id = .Identifier;
                    state = .string_literal;
                },
                else => {
                    // reinterpret as a builtin: step back so this byte is
                    // looked at again in the `.builtin` state.
                    self.index -= 1;
                    state = .builtin;
                    result.id = .Builtin;
                },
            },

            .ampersand => switch (c) {
                '&' => {
                    result.id = .Invalid_ampersands;
                    self.index += 1;
                    break;
                },
                '=' => {
                    result.id = .AmpersandEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Ampersand;
                    break;
                },
            },

            .asterisk => switch (c) {
                '=' => {
                    result.id = .AsteriskEqual;
                    self.index += 1;
                    break;
                },
                '*' => {
                    result.id = .AsteriskAsterisk;
                    self.index += 1;
                    break;
                },
                '%' => state = .asterisk_percent,
                else => {
                    result.id = .Asterisk;
                    break;
                },
            },

            .asterisk_percent => switch (c) {
                '=' => {
                    result.id = .AsteriskPercentEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .AsteriskPercent;
                    break;
                },
            },

            .percent => switch (c) {
                '=' => {
                    result.id = .PercentEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Percent;
                    break;
                },
            },

            .plus => switch (c) {
                '=' => {
                    result.id = .PlusEqual;
                    self.index += 1;
                    break;
                },
                '+' => {
                    result.id = .PlusPlus;
                    self.index += 1;
                    break;
                },
                '%' => state = .plus_percent,
                else => {
                    result.id = .Plus;
                    break;
                },
            },

            .plus_percent => switch (c) {
                '=' => {
                    result.id = .PlusPercentEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .PlusPercent;
                    break;
                },
            },

            .caret => switch (c) {
                '=' => {
                    result.id = .CaretEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Caret;
                    break;
                },
            },

            .identifier => switch (c) {
                'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                else => {
                    // The identifier ended; promote it to a keyword id when
                    // the text matches one.
                    if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| {
                        result.id = id;
                    }
                    break;
                },
            },

            .builtin => switch (c) {
                'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                else => break,
            },

            .backslash => switch (c) {
                '\\' => state = .multiline_string_literal_line,
                else => break,
            },

            .string_literal => switch (c) {
                '\\' => state = .string_literal_backslash,
                '"' => {
                    self.index += 1;
                    break;
                },
                '\n', '\r' => break, // Look for this error later.
                else => self.checkLiteralCharacter(),
            },

            .string_literal_backslash => switch (c) {
                '\n', '\r' => break, // Look for this error later.
                else => state = .string_literal,
            },

            .char_literal => switch (c) {
                '\\' => state = .char_literal_backslash,
                '\'', 0x80...0xbf, 0xf8...0xff => {
                    result.id = .Invalid;
                    break;
                },
                0xc0...0xdf => { // 110xxxxx
                    remaining_code_units = 1;
                    state = .char_literal_unicode;
                },
                0xe0...0xef => { // 1110xxxx
                    remaining_code_units = 2;
                    state = .char_literal_unicode;
                },
                0xf0...0xf7 => { // 11110xxx
                    remaining_code_units = 3;
                    state = .char_literal_unicode;
                },
                else => state = .char_literal_end,
            },

            .char_literal_backslash => switch (c) {
                '\n' => {
                    result.id = .Invalid;
                    break;
                },
                'x' => {
                    state = .char_literal_hex_escape;
                    seen_escape_digits = 0;
                },
                'u' => state = .char_literal_unicode_escape_saw_u,
                else => state = .char_literal_end,
            },

            .char_literal_hex_escape => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    // "\x" takes exactly two hex digits.
                    seen_escape_digits += 1;
                    if (seen_escape_digits == 2) {
                        state = .char_literal_end;
                    }
                },
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .char_literal_unicode_escape_saw_u => switch (c) {
                '{' => {
                    state = .char_literal_unicode_escape;
                    seen_escape_digits = 0;
                },
                else => {
                    result.id = .Invalid;
                    state = .char_literal_unicode_invalid;
                },
            },

            .char_literal_unicode_escape => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => seen_escape_digits += 1,
                '}' => {
                    if (seen_escape_digits == 0) {
                        result.id = .Invalid;
                        state = .char_literal_unicode_invalid;
                    } else {
                        state = .char_literal_end;
                    }
                },
                else => {
                    result.id = .Invalid;
                    state = .char_literal_unicode_invalid;
                },
            },

            .char_literal_unicode_invalid => switch (c) {
                // Keep consuming characters until an obvious stopping point.
                // This consolidates e.g. `u{0ab1Q}` into a single invalid token
                // instead of creating the tokens `u{0ab1`, `Q`, `}`
                '0'...'9', 'a'...'z', 'A'...'Z', '}' => {},
                else => break,
            },

            .char_literal_end => switch (c) {
                '\'' => {
                    result.id = .CharLiteral;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .char_literal_unicode => switch (c) {
                0x80...0xbf => {
                    remaining_code_units -= 1;
                    if (remaining_code_units == 0) {
                        state = .char_literal_end;
                    }
                },
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .multiline_string_literal_line => switch (c) {
                '\n' => {
                    self.index += 1;
                    break;
                },
                '\t' => {},
                else => self.checkLiteralCharacter(),
            },

            .bang => switch (c) {
                '=' => {
                    result.id = .BangEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Bang;
                    break;
                },
            },

            .pipe => switch (c) {
                '=' => {
                    result.id = .PipeEqual;
                    self.index += 1;
                    break;
                },
                '|' => {
                    result.id = .PipePipe;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Pipe;
                    break;
                },
            },

            .equal => switch (c) {
                '=' => {
                    result.id = .EqualEqual;
                    self.index += 1;
                    break;
                },
                '>' => {
                    result.id = .EqualAngleBracketRight;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Equal;
                    break;
                },
            },

            .minus => switch (c) {
                '>' => {
                    result.id = .Arrow;
                    self.index += 1;
                    break;
                },
                '=' => {
                    result.id = .MinusEqual;
                    self.index += 1;
                    break;
                },
                '%' => state = .minus_percent,
                else => {
                    result.id = .Minus;
                    break;
                },
            },

            .minus_percent => switch (c) {
                '=' => {
                    result.id = .MinusPercentEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .MinusPercent;
                    break;
                },
            },

            .angle_bracket_left => switch (c) {
                '<' => state = .angle_bracket_angle_bracket_left,
                '=' => {
                    result.id = .AngleBracketLeftEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .AngleBracketLeft;
                    break;
                },
            },

            .angle_bracket_angle_bracket_left => switch (c) {
                '=' => {
                    result.id = .AngleBracketAngleBracketLeftEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .AngleBracketAngleBracketLeft;
                    break;
                },
            },

            .angle_bracket_right => switch (c) {
                '>' => state = .angle_bracket_angle_bracket_right,
                '=' => {
                    result.id = .AngleBracketRightEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .AngleBracketRight;
                    break;
                },
            },

            .angle_bracket_angle_bracket_right => switch (c) {
                '=' => {
                    result.id = .AngleBracketAngleBracketRightEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .AngleBracketAngleBracketRight;
                    break;
                },
            },

            .period => switch (c) {
                '.' => state = .period_2,
                '*' => {
                    result.id = .PeriodAsterisk;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Period;
                    break;
                },
            },

            .period_2 => switch (c) {
                '.' => {
                    result.id = .Ellipsis3;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Ellipsis2;
                    break;
                },
            },

            .slash => switch (c) {
                '/' => {
                    state = .line_comment_start;
                    result.id = .LineComment;
                },
                '=' => {
                    result.id = .SlashEqual;
                    self.index += 1;
                    break;
                },
                else => {
                    result.id = .Slash;
                    break;
                },
            },

            .line_comment_start => switch (c) {
                '/' => state = .doc_comment_start,
                '!' => {
                    result.id = .ContainerDocComment;
                    state = .container_doc_comment;
                },
                '\n' => break,
                '\t', '\r' => state = .line_comment,
                else => {
                    state = .line_comment;
                    self.checkLiteralCharacter();
                },
            },

            .doc_comment_start => switch (c) {
                // A further '/' demotes the comment back to a plain line comment.
                '/' => state = .line_comment,
                '\n' => {
                    result.id = .DocComment;
                    break;
                },
                '\t', '\r' => {
                    state = .doc_comment;
                    result.id = .DocComment;
                },
                else => {
                    state = .doc_comment;
                    result.id = .DocComment;
                    self.checkLiteralCharacter();
                },
            },

            .line_comment, .doc_comment, .container_doc_comment => switch (c) {
                '\n' => break,
                '\t', '\r' => {},
                else => self.checkLiteralCharacter(),
            },

            .zero => switch (c) {
                'b' => state = .int_literal_bin_no_underscore,
                'o' => state = .int_literal_oct_no_underscore,
                'x' => state = .int_literal_hex_no_underscore,
                '0'...'9', '_', '.', 'e', 'E' => {
                    // reinterpret as a decimal number
                    self.index -= 1;
                    state = .int_literal_dec;
                },
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .int_literal_bin_no_underscore => switch (c) {
                '0'...'1' => state = .int_literal_bin,
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .int_literal_bin => switch (c) {
                '_' => state = .int_literal_bin_no_underscore,
                '0'...'1' => {},
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .int_literal_oct_no_underscore => switch (c) {
                '0'...'7' => state = .int_literal_oct,
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .int_literal_oct => switch (c) {
                '_' => state = .int_literal_oct_no_underscore,
                '0'...'7' => {},
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .int_literal_dec_no_underscore => switch (c) {
                '0'...'9' => state = .int_literal_dec,
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .int_literal_dec => switch (c) {
                '_' => state = .int_literal_dec_no_underscore,
                '.' => {
                    state = .num_dot_dec;
                    result.id = .FloatLiteral;
                },
                'e', 'E' => {
                    state = .float_exponent_unsigned;
                    result.id = .FloatLiteral;
                },
                '0'...'9' => {},
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .int_literal_hex_no_underscore => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => state = .int_literal_hex,
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .int_literal_hex => switch (c) {
                '_' => state = .int_literal_hex_no_underscore,
                '.' => {
                    state = .num_dot_hex;
                    result.id = .FloatLiteral;
                },
                'p', 'P' => {
                    state = .float_exponent_unsigned;
                    result.id = .FloatLiteral;
                },
                '0'...'9', 'a'...'f', 'A'...'F' => {},
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .num_dot_dec => switch (c) {
                '.' => {
                    // Two dots in a row (e.g. `1..2`): the first dot was not a
                    // decimal point, so the digits before it are an integer.
                    // Undo the FloatLiteral guess and give the dot back.
                    result.id = .IntegerLiteral;
                    self.index -= 1;
                    state = .start;
                    break;
                },
                'e', 'E' => state = .float_exponent_unsigned,
                '0'...'9' => {
                    result.id = .FloatLiteral;
                    state = .float_fraction_dec;
                },
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .num_dot_hex => switch (c) {
                '.' => {
                    // Same as the decimal case: `0x1..2` starts with the
                    // integer `0x1`, not a float.
                    result.id = .IntegerLiteral;
                    self.index -= 1;
                    state = .start;
                    break;
                },
                'p', 'P' => state = .float_exponent_unsigned,
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    result.id = .FloatLiteral;
                    state = .float_fraction_hex;
                },
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .float_fraction_dec_no_underscore => switch (c) {
                '0'...'9' => state = .float_fraction_dec,
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .float_fraction_dec => switch (c) {
                '_' => state = .float_fraction_dec_no_underscore,
                'e', 'E' => state = .float_exponent_unsigned,
                '0'...'9' => {},
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .float_fraction_hex_no_underscore => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => state = .float_fraction_hex,
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .float_fraction_hex => switch (c) {
                '_' => state = .float_fraction_hex_no_underscore,
                'p', 'P' => state = .float_exponent_unsigned,
                '0'...'9', 'a'...'f', 'A'...'F' => {},
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },

            .float_exponent_unsigned => switch (c) {
                '+', '-' => state = .float_exponent_num_no_underscore,
                else => {
                    // reinterpret as a normal exponent number
                    self.index -= 1;
                    state = .float_exponent_num_no_underscore;
                },
            },

            .float_exponent_num_no_underscore => switch (c) {
                '0'...'9' => state = .float_exponent_num,
                else => {
                    result.id = .Invalid;
                    break;
                },
            },

            .float_exponent_num => switch (c) {
                '_' => state = .float_exponent_num_no_underscore,
                '0'...'9' => {},
                else => {
                    if (isIdentifierChar(c)) {
                        result.id = .Invalid;
                    }
                    break;
                },
            },
        }
    } else if (self.index == self.buffer.len) {
        // The loop ran off the end of the buffer without a `break`: decide
        // what the partially scanned token is from the state reached.
        switch (state) {
            .start,
            .int_literal_dec,
            .int_literal_bin,
            .int_literal_oct,
            .int_literal_hex,
            .num_dot_dec,
            .num_dot_hex,
            .float_fraction_dec,
            .float_fraction_hex,
            .float_exponent_num,
            .string_literal, // find this error later
            .multiline_string_literal_line,
            .builtin,
            => {},

            .identifier => {
                if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| {
                    result.id = id;
                }
            },

            .line_comment, .line_comment_start => {
                result.id = .LineComment;
            },
            .doc_comment, .doc_comment_start => {
                result.id = .DocComment;
            },
            .container_doc_comment => {
                result.id = .ContainerDocComment;
            },

            // Invalid states at the end of the buffer.
            .int_literal_dec_no_underscore,
            .int_literal_bin_no_underscore,
            .int_literal_oct_no_underscore,
            .int_literal_hex_no_underscore,
            .float_fraction_dec_no_underscore,
            .float_fraction_hex_no_underscore,
            .float_exponent_num_no_underscore,
            .float_exponent_unsigned,
            .saw_at_sign,
            .backslash,
            .char_literal,
            .char_literal_backslash,
            .char_literal_hex_escape,
            .char_literal_unicode_escape_saw_u,
            .char_literal_unicode_escape,
            .char_literal_unicode_invalid,
            .char_literal_end,
            .char_literal_unicode,
            .string_literal_backslash,
            => {
                result.id = .Invalid;
            },

            .equal => {
                result.id = .Equal;
            },
            .bang => {
                result.id = .Bang;
            },
            .minus => {
                result.id = .Minus;
            },
            .slash => {
                result.id = .Slash;
            },
            .zero => {
                result.id = .IntegerLiteral;
            },
            .ampersand => {
                result.id = .Ampersand;
            },
            .period => {
                result.id = .Period;
            },
            .period_2 => {
                result.id = .Ellipsis2;
            },
            .pipe => {
                result.id = .Pipe;
            },
            .angle_bracket_angle_bracket_right => {
                result.id = .AngleBracketAngleBracketRight;
            },
            .angle_bracket_right => {
                result.id = .AngleBracketRight;
            },
            .angle_bracket_angle_bracket_left => {
                result.id = .AngleBracketAngleBracketLeft;
            },
            .angle_bracket_left => {
                result.id = .AngleBracketLeft;
            },
            .plus_percent => {
                result.id = .PlusPercent;
            },
            .plus => {
                result.id = .Plus;
            },
            .percent => {
                result.id = .Percent;
            },
            .caret => {
                result.id = .Caret;
            },
            .asterisk_percent => {
                result.id = .AsteriskPercent;
            },
            .asterisk => {
                result.id = .Asterisk;
            },
            .minus_percent => {
                result.id = .MinusPercent;
            },
        }
    }

    // Nothing was scanned, but a literal/comment check may have queued an
    // invalid-character token during this call: report that instead of Eof.
    if (result.id == .Eof) {
        if (self.pending_invalid_token) |token| {
            self.pending_invalid_token = null;
            return token;
        }
    }

    result.loc.end = self.index;
    return result;
}
|
|
|
|
/// Validates the character at `self.index` (inside a string literal or
/// comment). If it is not allowed, queues an `Invalid` token covering it in
/// `pending_invalid_token`; `next` hands that token out later.
fn checkLiteralCharacter(self: *Tokenizer) void {
    // Only one offending character is remembered at a time.
    if (self.pending_invalid_token) |_| return;

    const bad_len = self.getInvalidCharacterLength();
    if (bad_len != 0) {
        // `index` is left untouched by the length check when it reports a
        // non-zero (invalid) length, so it still points at the bad character.
        const bad_start = self.index;
        self.pending_invalid_token = Token{
            .id = .Invalid,
            .loc = .{
                .start = bad_start,
                .end = bad_start + bad_len,
            },
        };
    }
}
|
|
|
|
/// Inspects the character starting at `self.index`.
///
/// Returns 0 when it is acceptable; in that case `self.index` is advanced to
/// the last byte of a multi-byte UTF-8 sequence so the caller's loop steps
/// past the whole character. Otherwise returns the number of bytes that make
/// up the invalid character and leaves `self.index` alone.
fn getInvalidCharacterLength(self: *Tokenizer) u3 {
    const lead = self.buffer[self.index];

    if (lead < 0x80) {
        // ascii control codes are never allowed
        // (note that \n was checked before we got here)
        const is_control = lead < 0x20 or lead == 0x7f;
        return if (is_control) @as(u3, 1) else 0;
    }

    // check utf8-encoded character.
    const seq_len = std.unicode.utf8ByteSequenceLength(lead) catch return 1;

    // Sequence cut off by the end of the buffer: everything left is invalid.
    const available = self.buffer.len - self.index;
    if (seq_len > available) {
        return @intCast(u3, available);
    }

    const seq = self.buffer[self.index .. self.index + seq_len];
    if (seq_len == 2) {
        const code_point = std.unicode.utf8Decode2(seq) catch return seq_len;
        if (code_point == 0x85) return seq_len; // U+0085 (NEL)
    } else if (seq_len == 3) {
        const code_point = std.unicode.utf8Decode3(seq) catch return seq_len;
        if (code_point == 0x2028) return seq_len; // U+2028 (LS)
        if (code_point == 0x2029) return seq_len; // U+2029 (PS)
    } else if (seq_len == 4) {
        _ = std.unicode.utf8Decode4(seq) catch return seq_len;
    } else unreachable;

    self.index += seq_len - 1;
    return 0;
}
|
|
};
|
|
|
|
test "tokenizer" {
    // "property" is in `Token.keywords`, so it is expected to come back as its
    // keyword id instead of a plain identifier.
    const expected = [_]Token.Id{.Keyword_property};
    testTokenize("property", &expected);
}
|
|
|
|
//test "tokenizer - unknown length pointer and then c pointer" {
|
|
// testTokenize(
|
|
// \\[*]u8
|
|
// \\[*c]u8
|
|
// , &[_]Token.Id{
|
|
// .LBracket,
|
|
// .Asterisk,
|
|
// .RBracket,
|
|
// .Identifier,
|
|
// .LBracket,
|
|
// .Asterisk,
|
|
// .Identifier,
|
|
// .RBracket,
|
|
// .Identifier,
|
|
// });
|
|
//}
|
|
|
|
//test "tokenizer - char literal with hex escape" {
|
|
// testTokenize(
|
|
// \\'\x1b'
|
|
// , &[_]Token.Id{.CharLiteral});
|
|
// testTokenize(
|
|
// \\'\x1'
|
|
// , &[_]Token.Id{ .Invalid, .Invalid });
|
|
//}
|
|
|
|
//test "tokenizer - char literal with unicode escapes" {
|
|
// // Valid unicode escapes
|
|
// testTokenize(
|
|
// \\'\u{3}'
|
|
// , &[_]Token.Id{.CharLiteral});
|
|
// testTokenize(
|
|
// \\'\u{01}'
|
|
// , &[_]Token.Id{.CharLiteral});
|
|
// testTokenize(
|
|
// \\'\u{2a}'
|
|
// , &[_]Token.Id{.CharLiteral});
|
|
// testTokenize(
|
|
// \\'\u{3f9}'
|
|
// , &[_]Token.Id{.CharLiteral});
|
|
// testTokenize(
|
|
// \\'\u{6E09aBc1523}'
|
|
// , &[_]Token.Id{.CharLiteral});
|
|
// testTokenize(
|
|
// \\"\u{440}"
|
|
// , &[_]Token.Id{.StringLiteral});
|
|
|
|
// // Invalid unicode escapes
|
|
// testTokenize(
|
|
// \\'\u'
|
|
// , &[_]Token.Id{.Invalid});
|
|
// testTokenize(
|
|
// \\'\u{{'
|
|
// , &[_]Token.Id{ .Invalid, .Invalid });
|
|
// testTokenize(
|
|
// \\'\u{}'
|
|
// , &[_]Token.Id{ .Invalid, .Invalid });
|
|
// testTokenize(
|
|
// \\'\u{s}'
|
|
// , &[_]Token.Id{ .Invalid, .Invalid });
|
|
// testTokenize(
|
|
// \\'\u{2z}'
|
|
// , &[_]Token.Id{ .Invalid, .Invalid });
|
|
// testTokenize(
|
|
// \\'\u{4a'
|
|
// , &[_]Token.Id{.Invalid});
|
|
|
|
// // Test old-style unicode literals
|
|
// testTokenize(
|
|
// \\'\u0333'
|
|
// , &[_]Token.Id{ .Invalid, .Invalid });
|
|
// testTokenize(
|
|
// \\'\U0333'
|
|
// , &[_]Token.Id{ .Invalid, .IntegerLiteral, .Invalid });
|
|
//}
|
|
|
|
//test "tokenizer - char literal with unicode code point" {
|
|
// testTokenize(
|
|
// \\'💩'
|
|
// , &[_]Token.Id{.CharLiteral});
|
|
//}
|
|
|
|
test "tokenizer - float literal e exponent" {
    // Decimal float with a signed `e` exponent.
    const source = "a = 4.94065645841246544177e-324;\n";
    const expected = [_]Token.Id{ .Identifier, .Equal, .FloatLiteral, .Semicolon };
    testTokenize(source, &expected);
}
|
|
|
|
test "tokenizer - float literal p exponent" {
    // Hexadecimal float with a signed `p` exponent.
    const source = "a = 0x1.a827999fcef32p+1022;\n";
    const expected = [_]Token.Id{ .Identifier, .Equal, .FloatLiteral, .Semicolon };
    testTokenize(source, &expected);
}
|
|
|
|
test "tokenizer - chars" {
    // A single ASCII character between single quotes is one char literal.
    const expected = [_]Token.Id{.CharLiteral};
    testTokenize("'c'", &expected);
}
|
|
|
|
test "tokenizer - comments" {
    // Both comment openers, with nothing after them, give a line comment.
    const line_comment_only = [_]Token.Id{.LineComment};
    testTokenize("#", &line_comment_only);
    testTokenize("//", &line_comment_only);
}
|
|
|
|
test "tokenizer - invalid token characters" {
    const one_invalid = [_]Token.Id{.Invalid};
    const two_invalid = [_]Token.Id{ .Invalid, .Invalid };

    // A byte that starts no token.
    testTokenize("`", &one_invalid);
    // Unterminated character literals.
    testTokenize("'c", &one_invalid);
    testTokenize("'", &one_invalid);
    // Empty character literal: each quote is reported separately.
    testTokenize("''", &two_invalid);
}
|
|
|
|
//test "tokenizer - invalid literal/comment characters" {
|
|
// testTokenize("\"\x00\"", &[_]Token.Id{
|
|
// .StringLiteral,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\x00", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\x1f", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\x7f", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
//}
|
|
|
|
//test "tokenizer - utf8" {
|
|
// testTokenize("//\xc2\x80", &[_]Token.Id{.LineComment});
|
|
// testTokenize("//\xf4\x8f\xbf\xbf", &[_]Token.Id{.LineComment});
|
|
//}
|
|
|
|
//test "tokenizer - invalid utf8" {
|
|
// testTokenize("//\x80", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xbf", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xf8", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xff", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xc2\xc0", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xe0", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xf0", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xf0\x90\x80\xc0", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
//}
|
|
|
|
//test "tokenizer - illegal unicode codepoints" {
|
|
// // Unicode newline characters: U+0085, U+2028, U+2029
|
|
// testTokenize("//\xc2\x84", &[_]Token.Id{.LineComment});
|
|
// testTokenize("//\xc2\x85", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xc2\x86", &[_]Token.Id{.LineComment});
|
|
// testTokenize("//\xe2\x80\xa7", &[_]Token.Id{.LineComment});
|
|
// testTokenize("//\xe2\x80\xa8", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xe2\x80\xa9", &[_]Token.Id{
|
|
// .LineComment,
|
|
// .Invalid,
|
|
// });
|
|
// testTokenize("//\xe2\x80\xaa", &[_]Token.Id{.LineComment});
|
|
//}
|
|
|
|
//test "tokenizer - string identifier and builtin fns" {
|
|
// testTokenize(
|
|
// \\const @"if" = @import("std");
|
|
// , &[_]Token.Id{
|
|
// .Keyword_const,
|
|
// .Identifier,
|
|
// .Equal,
|
|
// .Builtin,
|
|
// .LParen,
|
|
// .StringLiteral,
|
|
// .RParen,
|
|
// .Semicolon,
|
|
// });
|
|
//}
|
|
|
|
//test "tokenizer - multiline string literal with literal tab" {
|
|
// testTokenize(
|
|
// \\\\foo bar
|
|
// , &[_]Token.Id{
|
|
// .MultilineStringLiteralLine,
|
|
// });
|
|
//}
|
|
|
|
// Feeds six comment lines (written as one multiline string literal) to
// testTokenize and expects, in order: LineComment, ContainerDocComment,
// DocComment, LineComment, DocComment, DocComment.
// NOTE(review): the test name promises literal tab characters inside the
// comment bodies; confirm the whitespace in the `\\` lines below is still a
// tab and was not converted to spaces by an editor or formatter.
test "tokenizer - comments with literal tab" {
|
|
testTokenize(
|
|
\\//foo bar
|
|
\\//!foo bar
|
|
\\///foo bar
|
|
\\// foo
|
|
\\/// foo
|
|
\\/// /foo
|
|
, &[_]Token.Id{
|
|
.LineComment,
|
|
.ContainerDocComment,
|
|
.DocComment,
|
|
.LineComment,
|
|
.DocComment,
|
|
.DocComment,
|
|
});
|
|
}
|
|
|
|
//test "tokenizer - pipe and then invalid" {
|
|
// testTokenize("||=", &[_]Token.Id{
|
|
// .PipePipe,
|
|
// .Equal,
|
|
// });
|
|
//}
|
|
|
|
//test "tokenizer - line comment and doc comment" {
|
|
// testTokenize("//", &[_]Token.Id{.LineComment});
|
|
// testTokenize("// a / b", &[_]Token.Id{.LineComment});
|
|
// testTokenize("// /", &[_]Token.Id{.LineComment});
|
|
// testTokenize("/// a", &[_]Token.Id{.DocComment});
|
|
// testTokenize("///", &[_]Token.Id{.DocComment});
|
|
// testTokenize("////", &[_]Token.Id{.LineComment});
|
|
// testTokenize("//!", &[_]Token.Id{.ContainerDocComment});
|
|
// testTokenize("//!!", &[_]Token.Id{.ContainerDocComment});
|
|
//}
|
|
|
|
//test "tokenizer - line comment followed by identifier" {
|
|
// testTokenize(
|
|
// \\ Unexpected,
|
|
// \\ // another
|
|
// \\ Another,
|
|
// , &[_]Token.Id{
|
|
// .Identifier,
|
|
// .Comma,
|
|
// .LineComment,
|
|
// .Identifier,
|
|
// .Comma,
|
|
// });
|
|
//}
|
|
|
|
//test "tokenizer - UTF-8 BOM is recognized and skipped" {
|
|
// testTokenize("\xEF\xBB\xBFa;\n", &[_]Token.Id{
|
|
// .Identifier,
|
|
// .Semicolon,
|
|
// });
|
|
//}
|
|
|
|
//test "correctly parse pointer assignment" {
|
|
// testTokenize("b.*=3;\n", &[_]Token.Id{
|
|
// .Identifier,
|
|
// .PeriodAsterisk,
|
|
// .Equal,
|
|
// .IntegerLiteral,
|
|
// .Semicolon,
|
|
// });
|
|
//}
|
|
|
|
//test "tokenizer - number literals decimal" {
|
|
// testTokenize("0", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("1", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("2", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("3", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("4", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("5", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("6", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("7", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("8", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("9", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier });
|
|
|
|
// testTokenize("0_0", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0001", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral});
|
|
|
|
// testTokenize("00_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0_0_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma });
|
|
|
|
// testTokenize("1.", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0.0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1.0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("10.0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0e0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1e0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1e100", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1.e100", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus });
|
|
|
|
// testTokenize("1e", &[_]Token.Id{.Invalid});
|
|
// testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral });
|
|
// testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma });
|
|
// testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
|
|
// testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus });
|
|
// testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.0e", &[_]Token.Id{.Invalid});
|
|
// testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma });
|
|
// testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus });
|
|
//}
|
|
|
|
//test "tokenizer - number literals binary" {
|
|
// testTokenize("0b0", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0b1", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier });
|
|
|
|
// testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period });
|
|
// testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
|
|
|
|
// testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b1_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b0_1_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma });
|
|
//}
|
|
|
|
//test "tokenizer - number literals octal" {
|
|
// testTokenize("0o0", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o1", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o2", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o3", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o4", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o5", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o6", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o7", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral });
|
|
// testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier });
|
|
|
|
// testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period });
|
|
// testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral });
|
|
|
|
// testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o1_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o0_1_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
|
|
//}
|
|
|
|
//test "tokenizer - number literals hexadecimal" {
|
|
// testTokenize("0x0", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x1", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x2", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x3", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x4", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x5", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x6", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x7", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x8", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x9", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xa", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xb", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xc", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xd", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xe", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xf", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xA", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xB", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xC", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier });
|
|
|
|
// testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});
|
|
|
|
// testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x1_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });
|
|
|
|
// testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xF.", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral });
|
|
|
|
// testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral});
|
|
// testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral});
|
|
|
|
// testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});
|
|
// testTokenize("0x1p", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0.p", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
|
|
// testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral });
|
|
// testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
|
|
// testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier });
|
|
// testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier });
|
|
// testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof });
|
|
//}
|
|
|
|
// Test helper: tokenizes `source` and checks the produced token ids against
// `expected_tokens`, in order.
//
// Panics with an "expected ..., found ..." message at the first mismatching
// token. After all expected tokens have been consumed, the next token must be
// `.Eof`; this is checked with `std.testing.expect`.
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
    var lexer = Tokenizer.init(source);

    var index: usize = 0;
    while (index < expected_tokens.len) : (index += 1) {
        const want = expected_tokens[index];
        const got = lexer.next().id;
        if (got == want) continue;
        std.debug.panic("expected {}, found {}\n", .{ @tagName(want), @tagName(got) });
    }

    // Nothing may follow the expected tokens except end-of-file.
    std.testing.expect(lexer.next().id == .Eof);
}
|