// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2015-2020 Zig Contributors
|
|
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
|
|
// The MIT license requires this copyright notice to be included in all copies
|
|
// and substantial portions of the software.
|
|
const std = @import("std");
|
|
const assert = std.debug.assert;
|
|
const Allocator = std.mem.Allocator;
|
|
const ast = @import("ast.zig");
|
|
const Node = ast.Node;
|
|
const AstError = ast.Error;
|
|
const TokenIndex = ast.TokenIndex;
|
|
const NodeIndex = ast.NodeIndex;
|
|
|
|
const lexer = @import("tokenizer.zig");
|
|
const Token = lexer.Token;
|
|
const Tokenizer = lexer.Tokenizer;
|
|
|
|
const cs = @import("common-structures.zig");
|
|
|
|
// Errors any parsing routine can produce: a recoverable ParseError,
// or an allocation failure from the allocator in use.
pub const Error = error{ParseError} || Allocator.Error;
|
|
|
|
// Start => Requires Thing => End
|
|
// Requires => require StringLiteral Requires | nil
|
|
// Thing => Definition Thing | NodeHdr Thing | nil
|
|
|
|
// Definition => define Identifier NodeHdrSimple
|
|
|
|
// NodeHdr => NodeHdrFull LBrace NodeContent RBrace | NodeHdrSimple LBrace NodeContent RBrace
|
|
// NodeHdrSimple: a node without identifier, for a definition or when this won't be used later.
|
|
// NodeHdrSimple => Identifier
|
|
// NodeHdrFull: a node with an identifier.
|
|
// NodeHdrFull => Identifier LParen Identifier RParen
|
|
// NodeContent => NodeHdr | statement NodeContent | nil
|
|
|
|
// Property keyword adds a new property to the definition,
|
|
// without it, default property values can be changed.
|
|
|
|
// statement => Keyword_property Identifier Identifier Colon value
|
|
// value => StringLiteral | Keyword_null | IntegerLiteral | FloatLiteral
|
|
|
|
/// Result should be freed with tree.deinit() when there are
/// no more references to any of the tokens or nodes.
pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!cs.Tree {
    // Token storage is only needed during parsing; the two arrays are
    // parallel (same index = same token).
    var token_ids = std.ArrayList(Token.Id).init(gpa);
    defer token_ids.deinit();
    var token_locs = std.ArrayList(Token.Loc).init(gpa);
    defer token_locs.deinit();

    // Pre-reserve a rough guess (one token per 8 source bytes) to avoid
    // repeated re-allocations while lexing.
    const estimated_token_count = source.len / 8;
    try token_ids.ensureCapacity(estimated_token_count);
    try token_locs.ensureCapacity(estimated_token_count);

    // Use the lexer to get all the tokens from the source code.
    // The .Eof token is appended too and acts as a sentinel.
    var tokenizer = Tokenizer.init(source);
    while (true) {
        const token = tokenizer.next();
        try token_ids.append(token.id);
        try token_locs.append(token.loc);
        if (token.id == .Eof) break;
    }

    // Create a Parser structure.
    var parser: Parser = .{
        .source = source, // Source code.
        .arena = std.heap.ArenaAllocator.init(gpa), // Backs tree allocations.
        .gpa = gpa, // General Purpose Allocator (errors list).
        .token_ids = token_ids.items, // IDs of the tokens.
        .token_locs = token_locs.items, // Location of the tokens.
        .errors = .{}, // List of errors in our parsing.
        .tok_i = 0, // Index of current token being analyzed.
        .indent = 0, // Indentation for debug.
    };
    defer parser.errors.deinit(gpa);
    errdefer parser.arena.deinit();

    // Ignore the first line comments from our code. Safe: the stream
    // always ends with .Eof, which is not a LineComment.
    while (token_ids.items[parser.tok_i] == .LineComment) parser.tok_i += 1;

    // Parse the source into a tree. The tree's nodes live in the arena;
    // NOTE(review): the arena is not deinitialized on success —
    // presumably cs.Tree takes ownership and frees it in deinit().
    // TODO confirm against common-structures.zig.
    return parser.parseTree();
}
|
|
|
|
// One "attribute: value" pair, kept as token indices into the parser's
// token arrays (use giveTokenContent to recover the actual text).
const Assignment = struct {
    // Token index of the attribute (left-hand side) identifier.
    id_attribute: TokenIndex,
    // Token index of the value token (literal or identifier).
    id_value: TokenIndex,
};
|
|
|
|
/// Represents in-progress parsing, will be converted to an cs.Tree after completion.
const Parser = struct {
    // Arena backing the long-lived tree allocations.
    arena: std.heap.ArenaAllocator,
    // General Purpose Allocator, used for transient parser state (errors list).
    gpa: *Allocator,
    // Whole source text; token locations index into this slice.
    source: []const u8,
    // Parallel arrays produced by the tokenizer (same index = same token).
    token_ids: []const Token.Id,
    token_locs: []const Token.Loc,
    // Index of the token currently being analyzed.
    tok_i: TokenIndex,
    // Parse errors accumulated for later reporting.
    errors: std.ArrayListUnmanaged(AstError),
    indent: u16, // Indentation for debug.
|
|
|
|
// parseTree: build a cs.Tree and fill it with the parsed content.
fn parseTree(p: *Parser) Allocator.Error!cs.Tree {
    // The tree is allocated out of the parser's arena.
    var result = try cs.Tree.create(&p.arena.allocator);

    // Walk the whole token stream, populating the tree.
    try parseTopLevel(p, &result);

    // parseTopLevel skips every token it cannot understand, so the
    // only token left here must be .Eof; the unwrap asserts exactly that.
    const eof_token = p.eatToken(.Eof).?;

    return result;
}
|
|
|
|
// parseTopLevel: dispatch on each top-level construct (require,
// define, node). Anything unrecognized is reported and skipped so
// parsing can continue; only memory errors abort the whole parse.
// FIX: removed a dead `ArrayList(*Node)` that was allocated and
// deinitialized without ever being used.
fn parseTopLevel(p: *Parser, tree: *cs.Tree) !void {
    while (true) {
        const token = p.nextToken();
        switch (p.token_ids[token]) {
            .Keyword_require => {
                p.putBackToken(token);
                // TODO: read file required and parse its content.
                p.parseRequire();
            },

            .Keyword_define => {
                p.putBackToken(token);
                // TODO: definitions.
                p.parseDefine() catch |err| switch (err) {
                    // Propagate memory errors; everything else is
                    // recoverable — skip the construct and keep going.
                    error.OutOfMemory => return error.OutOfMemory,
                    error.InvalidCharacter => continue,
                    error.NoSpaceLeft => continue,
                    error.Overflow => continue,
                    error.ParseError => continue,
                };
            },

            .Identifier => {
                // Identifier => on top level this means a node.
                p.putBackToken(token);
                p.parseNode() catch |err| switch (err) {
                    // Propagate memory errors.
                    error.OutOfMemory => return error.OutOfMemory,
                    error.InvalidCharacter => continue,
                    error.NoSpaceLeft => continue,
                    error.Overflow => continue,
                    error.ParseError => {
                        p.say("we catched a ParseError on token: {}\n", .{p.giveTokenContent(p.tok_i)});
                        continue;
                    },
                };
            },

            .Eof => {
                p.putBackToken(token);
                break;
            },

            else => {
                p.say("unexpected token: {}\n", .{p.token_ids[token]});
                continue;
            },
        }
    }
}
|
|
|
|
// Debug print helper: prefixes `fmt` with one space per indentation
// level so nested nodes line up in the output.
fn say(p: *Parser, comptime fmt: []const u8, args: anytype) void {
    var remaining = p.indent;
    while (remaining > 0) : (remaining -= 1) {
        std.debug.print(" ", .{});
    }
    std.debug.print(fmt, args);
}
|
|
|
|
// TODO: require "file"
// file should be read, parsed and a loop detection should take place.
// FIX: previously printed the optional token *index* rather than the
// required file's name.
fn parseRequire(p: *Parser) void {
    // The caller peeked at Keyword_require and put it back, so this
    // eat always succeeds; its index is not needed.
    _ = p.eatToken(.Keyword_require);
    if (p.eatToken(.StringLiteral)) |file_to_read| {
        std.debug.print("TODO: file required: {}\n", .{p.giveTokenContent(file_to_read)});
    } else {
        std.debug.print("TODO: require keyword without a file name\n", .{});
    }
}
|
|
|
|
// TODO: node definition (inheritance).
// Reads `define NewName ParentName { ... }`: the parent identifier is
// handed back to the stream so the body parses as a regular node.
fn parseDefine(p: *Parser) !void {
    _ = try p.expectToken(.Keyword_define);
    const new_class_name = try p.expectToken(.Identifier);
    const parent_node_name = try p.expectToken(.Identifier);

    std.debug.print("TODO: node inheritance: {} < {}\n",
        .{p.giveTokenContent(new_class_name), p.giveTokenContent(parent_node_name)});

    // Re-use the node parser for the body: the parent identifier is
    // also the start of a node header.
    p.putBackToken(parent_node_name);
    try p.parseNode();
    // TODO: get the old node definition,
    // create a new definition,
    // then add old and new properties and children to it.
}
|
|
|
|
// Parse one node: `Name { ... }` or `Name (id) { ... }`, recursing
// into child nodes. Returns without consuming anything when the
// current token is not an identifier.
// FIXES: `expectToken(.Colon)` after a dotted reference was missing
// `try`, silently discarding the error union (a missing colon was
// ignored); the recursive `parseNode` call's error union was likewise
// discarded into an unused const.
fn parseNode(p: *Parser) !void {
    const node_name = p.eatToken(.Identifier);
    if (node_name == null) {
        return;
    }
    // Either simple or full header: a full header carries an "(id)".
    const identifier: ?[]const u8 = try p.parseFullNodeHeader();

    if (p.indent > 0) {
        p.say("Child {}", .{p.giveTokenContent(node_name.?)});
    } else {
        p.say("Node {}", .{p.giveTokenContent(node_name.?)});
    }

    if (identifier) |id| {
        std.debug.print(", id: {}\n", .{id});
    } else {
        std.debug.print("\n", .{});
    }

    p.indent += 1;
    defer { p.indent -= 1; }

    // Starting the node.
    _ = try p.expectToken(.LBrace);

    while (true) {
        const token = p.nextToken();
        switch (p.token_ids[token]) {
            // Stray literals inside a node are read and ignored.
            .Keyword_null,
            .StringLiteral,
            .FloatLiteral,
            .IntegerLiteral => {
                p.say("reading (and ignoring) a literal: {}\n", .{p.giveTokenContent(token)});
                continue;
            },

            .Identifier => {
                // One token of lookahead decides what this identifier starts.
                const following = p.nextToken();
                switch (p.token_ids[following]) {
                    // Node header (with or without id).
                    .LParen,
                    .LBrace => {
                        p.putBackToken(following);
                        p.putBackToken(token);
                        // WARNING: RECURSION: this may cause errors.
                        try p.parseNode();
                        continue;
                    },

                    // "attribute: value" assignment.
                    .Colon => {
                        const value: cs.PropertyValue = try p.parseValue();
                        p.say("attribute {:>20} => {}\n"
                            , .{ p.giveTokenContent(token)
                               , value});
                    },

                    // Dotted reference on the left: "a.b.c: value".
                    .Period => {
                        p.putBackToken(following);
                        p.putBackToken(token);

                        // Hacking a bit, this is not a value but an identifier.
                        const attribute_loc: Token.Loc = try p.parseReference();

                        // BUG FIX: was `const colon = p.expectToken(.Colon);`
                        // without `try` — the error was silently dropped.
                        _ = try p.expectToken(.Colon);

                        const value: cs.PropertyValue = try p.parseValue();
                        p.say("attribute {:>20} => {}\n"
                            , .{ p.source[attribute_loc.start..attribute_loc.end]
                               , value});
                    },

                    else => {
                        // Couldn't understand what was in this node:
                        // put both tokens back and stop reading it.
                        p.say("did not understand {} then {}\n"
                            , .{ p.giveTokenContent(token)
                               , p.giveTokenContent(following)});
                        p.putBackToken(following);
                        p.putBackToken(token);
                        break;
                    },
                }
            },

            .Keyword_property => {
                p.putBackToken(token);
                try p.parseProperty();
                continue;
            },

            .RBrace => {
                p.putBackToken(token);
                break;
            },

            else => {
                p.putBackToken(token);
                p.say("reading {} in a node, backing up\n"
                    , .{p.giveTokenContent(token)});
                break;
            },
        }
    }

    // Node definition or instance ends with a RBrace.
    _ = try p.expectToken(.RBrace);
}
|
|
|
|
// A property value can either be a simple value,
// or a reference (for property binding stuff).
// Simple values are copied and casted into the real type.
// For a reference, we keep a copy of the string representation.
// FIX: removed a dead `while(true)` wrapper — every switch prong
// returns, so the loop could never iterate.
fn parseValue(p: *Parser) !cs.PropertyValue {
    const token = p.nextToken();
    switch (p.token_ids[token]) {
        // TODO: the grammar lists Keyword_null as a value, but it is
        // not handled yet and currently falls through to ParseError.

        .StringLiteral => {
            return cs.PropertyValue{ .string = p.giveTokenContent(token) };
        },

        .IntegerLiteral => {
            // May fail with InvalidCharacter/Overflow, handled by callers.
            return cs.PropertyValue{
                .integer = try std.fmt.parseInt(u64, p.giveTokenContent(token), 10),
            };
        },

        .FloatLiteral => {
            return cs.PropertyValue{
                .float = try std.fmt.parseFloat(f64, p.giveTokenContent(token)),
            };
        },

        .Identifier => {
            // A (possibly dotted) reference: keep its source location
            // and store the raw text slice.
            p.putBackToken(token);
            const loc: Token.Loc = try p.parseReference();
            return cs.PropertyValue{
                .reference = p.source[loc.start..loc.end],
            };
        },

        else => {
            return error.ParseError;
        },
    }
}
|
|
|
|
// // statement => Keyword_property Identifier Identifier Colon value
|
|
// // value => StringLiteral | Keyword_null | IntegerLiteral | FloatLiteral
|
|
// fn parseAssignment(p: *Parser) !Assignment {
|
|
// const id_attribute = p.eatToken(.Identifier);
|
|
// const ignored = try p.expectToken(.Colon);
|
|
// const id_value = p.nextToken();
|
|
// switch (p.token_ids[id_value]) {
|
|
// .Keyword_null,
|
|
// .StringLiteral,
|
|
// .IntegerLiteral,
|
|
// .FloatLiteral => {
|
|
// if (id_attribute) |ia| {
|
|
// return Assignment{.id_attribute = ia, .id_value = id_value};
|
|
// }
|
|
// },
|
|
// else => {
|
|
// return error.ParseError;
|
|
// }
|
|
// }
|
|
// return error.ParseError;
|
|
// }
|
|
|
|
// Get the representation of the reference: returning a location.
// Parses `Identifier ("." Identifier)*` and returns a Token.Loc
// spanning from the first identifier to the last one.
fn parseReference(p: *Parser) !Token.Loc {
    // NOTE(review): this buffer only feeds a textual representation
    // that is never returned (see commented return below). It still
    // caps references at 100 bytes: fmt.format fails with
    // error.NoSpaceLeft beyond that, and callers treat it as a
    // recoverable parse failure. Removing it would shrink the inferred
    // error set and break the NoSpaceLeft prongs in parseTopLevel.
    var representation: [100]u8 = undefined;
    var fbs = std.io.fixedBufferStream(representation[0..]);

    // First part of the reference has to be an Identifier.
    const id = try p.expectToken(.Identifier);
    // try std.fmt.format(fbs.writer(), "{}", .{p.giveTokenContent(id)});

    // Start with the first identifier's span; `end` is extended below
    // for every ".identifier" that follows.
    var loc: Token.Loc = Token.Loc{
        .start = p.token_locs[id].start,
        .end = p.token_locs[id].end,
    };

    while (true) {
        const token = p.nextToken();
        switch (p.token_ids[token]) {

            .Period => {
                // Following token is expected to be an Identifier.
                const following = try p.expectToken(.Identifier);
                try std.fmt.format(fbs.writer(), ".{}", .{p.giveTokenContent(following)});
                loc.end = p.token_locs[following].end;
            },

            else => {
                // Anything else ends the reference; hand the token back.
                p.putBackToken(token);
                break;
            },
        }
    }

    // return representation[0..fbs.pos];
    return loc;
}
|
|
|
|
// Parse `property Type name: value` and (for now) just print it.
fn parseProperty(p: *Parser) !void {
    _ = try p.expectToken(.Keyword_property);
    const type_name = try p.expectToken(.Identifier);
    const attribute_name = try p.expectToken(.Identifier);
    _ = try p.expectToken(.Colon);
    const value: cs.PropertyValue = try p.parseValue();
    p.say("- {} (type {}) = {}\n", .{
        p.giveTokenContent(attribute_name),
        p.giveTokenContent(type_name),
        value,
    });
}
|
|
|
|
// Try to read the "(identifier)" part of a full node header.
// Returns null (consuming nothing) when there is no LParen.
fn parseFullNodeHeader(p: *Parser) !?[]const u8 {
    // Simple header: no parenthesis, nothing to consume.
    if (p.eatToken(.LParen) == null)
        return null;

    // Once we know this is a full header, an identifier then a right
    // parenthesis are expected, and should trigger a parsing error if not there.
    const identifier = try p.expectToken(.Identifier);
    _ = try p.expectToken(.RParen);

    return p.giveTokenContent(identifier);
}
|
|
|
|
|
|
// Consume and return the current token only when it matches `id`;
// otherwise leave the stream untouched and return null.
fn eatToken(p: *Parser, id: Token.Id) ?TokenIndex {
    if (p.token_ids[p.tok_i] != id) return null;
    return p.nextToken();
}
|
|
|
|
// expectToken: either returns the token or an error.
// The mismatch is still recorded in p.errors (and the token put back)
// by expectTokenRecoverable before ParseError is raised.
fn expectToken(p: *Parser, id: Token.Id) Error!TokenIndex {
    return (try p.expectTokenRecoverable(id)) orelse error.ParseError;
}
|
|
|
|
// expectTokenRecoverable: either returns the token or null if not the one expected.
// Also, appends the error inside p.errors.
fn expectTokenRecoverable(p: *Parser, id: Token.Id) !?TokenIndex {
    const token = p.nextToken();
    if (p.token_ids[token] != id) {
        // Record what was expected and where, for later reporting.
        try p.errors.append(p.gpa, .{
            .ExpectedToken = .{ .token = token, .expected_id = id },
        });
        // go back so that we can recover properly
        p.putBackToken(token);
        return null;
    }
    return token;
}
|
|
|
|
// nextToken: return the index of the current token and advance tok_i
// past it, skipping any run of line comments that follows so the next
// read always sees a real token.
fn nextToken(p: *Parser) TokenIndex {
    const result = p.tok_i;
    p.tok_i += 1;
    // The returned token itself must never be a comment.
    assert(p.token_ids[result] != .LineComment);
    while (p.tok_i < p.token_ids.len and p.token_ids[p.tok_i] == .LineComment) {
        p.tok_i += 1;
    }
    return result;
}
|
|
|
|
// putBackToken: come back one token (except for comment lines which are ignored).
// Example: we have a function searching for a declaration,
// the function read a token "my-variable" so it returns after putting back the token.
// Caller now have the start of a declaration in its parsing structure.
fn putBackToken(p: *Parser, putting_back: TokenIndex) void {
    while (p.tok_i > 0) {
        // Step back one token at a time, skipping line comments.
        p.tok_i -= 1;
        if (p.token_ids[p.tok_i] == .LineComment) continue;
        // The first non-comment token reached must be exactly the one
        // the caller claims to put back; anything else is a logic bug.
        assert(putting_back == p.tok_i);
        return;
    }
}
|
|
|
|
/// TODO Delete this function. I don't like the inversion of control.
/// NOTE(review): `NodeParseFn` is not declared anywhere in this file;
/// this only compiles while the function stays unreferenced (lazy
/// analysis) — confirm before calling it.
fn expectNode(
    p: *Parser,
    parseFn: NodeParseFn,
    /// if parsing fails
    err: AstError,
) Error!*Node {
    return (try p.expectNodeRecoverable(parseFn, err)) orelse return error.ParseError;
}
|
|
|
|
/// TODO Delete this function. I don't like the inversion of control.
/// Runs `parseFn`; on a null result records `err` in p.errors and
/// returns null instead of failing hard.
/// NOTE(review): `NodeParseFn` is not declared anywhere in this file;
/// this only compiles while the function stays unreferenced — confirm.
fn expectNodeRecoverable(
    p: *Parser,
    parseFn: NodeParseFn,
    /// if parsing fails
    err: AstError,
) !?*Node {
    return (try parseFn(p)) orelse {
        try p.errors.append(p.gpa, err);
        return null;
    };
}
|
|
|
|
// WARNING: VALID token identifier expected.
// Returns the slice of source text covered by token `id`.
fn giveTokenContent(p: *Parser, id: TokenIndex) []const u8 {
    return p.source[p.token_locs[id].start..p.token_locs[id].end];
}
|
|
};
|
|
|
|
// ParseFn: the type of a parsing function producing a T (or a parse
// error), for function-pointer-driven helpers like expectNode.
fn ParseFn(comptime T: type) type {
    return fn (p: *Parser) Error!T;
}
|
|
|
|
test "std.zig.parser" {
    // Reference the external test file so its tests run with this module's.
    _ = @import("parser_test.zig");
}
|