Parser: WIP (not to lose everything, again).

mess
Karchnu 2020-12-23 01:54:20 +01:00
parent 80d46f16e7
commit 45b0f212a8
1 changed file with 43 additions and 92 deletions


@@ -8,7 +8,6 @@ const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ast = @import("ast.zig");
const Node = ast.Node;
const Tree = ast.Tree;
const AstError = ast.Error;
const TokenIndex = ast.TokenIndex;
const NodeIndex = ast.NodeIndex;
@@ -18,13 +17,31 @@ const Token = lexer.Token;
const Tokenizer = lexer.Tokenizer;
const cs = @import("common-structures.zig");
const PropertyValue = cs.PropertyValue;
pub const Error = error{ParseError} || Allocator.Error;
// Start => Requires Thing => End
// Requires => require StringLiteral Requires | nil
// Thing => Definition Thing | NodeHdr Thing | nil
// Definition => define Identifier NodeHdrSimple
// NodeHdr => NodeHdrFull LBrace NodeContent RBrace | NodeHdrSimple LBrace NodeContent RBrace
// NodeHdrSimple: a node without an identifier, for a definition or when the node won't be used later.
// NodeHdrSimple => Identifier
// NodeHdrFull: a node with an identifier.
// NodeHdrFull => Identifier LParen Identifier RParen
// NodeContent => NodeHdr | statement NodeContent | nil
// The property keyword adds a new property to the definition;
// without it, default property values can be changed.
// statement => Keyword_property Identifier Identifier Colon value
// value => StringLiteral | Keyword_null | IntegerLiteral | FloatLiteral
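For illustration, an input these rules would accept might look like the following (all names here are invented for the example, not taken from the repository):

    require "widgets.gui"
    define MyButton Rectangle
    MyButton (main) {
        property number width: 250
        property text label: "ok"
    }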
/// Result should be freed with tree.deinit() when there are
/// no more references to any of the tokens or nodes.
pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!*Tree {
pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!cs.Tree {
var token_ids = std.ArrayList(Token.Id).init(gpa);
defer token_ids.deinit();
@@ -62,22 +79,12 @@ pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!*Tree {
// Skip the line comments at the start of the source.
while (token_ids.items[parser.tok_i] == .LineComment) parser.tok_i += 1;
// Perform parsing, called once.
const root_node = try parser.parseRoot();
// Parse the source, extract the data, and create a Tree structure with its content.
// The parse Tree holds the nodes produced while parsing.
// toOwnedSlice: returns the underlying slice and empties the list;
// the memory is then owned by the caller, not by the list's allocator.
const tree = try parser.arena.allocator.create(Tree);
tree.* = .{
.gpa = gpa,
.source = source,
.token_ids = token_ids.toOwnedSlice(),
.token_locs = token_locs.toOwnedSlice(),
.errors = parser.errors.toOwnedSlice(gpa),
.root_node = root_node,
.arena = parser.arena.state,
};
const tree = try parser.parseTree();
return tree;
}
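A minimal caller sketch, assuming this file is named parser.zig and that the returned cs.Tree keeps the deinit() mentioned in the doc comment (neither assumption is confirmed by this commit):

    const std = @import("std");
    const parser = @import("parser.zig"); // hypothetical file name

    pub fn main() !void {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
        defer _ = gpa.deinit();

        // Parse an in-memory source; free the tree once its tokens and nodes are no longer used.
        var tree = try parser.parse(&gpa.allocator, "require \"widgets.gui\"\n");
        defer tree.deinit(); // assumed API, per the doc comment above
    }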
@@ -86,7 +93,7 @@ const Assignment = struct {
id_value: TokenIndex,
};
/// Represents in-progress parsing, will be converted to an ast.Tree after completion.
/// Represents in-progress parsing, will be converted to a cs.Tree after completion.
const Parser = struct {
arena: std.heap.ArenaAllocator,
gpa: *Allocator,
@@ -97,81 +104,29 @@ const Parser = struct {
errors: std.ArrayListUnmanaged(AstError),
indent: u16, // Indentation for debug.
/// Root <- skip ContainerMembers eof
fn parseRoot(p: *Parser) Allocator.Error!*Node.Root {
// Parse declarations.
const decls = try parseContainerMembers(p, true);
// parseTree: create a cs.Tree with all its content.
fn parseTree(p: *Parser) Allocator.Error!cs.Tree {
// Parse the content.
const decls = try parseTopLevel(p, true);
defer p.gpa.free(decls);
// parseContainerMembers will try to skip as much
// parseTopLevel will try to skip as many
// invalid tokens as it can, so this can only be the EOF.
// eatToken returns the next token, or null if the current token's id doesn't match the parameter.
// If the current token is .Eof, the next token is actually the first one.
const eof_token = p.eatToken(.Eof).?;
const tree = try cs.Tree.create(&p.arena.allocator, decls_len, eof_token);
// The number of declarations is cast to an ast.NodeIndex integer (usize).
const decls_len = @intCast(NodeIndex, decls.len);
const node = try Node.Root.create(&p.arena.allocator, decls_len, eof_token);
// std.mem.copy: T, dest, src
std.mem.copy(*Node, node.decls(), decls);
return node; // Root node.
return tree;
}
/// ContainerMembers
/// <- TestDecl ContainerMembers
/// / TopLevelComptime ContainerMembers
/// / KEYWORD_pub? TopLevelDecl ContainerMembers
/// / ContainerField COMMA ContainerMembers
/// / ContainerField
/// /
// parseContainerMembers: actual parsing code starts here.
fn parseContainerMembers(p: *Parser, top_level: bool) ![]*Node {
// std.debug.print("parseContainerMembers: is top? {}\n", .{top_level});
// parseTopLevel: actual parsing code starts here.
fn parseTopLevel(p: *Parser, top_level: bool) ![]*Node {
// std.debug.print("parseTopLevel: is top? {}\n", .{top_level});
// list: all nodes in the ast.
var list = std.ArrayList(*Node).init(p.gpa);
defer list.deinit();
// field_state: union of enum.
// Tagged unions can be used in switch expressions, and their payload value can be captured.
// Example: switch (some_tagged_union) { SomeType => |value| print("{}\n", value); }
// If a '*' is placed before the variable name, it's a pointer to the value inside
// the tagged union.
// Example: switch (some_tagged_union) { SomeType => |*value| value.* += 1; }
// @TagType can be used to get the right enum type.
var field_state: union(enum) {
/// no fields have been seen
none,
/// currently parsing fields
seen,
/// saw fields and then a declaration after them.
/// payload is first token of previous declaration.
end: TokenIndex, // TokenIndex is defined as usize in std.zig.ast.
/// there was a declaration between fields, don't report more errors
err,
} = .none;
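As an aside, the tagged-union behaviour described in the comment above can be shown with a tiny standalone sketch (independent of this parser):

    const std = @import("std");

    const Example = union(enum) {
        integer: i64,
        text: []const u8,
    };

    pub fn main() void {
        var v = Example{ .integer = 41 };
        switch (v) {
            // |*value| captures a pointer to the payload, so it can be modified in place.
            .integer => |*value| value.* += 1,
            // |text| captures the payload by value.
            .text => |text| std.debug.print("text: {}\n", .{text}),
        }
        std.debug.print("integer is now {}\n", .{v.integer});
    }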
// Start => Requires Thing => End
// Requires => require StringLiteral Requires | nil
// Thing => Definition Thing | ClassHdr Thing | nil
// Definition => define Identifier ClassHdrSimple
// ClassHdr => ClassHdrFull LBrace ClassCon RBrace |
// ClassHdrSimple LBrace ClassCon RBrace
// ClassHdrSimple: a node without identifier,
// for a definition or when this won't be used later.
// ClassHdrSimple => Identifier
// ClassHdrFull: a node with an identifier.
// ClassHdrFull => Identifier LParen Identifier RParen
// ClassCon => ClassHdr | statement ClassCon | nil
// Property keyword adds a new property to the definition,
// without it, default property values can be changed.
// statement => Keyword_property Identifier Identifier Colon value
// value => StringLiteral | Keyword_null | IntegerLiteral | FloatLiteral
// True start of parsing.
while (true) {
const token = p.nextToken();
@@ -323,7 +278,7 @@ const Parser = struct {
// p.putBackToken(following);
// p.putBackToken(token);
const value: PropertyValue = try p.parseValue();
const value: cs.PropertyValue = try p.parseValue();
p.say("attribute {:>20} => {}\n"
, .{ p.giveTokenContent(token)
, value});
@@ -342,7 +297,7 @@ const Parser = struct {
const colon = p.expectToken(.Colon);
const value: PropertyValue = try p.parseValue();
const value: cs.PropertyValue = try p.parseValue();
p.say("attribute {:>20} => {}\n"
, .{ p.source[attribute_loc.start..attribute_loc.end]
, value});
@@ -389,20 +344,20 @@ const Parser = struct {
// or a reference (for property binding stuff).
// Simple values are copied and cast into the real type.
// For a reference, we keep a copy of the string representation.
fn parseValue(p: *Parser) !PropertyValue {
fn parseValue(p: *Parser) !cs.PropertyValue {
while(true) {
const token = p.nextToken();
switch (p.token_ids[token]) {
// .Keyword_null => {
// return PropertyValue{.nil};
// return cs.PropertyValue{.nil};
// },
.StringLiteral => {
return PropertyValue{.string = p.giveTokenContent(token)};
return cs.PropertyValue{.string = p.giveTokenContent(token)};
},
.IntegerLiteral => {
return PropertyValue{
return cs.PropertyValue{
.integer = try std.fmt.parseInt(u64, p.giveTokenContent(token), 10)
};
},
@@ -412,7 +367,7 @@ const Parser = struct {
// , .{p.giveTokenContent(node_name)
// , p.giveTokenContent(attribute_name)
// , p.giveTokenContent(id_value)});
return PropertyValue{
return cs.PropertyValue{
.float = try std.fmt.parseFloat(f64, p.giveTokenContent(token))
};
},
@@ -424,7 +379,7 @@ const Parser = struct {
// p.say("loc: {}\n", .{loc});
// const val: []const u8 = try p.parseReference();
// p.say("value: {}\n", .{val});
return PropertyValue{
return cs.PropertyValue{
.reference = p.source[loc.start..loc.end]
};
},
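From the variants constructed in parseValue, cs.PropertyValue presumably has roughly the shape sketched below; this is inferred from the calls above, not the actual definition in common-structures.zig:

    // Inferred shape only; the real definition lives in common-structures.zig.
    pub const PropertyValue = union(enum) {
        string: []const u8,
        integer: u64,
        float: f64,
        reference: []const u8,
        // A nil/null variant may also exist: the .Keyword_null case above is commented out.
    };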
@@ -500,14 +455,13 @@ const Parser = struct {
const type_name = try p.expectToken(.Identifier);
const attribute_name = try p.expectToken(.Identifier);
const colon = try p.expectToken(.Colon);
const value: PropertyValue = try p.parseValue();
const value: cs.PropertyValue = try p.parseValue();
p.say("- {} (type {}) = {}\n"
, .{ p.giveTokenContent(attribute_name)
, p.giveTokenContent(type_name)
, value});
}
fn parseFullNodeHeader(p: *Parser) !?[]const u8 {
if (p.eatToken(.LParen) == null)
return null;
@@ -520,9 +474,6 @@ const Parser = struct {
return p.giveTokenContent(identifier);
}
// fn parseStatement(p: *Parser) Error!?*Node {
// }
fn eatToken(p: *Parser, id: Token.Id) ?TokenIndex {
return if (p.token_ids[p.tok_i] == id) p.nextToken() else null;