Parser: WIP (not to lose everything, again).
parent
80d46f16e7
commit
45b0f212a8
135
src/parse.zig
135
src/parse.zig
|
@ -8,7 +8,6 @@ const assert = std.debug.assert;
|
|||
const Allocator = std.mem.Allocator;
|
||||
const ast = @import("ast.zig");
|
||||
const Node = ast.Node;
|
||||
const Tree = ast.Tree;
|
||||
const AstError = ast.Error;
|
||||
const TokenIndex = ast.TokenIndex;
|
||||
const NodeIndex = ast.NodeIndex;
|
||||
|
@ -18,13 +17,31 @@ const Token = lexer.Token;
|
|||
const Tokenizer = lexer.Tokenizer;
|
||||
|
||||
const cs = @import("common-structures.zig");
|
||||
const PropertyValue = cs.PropertyValue;
|
||||
|
||||
pub const Error = error{ParseError} || Allocator.Error;
|
||||
|
||||
// Start => Requires Thing => End
|
||||
// Requires => require StringLiteral Requires | nil
|
||||
// Thing => Definition Thing | NodeHdr Thing | nil
|
||||
|
||||
// Definition => define Identifier NodeHdrSimple
|
||||
|
||||
// NodeHdr => NodeHdrFull LBrace NodeContent RBrace | NodeHdrSimple LBrace NodeContent RBrace
|
||||
// NodeHdrSimple: a node without identifier, for a definition or when this won't be used later.
|
||||
// NodeHdrSimple => Identifier
|
||||
// NodeHdrFull: a node with an identifier.
|
||||
// NodeHdrFull => Identifier LParen Identifier RParen
|
||||
// NodeContent => NodeHdr | statement NodeContent | nil
|
||||
|
||||
// Property keyword adds a new property to the definition,
|
||||
// without it, default property values can be changed.
|
||||
|
||||
// statement => Keyword_property Identifier Identifier Colon value
|
||||
// value => StringLiteral | Keyword_null | IntegerLiteral | FloatLiteral
|
||||
|
||||
/// Result should be freed with tree.deinit() when there are
|
||||
/// no more references to any of the tokens or nodes.
|
||||
pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!*Tree {
|
||||
pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!cs.Tree {
|
||||
|
||||
var token_ids = std.ArrayList(Token.Id).init(gpa);
|
||||
defer token_ids.deinit();
|
||||
|
@ -62,22 +79,12 @@ pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!*Tree {
|
|||
// Ignore the first line comments from our code.
|
||||
while (token_ids.items[parser.tok_i] == .LineComment) parser.tok_i += 1;
|
||||
|
||||
// Perform parsing, called once.
|
||||
const root_node = try parser.parseRoot();
|
||||
|
||||
// Perform parsing of the source, extract data, create a Tree structure and its content.
|
||||
// Create a parsing Tree, with the nodes parsed early on.
|
||||
// toOwnedSlice: free the memory and return the list. Arrays are empty,
|
||||
// allocator can be freed, arrays are owned by a different allocator.
|
||||
const tree = try parser.arena.allocator.create(Tree);
|
||||
tree.* = .{
|
||||
.gpa = gpa,
|
||||
.source = source,
|
||||
.token_ids = token_ids.toOwnedSlice(),
|
||||
.token_locs = token_locs.toOwnedSlice(),
|
||||
.errors = parser.errors.toOwnedSlice(gpa),
|
||||
.root_node = root_node,
|
||||
.arena = parser.arena.state,
|
||||
};
|
||||
const tree = try parser.parseTree();
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
|
@ -86,7 +93,7 @@ const Assignment = struct {
|
|||
id_value: TokenIndex,
|
||||
};
|
||||
|
||||
/// Represents in-progress parsing, will be converted to an ast.Tree after completion.
|
||||
/// Represents in-progress parsing, will be converted to a cs.Tree after completion.
|
||||
const Parser = struct {
|
||||
arena: std.heap.ArenaAllocator,
|
||||
gpa: *Allocator,
|
||||
|
@ -97,81 +104,29 @@ const Parser = struct {
|
|||
errors: std.ArrayListUnmanaged(AstError),
|
||||
indent: u16, // Indentation for debug.
|
||||
|
||||
/// Root <- skip ContainerMembers eof
|
||||
fn parseRoot(p: *Parser) Allocator.Error!*Node.Root {
|
||||
// Parse declarations.
|
||||
const decls = try parseContainerMembers(p, true);
|
||||
// parseTree: create a cs.Tree with all its content.
|
||||
fn parseTree(p: *Parser) Allocator.Error!cs.Tree {
|
||||
// Parse the content.
|
||||
const decls = try parseTopLevel(p, true);
|
||||
defer p.gpa.free(decls);
|
||||
|
||||
// parseContainerMembers will try to skip as much
|
||||
// parseTopLevel will try to skip as much
|
||||
// invalid tokens as it can so this can only be the EOF
|
||||
// eatToken returns next token or null (if current token id isn't parameter).
|
||||
// If current token is .Eof, next token is actually the first.
|
||||
const eof_token = p.eatToken(.Eof).?;
|
||||
const tree = try cs.Tree.create(&p.arena.allocator, decls_len, eof_token);
|
||||
|
||||
// Number of declarations becomes an ast.NodeIndex integer variable (usize).
|
||||
const decls_len = @intCast(NodeIndex, decls.len);
|
||||
const node = try Node.Root.create(&p.arena.allocator, decls_len, eof_token);
|
||||
// std.mem.copy: T, dest, src
|
||||
std.mem.copy(*Node, node.decls(), decls);
|
||||
|
||||
return node; // Root node.
|
||||
return tree;
|
||||
}
|
||||
|
||||
/// ContainerMembers
|
||||
/// <- TestDecl ContainerMembers
|
||||
/// / TopLevelComptime ContainerMembers
|
||||
/// / KEYWORD_pub? TopLevelDecl ContainerMembers
|
||||
/// / ContainerField COMMA ContainerMembers
|
||||
/// / ContainerField
|
||||
/// /
|
||||
// parseContainerMembers: actual parsing code starts here.
|
||||
fn parseContainerMembers(p: *Parser, top_level: bool) ![]*Node {
|
||||
// std.debug.print("parseContainerMembers: is top? {}\n", .{top_level});
|
||||
// parseTopLevel: actual parsing code starts here.
|
||||
fn parseTopLevel(p: *Parser, top_level: bool) ![]*Node {
|
||||
// std.debug.print("parseTopLevel: is top? {}\n", .{top_level});
|
||||
// list: all nodes in the ast.
|
||||
var list = std.ArrayList(*Node).init(p.gpa);
|
||||
defer list.deinit();
|
||||
|
||||
// field_state: union of enum.
|
||||
// Tagged union: eligible to use in switch expressions and coerce their value.
|
||||
// Example: switch (some_tagged_union) { SomeType => |value| print("{}\n", value); }
|
||||
// If a '*' is placed before the variable name, it's a pointer to the value inside
|
||||
// the tagged union.
|
||||
// Example: switch (some_tagged_union) { SomeType => |*value| value.* += 1; }
|
||||
// @TagType can be used to get the right enum type.
|
||||
var field_state: union(enum) {
|
||||
/// no fields have been seen
|
||||
none,
|
||||
/// currently parsing fields
|
||||
seen,
|
||||
/// saw fields and then a declaration after them.
|
||||
/// payload is first token of previous declaration.
|
||||
end: TokenIndex, // TokenIndex is defined as usize in std.zig.ast.
|
||||
/// there was a declaration between fields, don't report more errors
|
||||
err,
|
||||
} = .none;
|
||||
|
||||
// Start => Requires Thing => End
|
||||
// Requires => require StringLiteral Requires | nil
|
||||
// Thing => Definition Thing | ClassHdr Thing | nil
|
||||
|
||||
// Definition => define Identifier ClassHdrSimple
|
||||
|
||||
// ClassHdr => ClassHdrFull LBrace ClassCon RBrace |
|
||||
// ClassHdrSimple LBrace ClassCon RBrace
|
||||
// ClassHdrSimple: a node without identifier,
|
||||
// for a definition or when this won't be used later.
|
||||
// ClassHdrSimple => Identifier
|
||||
// ClassHdrFull: a node with an identifier.
|
||||
// ClassHdrFull => Identifier LParen Identifier RParen
|
||||
// ClassCon => ClassHdr | statement ClassCon | nil
|
||||
|
||||
// Property keyword adds a new property to the definition,
|
||||
// without it, default property values can be changed.
|
||||
|
||||
// statement => Keyword_property Identifier Identifier Colon value
|
||||
// value => StringLiteral | Keyword_null | IntegerLiteral | FloatLiteral
|
||||
|
||||
// True start of parsing.
|
||||
while (true) {
|
||||
const token = p.nextToken();
|
||||
|
@ -323,7 +278,7 @@ const Parser = struct {
|
|||
// p.putBackToken(following);
|
||||
// p.putBackToken(token);
|
||||
|
||||
const value: PropertyValue = try p.parseValue();
|
||||
const value: cs.PropertyValue = try p.parseValue();
|
||||
p.say("attribute {:>20} => {}\n"
|
||||
, .{ p.giveTokenContent(token)
|
||||
, value});
|
||||
|
@ -342,7 +297,7 @@ const Parser = struct {
|
|||
|
||||
const colon = p.expectToken(.Colon);
|
||||
|
||||
const value: PropertyValue = try p.parseValue();
|
||||
const value: cs.PropertyValue = try p.parseValue();
|
||||
p.say("attribute {:>20} => {}\n"
|
||||
, .{ p.source[attribute_loc.start..attribute_loc.end]
|
||||
, value});
|
||||
|
@ -389,20 +344,20 @@ const Parser = struct {
|
|||
// or a reference (for property binding stuff).
|
||||
// Simple values are copied and cast into the real type.
|
||||
// For a reference, we keep a copy of the string representation.
|
||||
fn parseValue(p: *Parser) !PropertyValue {
|
||||
fn parseValue(p: *Parser) !cs.PropertyValue {
|
||||
while(true) {
|
||||
const token = p.nextToken();
|
||||
switch (p.token_ids[token]) {
|
||||
// .Keyword_null => {
|
||||
// return PropertyValue{.nil};
|
||||
// return cs.PropertyValue{.nil};
|
||||
// },
|
||||
|
||||
.StringLiteral => {
|
||||
return PropertyValue{.string = p.giveTokenContent(token)};
|
||||
return cs.PropertyValue{.string = p.giveTokenContent(token)};
|
||||
},
|
||||
|
||||
.IntegerLiteral => {
|
||||
return PropertyValue{
|
||||
return cs.PropertyValue{
|
||||
.integer = try std.fmt.parseInt(u64, p.giveTokenContent(token), 10)
|
||||
};
|
||||
},
|
||||
|
@ -412,7 +367,7 @@ const Parser = struct {
|
|||
// , .{p.giveTokenContent(node_name)
|
||||
// , p.giveTokenContent(attribute_name)
|
||||
// , p.giveTokenContent(id_value)});
|
||||
return PropertyValue{
|
||||
return cs.PropertyValue{
|
||||
.float = try std.fmt.parseFloat(f64, p.giveTokenContent(token))
|
||||
};
|
||||
},
|
||||
|
@ -424,7 +379,7 @@ const Parser = struct {
|
|||
// p.say("loc: {}\n", .{loc});
|
||||
// const val: []const u8 = try p.parseReference();
|
||||
// p.say("value: {}\n", .{val});
|
||||
return PropertyValue{
|
||||
return cs.PropertyValue{
|
||||
.reference = p.source[loc.start..loc.end]
|
||||
};
|
||||
},
|
||||
|
@ -500,14 +455,13 @@ const Parser = struct {
|
|||
const type_name = try p.expectToken(.Identifier);
|
||||
const attribute_name = try p.expectToken(.Identifier);
|
||||
const colon = try p.expectToken(.Colon);
|
||||
const value: PropertyValue = try p.parseValue();
|
||||
const value: cs.PropertyValue = try p.parseValue();
|
||||
p.say("- {} (type {}) = {}\n"
|
||||
, .{ p.giveTokenContent(attribute_name)
|
||||
, p.giveTokenContent(type_name)
|
||||
, value});
|
||||
}
|
||||
|
||||
|
||||
fn parseFullNodeHeader(p: *Parser) !?[]const u8 {
|
||||
if (p.eatToken(.LParen) == null)
|
||||
return null;
|
||||
|
@ -520,9 +474,6 @@ const Parser = struct {
|
|||
return p.giveTokenContent(identifier);
|
||||
}
|
||||
|
||||
// fn parseStatement(p: *Parser) Error!?*Node {
|
||||
// }
|
||||
|
||||
|
||||
fn eatToken(p: *Parser, id: Token.Id) ?TokenIndex {
|
||||
return if (p.token_ids[p.tok_i] == id) p.nextToken() else null;
|
||||
|
|
Loading…
Reference in New Issue