lily/src/parser.cpp

284 lines
11 KiB
C++

extern "C" {
#include "parser.h"
}
#include "parser.hpp"
#include "pattern.hpp"
namespace lily {
// Copies out the piece of `source` covered by a terminal tree node.
// The node's FROM offset is where the text starts and (TO - FROM) is its
// length.
std::string tree_str(pgs_tree* tree, const char* source) {
    const size_t begin = PGS_TREE_T_FROM(*tree);
    const size_t length = PGS_TREE_T_TO(*tree) - begin;
    return std::string(source + begin, length);
}
static ast_ptr expr_tree(pgs_tree*, const char*);
// Builds the AST for a single operand of an application.
//
// A node with exactly two children hands its second child to expr_tree.
// Otherwise the first child is treated as a token: if its text starts with a
// digit it becomes an ast_num (parsed with atoi), else an ast_var named after
// the token text.
static ast_ptr expr_app_bottom(pgs_tree* base, const char* source) {
    if(PGS_TREE_NT_COUNT(*base) == 2) {
        return expr_tree(PGS_TREE_NT_CHILD(*base, 1), source);
    }

    pgs_tree* token = PGS_TREE_NT_CHILD(*base, 0);
    const char* text = source + PGS_TREE_T_FROM(*token);
    // The <cctype> classifiers are undefined for negative arguments other
    // than EOF, so go through unsigned char before classifying the byte.
    if(isdigit(static_cast<unsigned char>(*text))) {
        return ast_ptr(new ast_num(atoi(text)));
    }
    return ast_ptr(new ast_var(tree_str(token, source)));
}
// Converts an application chain into nested ast_app nodes.
//
// While the current node has more than one child, a fresh ast_app is hung
// into the pending slot, its right operand is built from child 1, and the
// walk continues into child 0 with the new node's `left` as the next slot.
// The final single-child node fills the last open slot.
static ast_ptr expr_app(pgs_tree* app, const char* source) {
    ast_ptr root;
    ast_ptr* slot = &root;
    for(; PGS_TREE_NT_COUNT(*app) > 1; app = PGS_TREE_NT_CHILD(*app, 0)) {
        ast_app* node = new ast_app(nullptr, nullptr);
        *slot = ast_ptr(node);
        slot = &node->left;
        node->right = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 1), source);
    }
    *slot = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 0), source);
    return root;
}
// Builds the AST for a multiplicative expression.
//
// A single-child node is just an application. Otherwise child 0 is the left
// operand (another multiplicative node), child 1 the operator token and
// child 2 the right operand; '*' selects `times`, anything else `divide`.
static ast_ptr expr_mul(pgs_tree* mul, const char* source) {
    if(PGS_TREE_NT_COUNT(*mul) == 1) {
        return expr_app(PGS_TREE_NT_CHILD(*mul, 0), source);
    }

    const char symbol = source[PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*mul, 1))];
    enum ast_op::op kind = ast_op::op::divide;
    if(symbol == '*') {
        kind = ast_op::op::times;
    }

    // Left operand is built before the right one.
    ast_ptr lhs = expr_mul(PGS_TREE_NT_CHILD(*mul, 0), source);
    ast_ptr rhs = expr_app(PGS_TREE_NT_CHILD(*mul, 2), source);
    return ast_ptr(new ast_op(kind, std::move(lhs), std::move(rhs)));
}
// Builds the AST for an additive expression.
//
// A single-child node is just a multiplicative expression. Otherwise child 0
// is the left operand (another additive node), child 1 the operator token
// and child 2 the right operand; '+' selects `add`, anything else `subtract`.
static ast_ptr expr_add(pgs_tree* add, const char* source) {
    if(PGS_TREE_NT_COUNT(*add) == 1) {
        return expr_mul(PGS_TREE_NT_CHILD(*add, 0), source);
    }

    const char symbol = source[PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*add, 1))];
    enum ast_op::op kind = ast_op::op::subtract;
    if(symbol == '+') {
        kind = ast_op::op::add;
    }

    // Left operand is built before the right one.
    ast_ptr lhs = expr_add(PGS_TREE_NT_CHILD(*add, 0), source);
    ast_ptr rhs = expr_mul(PGS_TREE_NT_CHILD(*add, 2), source);
    return ast_ptr(new ast_op(kind, std::move(lhs), std::move(rhs)));
}
// Builds a let-style node of type T.
//
// Offset shifts the child indices: the bound name is child Offset + 1, the
// bound expression is wrapped in child Offset + 3 and the body expression in
// child Offset + 5 (each wrapper's child 1 is handed to expr_tree).
template <typename T, int Offset>
static ast_ptr expr_let(pgs_tree* let, const char* source) {
    std::string name = tree_str(PGS_TREE_NT_CHILD(*let, Offset + 1), source);

    pgs_tree* bound_wrapper = PGS_TREE_NT_CHILD(*let, Offset + 3);
    ast_ptr bound = expr_tree(PGS_TREE_NT_CHILD(*bound_wrapper, 1), source);

    pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*let, Offset + 5);
    ast_ptr body = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);

    return ast_ptr(new T(name, std::move(bound), std::move(body)));
}
// Builds a pattern from a pattern tree node.
//
// A single-child node is a bare name: a name starting with an uppercase
// letter becomes a pattern_cons without variables, any other name becomes a
// pattern_var. A longer node is a constructor pattern: child 0 is the
// constructor name and child 2 is the list of bound variable names, which are
// appended to `vnames` in source order.
static pattern_ptr pattern_tree(pgs_tree* tree, const char* source) {
    std::string head = tree_str(PGS_TREE_NT_CHILD(*tree, 0), source);

    if(PGS_TREE_NT_COUNT(*tree) == 1) {
        // The <cctype> classifiers are undefined for negative arguments other
        // than EOF, so go through unsigned char before classifying the byte.
        if(isupper(static_cast<unsigned char>(head[0]))) {
            return pattern_ptr(new pattern_cons(head));
        }
        return pattern_ptr(new pattern_var(head));
    }

    pattern_cons* cons = new pattern_cons(head);
    pattern_ptr owner = pattern_ptr(cons);

    // Walk the variable list node itself: each step records the list's first
    // element and moves on to the remainder stored in its child 2. Reading
    // from `tree` here instead would never advance and would record the
    // constructor name rather than the variable names.
    // NOTE(review): assumes list nodes are `name <separator> rest`, matching
    // the child indices 0 and 2 already used here -- confirm against the grammar.
    pgs_tree* patterns = PGS_TREE_NT_CHILD(*tree, 2);
    while(PGS_TREE_NT_COUNT(*patterns) > 1) {
        cons->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*patterns, 0), source));
        patterns = PGS_TREE_NT_CHILD(*patterns, 2);
    }
    cons->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*patterns, 0), source));
    return owner;
}
// Builds one case branch: the pattern comes from child 0, the branch
// expression from child 1 of the wrapper stored in child 2.
static ast_case::branch expr_branch(pgs_tree* c, const char* source) {
    ast_case::branch result;

    pgs_tree* pattern_node = PGS_TREE_NT_CHILD(*c, 0);
    result.pattern = pattern_tree(pattern_node, source);

    pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*c, 2);
    result.expr = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);

    return result;
}
// Builds an ast_case node.
//
// The scrutinee (`of`) is built from child 1; child 4 is the branch list.
// A list node with two children holds one branch in child 0 and the rest of
// the list in child 1; the last list node holds its branch in child 0.
// Branches are appended in source order.
static ast_ptr expr_case(pgs_tree* c, const char* source) {
    ast_case* node = new ast_case();
    ast_ptr owner = ast_ptr(node);
    node->of = expr_tree(PGS_TREE_NT_CHILD(*c, 1), source);

    pgs_tree* rest = PGS_TREE_NT_CHILD(*c, 4);
    for(; PGS_TREE_NT_COUNT(*rest) == 2; rest = PGS_TREE_NT_CHILD(*rest, 1)) {
        node->branches.push_back(expr_branch(PGS_TREE_NT_CHILD(*rest, 0), source));
    }
    node->branches.push_back(expr_branch(PGS_TREE_NT_CHILD(*rest, 0), source));
    return owner;
}
// Builds the AST for an expression wrapper node by dispatching on the
// nonterminal kind of its first child. Throws error("unknown expression
// type") when the kind is none of add / let / letrec / case.
static ast_ptr expr_tree(pgs_tree* body, const char* source) {
    pgs_tree* node = PGS_TREE_NT_CHILD(*body, 0);
    const int kind = PGS_TREE_NT(*node);
    switch(kind) {
        case PGS_NONTERMINAL_EXPR_ADD:
            return expr_add(node, source);
        case PGS_NONTERMINAL_EXPR_LET:
            return expr_let<ast_let, 0>(node, source);
        case PGS_NONTERMINAL_EXPR_LETREC:
            return expr_let<ast_letrec, 1>(node, source);
        case PGS_NONTERMINAL_EXPR_CASE:
            return expr_case(node, source);
        default:
            break;
    }
    throw error("unknown expression type");
}
static void collect_params(std::vector<std::string>& into, pgs_tree* def, const char* source) {
if(PGS_TREE_NT_COUNT(*def) == 5) {
pgs_tree* params = PGS_TREE_NT_CHILD(*def, 2);
do {
pgs_tree* param = PGS_TREE_NT_CHILD(*params, 0);
params =
(PGS_TREE_NT_COUNT(*params) == 2) ? PGS_TREE_NT_CHILD(*params, 1) : nullptr;
size_t from = PGS_TREE_T_FROM(*param);
size_t to = PGS_TREE_T_TO(*param);
std::string new_string =
std::string(source + from, to - from);
into.push_back(new_string);
} while(params);
}
}
// Resolves a type node to a type already registered in `prog.types`.
// The type name is the text of the node's first child; an unregistered name
// raises error("unknown type").
static type_ptr type_tree(program& prog, pgs_tree* type, const char* source) {
    const std::string name = tree_str(PGS_TREE_NT_CHILD(*type, 0), source);
    auto found = prog.types.find(name);
    if(found == prog.types.end()) {
        throw error("unknown type");
    }
    return found->second;
}
// Appends the resolved type of every entry in a type-parameter list to
// `into`. Each list node holds one type in child 0; a node with a single
// child ends the list, otherwise the remainder of the list is in child 2.
static void collect_type_params(program& prog, std::vector<type_ptr>& into, pgs_tree* params, const char* source) {
    for(pgs_tree* node = params; ; node = PGS_TREE_NT_CHILD(*node, 2)) {
        into.push_back(type_tree(prog, PGS_TREE_NT_CHILD(*node, 0), source));
        if(PGS_TREE_NT_COUNT(*node) == 1) {
            return;
        }
    }
}
// Registers every constructor of a data definition in `prog.constructors`.
//
// Constructors are tagged 0, 1, 2, ... in source order and all carry
// `parent` as their parent type. Each list node holds one constructor in
// child 0; a constructor with more than one child has its parameter types in
// child 2. A list node with a single child ends the list, otherwise the
// remainder of the list is in child 2. A later constructor with the same name
// overwrites the earlier map entry.
static void collect_constructors(program& prog, int parent, pgs_tree* def, const char* source) {
    for(int tag = 0; ; ++tag) {
        constructor entry;
        entry.tag = tag;
        entry.parent_type = parent;

        pgs_tree* elem = PGS_TREE_NT_CHILD(*def, 0);
        std::string name = tree_str(PGS_TREE_NT_CHILD(*elem, 0), source);
        const bool has_params = PGS_TREE_NT_COUNT(*elem) > 1;
        if(has_params) {
            collect_type_params(prog, entry.params, PGS_TREE_NT_CHILD(*elem, 2), source);
        }
        prog.constructors[name] = std::move(entry);

        if(PGS_TREE_NT_COUNT(*def) == 1) {
            break;
        }
        def = PGS_TREE_NT_CHILD(*def, 2);
    }
}
// Adds one top-level definition to `prog`.
//
// The kind of definition is decided from the text of the leading token
// (child 0): a first character 't' is ignored for now, a second character 'e'
// marks a function definition, and everything else is treated as a data
// definition. For both handled kinds the name is child 1 and the payload is
// child 1 of the definition's last child.
// Throws error("cannot redefine function") / error("cannot redefine type")
// when the name is already registered.
static void add_definition(program& prog, pgs_tree* def, const char* source) {
    const size_t keyword_at = PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*def, 0));
    if(source[keyword_at] == 't') {
        // skip for now
        return;
    }

    if(source[keyword_at + 1] == 'e') {
        std::string function_name = tree_str(PGS_TREE_NT_CHILD(*def, 1), source);
        if(prog.functions.count(function_name)) {
            throw error("cannot redefine function");
        }

        function fn;
        collect_params(fn.params, def, source);
        pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*def, PGS_TREE_NT_COUNT(*def) - 1);
        fn.body = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);
        prog.functions[function_name] = std::move(fn);
        return;
    }

    std::string data_name = tree_str(PGS_TREE_NT_CHILD(*def, 1), source);
    if(prog.types.count(data_name)) {
        throw error("cannot redefine type");
    }

    // The type id is claimed before the constructors are collected; the type
    // name itself is only registered afterwards.
    int type_id = prog.next_free_type_id++;
    pgs_tree* cons_wrapper = PGS_TREE_NT_CHILD(*def, PGS_TREE_NT_COUNT(*def) - 1);
    collect_constructors(prog, type_id, PGS_TREE_NT_CHILD(*cons_wrapper, 1), source);
    prog.types[data_name] = type_ptr(new type_int(type_id));
}
// Creates a fresh program and feeds it every definition found in the tree.
//
// The definition list is child 0 of the root. Each list node holds one
// definition in child 0 and, when it has two children, the remainder of the
// list in child 1. Definitions are added in source order.
static program_ptr build_program(pgs_tree* tree, const char* source) {
    program_ptr result = std::make_unique<program>();

    pgs_tree* list = PGS_TREE_NT_CHILD(*tree, 0);
    for(;;) {
        pgs_tree* definition = PGS_TREE_NT_CHILD(*list, 0);
        pgs_tree* next = nullptr;
        if(PGS_TREE_NT_COUNT(*list) == 2) {
            next = PGS_TREE_NT_CHILD(*list, 1);
        }

        add_definition(*result, definition, source);

        if(!next) {
            break;
        }
        list = next;
    }
    return result;
}
// Prints the names of all constructors, types and functions registered in
// `prog` to std::cout, one section after another. Function lines also list
// the parameter names, separated by spaces.
static void dump_program(program& prog) {
    std::cout << "Constructors:" << std::endl;
    for(const auto& entry : prog.constructors) {
        std::cout << " " << entry.first << std::endl;
    }
    std::cout << std::endl;

    std::cout << "Types:" << std::endl;
    for(const auto& entry : prog.types) {
        std::cout << " " << entry.first << std::endl;
    }
    std::cout << std::endl;

    std::cout << "Functions:" << std::endl;
    for(const auto& entry : prog.functions) {
        std::cout << " " << entry.first;
        for(const auto& param_name : entry.second.params) {
            std::cout << " " << param_name;
        }
        std::cout << std::endl;
    }
    std::cout << std::endl;
}
// Parses `s` and builds a program from it.
//
// Runs the generated parser over the string, converts the resulting tree
// with build_program, prints a summary via dump_program and returns the
// program. Throws error("failed to parse input string") when the parser
// reports anything other than PGS_NONE; errors raised while building the
// program propagate to the caller.
program_ptr parse(std::string s) {
    pgs_state state;
    pgs_tree* into;
    pgs_state_init(&state);
    if(pgs_do_all(&state, &into, s.c_str()) != PGS_NONE) {
        throw error("failed to parse input string");
    }

    // Release the parse tree on every exit path: build_program can throw
    // (e.g. on a redefinition), which would otherwise skip the free.
    struct tree_guard {
        pgs_tree* tree;
        ~tree_guard() { pgs_free_tree(tree); }
    } guard{into};

    // NOTE(review): `state` is initialised but never released here -- check
    // whether the parser API expects a matching cleanup call.
    program_ptr prog = build_program(into, s.c_str());
    dump_program(*prog);
    return prog;
}
// Sets up a program with the built-in "Int" and "String" types registered
// and the next free type id starting at type_id_last.
program::program() {
    next_free_type_id = type_id_last;

    type_ptr int_type(new type_int(type_id_int));
    type_ptr str_type(new type_int(type_id_str));
    types["Int"] = std::move(int_type);
    types["String"] = std::move(str_type);
}
}