lily/src/parser.cpp

393 lines
16 KiB
C++

extern "C" {
#include "parser.h"
}
#include "parser.hpp"
#include "pattern.hpp"
#include "type_checker.hpp"
#include "llvm.hpp"
#include "llvm/IR/Verifier.h"
#include <memory>
namespace lily {
// Returns the text of `source` covered by `tree`: the characters in the
// half-open range [FROM, TO) recorded on the node.
std::string tree_str(pgs_tree* tree, const char* source) {
    const size_t begin = PGS_TREE_T_FROM(*tree);
    const size_t length = PGS_TREE_T_TO(*tree) - begin;
    return std::string(source + begin, length);
}
static ast_ptr expr_tree(pgs_tree*, const char*);
// Converts the innermost operand of an application into an AST node.
//
// A node with exactly three children is handled by recursing into its middle
// child (presumably a parenthesised expression -- confirm against the
// grammar). Otherwise the first child is treated as a token: text starting
// with a digit becomes an ast_num (parsed with atoi), anything else becomes
// an ast_var named after the token text.
static ast_ptr expr_app_bottom(pgs_tree* base, const char* source) {
    if(PGS_TREE_NT_COUNT(*base) == 3) {
        return expr_tree(PGS_TREE_NT_CHILD(*base, 1), source);
    }

    pgs_tree* token = PGS_TREE_NT_CHILD(*base, 0);
    const char* text = source + PGS_TREE_T_FROM(*token);
    // The <cctype> classifiers are undefined for negative values, which a
    // plain `char` holding a non-ASCII byte can be; go through unsigned char.
    if(isdigit(static_cast<unsigned char>(*text))) {
        return ast_ptr(new ast_num(atoi(text)));
    }
    return ast_ptr(new ast_var(tree_str(token, source)));
}
// Builds the AST for a function application chain.
//
// While the node has more than one child, child 1 is the right-hand operand
// and child 0 is the remaining (inner) application, so the result is a
// left-nested chain of ast_app nodes. The innermost left slot is filled with
// the single remaining operand.
static ast_ptr expr_app(pgs_tree* app, const char* source) {
    ast_ptr root;
    // Always points at the not-yet-filled slot where the next node belongs.
    ast_ptr* slot = &root;
    for(; PGS_TREE_NT_COUNT(*app) > 1; app = PGS_TREE_NT_CHILD(*app, 0)) {
        ast_app* node = new ast_app(nullptr, nullptr);
        *slot = ast_ptr(node);
        node->right = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 1), source);
        slot = &node->left;
    }
    *slot = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 0), source);
    return root;
}
// Builds the AST for a multiplicative expression.
//
// A single-child node is just an application. Otherwise the children are
// (left, operator, right): the operator is `times` when its first character
// is '*' and `divide` for anything else; the left side recurses into
// expr_mul and the right side is an application.
static ast_ptr expr_mul(pgs_tree* mul, const char* source) {
    if(PGS_TREE_NT_COUNT(*mul) == 1) {
        return expr_app(PGS_TREE_NT_CHILD(*mul, 0), source);
    }

    const char symbol = source[PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*mul, 1))];
    binop kind = binop::divide;
    if(symbol == '*') kind = binop::times;

    ast_ptr lhs = expr_mul(PGS_TREE_NT_CHILD(*mul, 0), source);
    ast_ptr rhs = expr_app(PGS_TREE_NT_CHILD(*mul, 2), source);
    return ast_ptr(new ast_op(kind, std::move(lhs), std::move(rhs)));
}
// Builds the AST for an additive expression.
//
// A single-child node is just a multiplicative expression. Otherwise the
// children are (left, operator, right): the operator is `add` when its first
// character is '+' and `subtract` for anything else; the left side recurses
// into expr_add and the right side is a multiplicative expression.
static ast_ptr expr_add(pgs_tree* add, const char* source) {
    if(PGS_TREE_NT_COUNT(*add) == 1) {
        return expr_mul(PGS_TREE_NT_CHILD(*add, 0), source);
    }

    const char symbol = source[PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*add, 1))];
    binop kind = binop::subtract;
    if(symbol == '+') kind = binop::add;

    ast_ptr lhs = expr_add(PGS_TREE_NT_CHILD(*add, 0), source);
    ast_ptr rhs = expr_mul(PGS_TREE_NT_CHILD(*add, 2), source);
    return ast_ptr(new ast_op(kind, std::move(lhs), std::move(rhs)));
}
// Builds a let-style AST node of type T.
//
// Offset shifts the child indices, so the same code serves node shapes whose
// leading part differs in length (used with 0 for ast_let and 1 for
// ast_letrec). Relative to Offset: child 1 is the bound name, child 3 wraps
// the bound expression and child 5 wraps the body; in both wrappers the
// expression itself is child 1.
template <typename T, int Offset>
static ast_ptr expr_let(pgs_tree* let, const char* source) {
    pgs_tree* name_node = PGS_TREE_NT_CHILD(*let, Offset + 1);
    pgs_tree* bound_wrapper = PGS_TREE_NT_CHILD(*let, Offset + 3);
    pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*let, Offset + 5);

    std::string name = tree_str(name_node, source);
    ast_ptr bound = expr_tree(PGS_TREE_NT_CHILD(*bound_wrapper, 1), source);
    ast_ptr body = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);
    return ast_ptr(new T(name, std::move(bound), std::move(body)));
}
// Converts a pattern node into a pattern object.
//
// Single child: the token is a constructor pattern (pattern_cons) when it
// starts with an upper-case letter, and a variable pattern (pattern_var)
// otherwise.
// Several children: child 0 is the constructor name and child 2 is a list of
// names, each list node holding one name at child 0 and the rest of the list
// at child 2; the names are appended to the constructor pattern's `vnames`.
static pattern_ptr pattern_tree(pgs_tree* tree, const char* source) {
    std::string head = tree_str(PGS_TREE_NT_CHILD(*tree, 0), source);

    if(PGS_TREE_NT_COUNT(*tree) == 1) {
        // The <cctype> classifiers are undefined for negative values, which a
        // plain `char` holding a non-ASCII byte can be; go through unsigned char.
        if(isupper(static_cast<unsigned char>(head[0]))) {
            return pattern_ptr(new pattern_cons(head));
        }
        return pattern_ptr(new pattern_var(head));
    }

    pattern_cons* cons = new pattern_cons(head);
    // Hand ownership over immediately so nothing leaks if tree_str throws below.
    pattern_ptr owner = pattern_ptr(cons);

    pgs_tree* names = PGS_TREE_NT_CHILD(*tree, 2);
    while(PGS_TREE_NT_COUNT(*names) > 1) {
        cons->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*names, 0), source));
        names = PGS_TREE_NT_CHILD(*names, 2);
    }
    // The final list node carries only the last name.
    cons->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*names, 0), source));
    return owner;
}
// Converts one case branch: child 0 is the pattern, child 2 wraps the branch
// body (the expression itself is child 1 of that wrapper).
static ast_case::branch expr_branch(pgs_tree* c, const char* source) {
    pgs_tree* pattern_node = PGS_TREE_NT_CHILD(*c, 0);
    pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*c, 2);

    ast_case::branch result;
    result.pattern = pattern_tree(pattern_node, source);
    result.expr = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);
    return result;
}
// Builds an ast_case node: child 1 is the expression being matched on and
// child 4 is the branch list. A list node with two children holds a branch at
// child 0 and the rest of the list at child 1; the final list node holds only
// the last branch at child 0. Branches are appended in source order.
static ast_ptr expr_case(pgs_tree* c, const char* source) {
    ast_case* node = new ast_case();
    ast_ptr owner = ast_ptr(node);
    node->of = expr_tree(PGS_TREE_NT_CHILD(*c, 1), source);

    pgs_tree* rest = PGS_TREE_NT_CHILD(*c, 4);
    for(; PGS_TREE_NT_COUNT(*rest) == 2; rest = PGS_TREE_NT_CHILD(*rest, 1)) {
        node->branches.push_back(expr_branch(PGS_TREE_NT_CHILD(*rest, 0), source));
    }
    node->branches.push_back(expr_branch(PGS_TREE_NT_CHILD(*rest, 0), source));
    return owner;
}
// Entry point for expression conversion: looks at the nonterminal kind of the
// body's first child and forwards to the matching builder.
// Throws error("unknown expression type") for any other kind.
static ast_ptr expr_tree(pgs_tree* body, const char* source) {
    pgs_tree* inner = PGS_TREE_NT_CHILD(*body, 0);
    const int kind = PGS_TREE_NT(*inner);

    if(kind == PGS_NONTERMINAL_EXPR_ADD) return expr_add(inner, source);
    if(kind == PGS_NONTERMINAL_EXPR_LET) return expr_let<ast_let, 0>(inner, source);
    if(kind == PGS_NONTERMINAL_EXPR_LETREC) return expr_let<ast_letrec, 1>(inner, source);
    if(kind == PGS_NONTERMINAL_EXPR_CASE) return expr_case(inner, source);

    throw error("unknown expression type");
}
static void collect_params(std::vector<std::string>& into, pgs_tree* def, const char* source) {
if(PGS_TREE_NT_COUNT(*def) == 5) {
pgs_tree* params = PGS_TREE_NT_CHILD(*def, 2);
do {
pgs_tree* param = PGS_TREE_NT_CHILD(*params, 0);
params =
(PGS_TREE_NT_COUNT(*params) == 2) ? PGS_TREE_NT_CHILD(*params, 1) : nullptr;
size_t from = PGS_TREE_T_FROM(*param);
size_t to = PGS_TREE_T_TO(*param);
std::string new_string =
std::string(source + from, to - from);
into.push_back(new_string);
} while(params);
}
}
// Resolves a type node to a type object by looking up the text of its first
// child through the program's type manager (require_type).
static type* type_tree(program& prog, pgs_tree* type, const char* source) {
    pgs_tree* name_node = PGS_TREE_NT_CHILD(*type, 0);
    return prog.type_mgr.require_type(tree_str(name_node, source));
}
// Appends the types named in a type-parameter list to `into`, in order.
// Each list node holds one type at child 0; a node with a single child is the
// end of the list, otherwise the rest of the list is child 2.
static void collect_type_params(program& prog, std::vector<type*>& into, pgs_tree* params, const char* source) {
    pgs_tree* cursor = params;
    for(;;) {
        into.push_back(type_tree(prog, PGS_TREE_NT_CHILD(*cursor, 0), source));
        if(PGS_TREE_NT_COUNT(*cursor) == 1) return;
        cursor = PGS_TREE_NT_CHILD(*cursor, 2);
    }
}
// Registers every constructor listed in a data definition on `parent`.
//
// Each list node holds one constructor element at child 0; a node with a
// single child ends the list, otherwise the rest of the list is child 2.
// Within an element, child 0 is the constructor name and, when the element
// has more than one child, child 2 is its list of parameter types.
static void collect_constructors(program& prog, type_data* parent, pgs_tree* def, const char* source) {
    while(true) {
        pgs_tree* elem = PGS_TREE_NT_CHILD(*def, 0);
        std::vector<type*> params;
        std::string name = tree_str(PGS_TREE_NT_CHILD(*elem, 0), source);
        if(PGS_TREE_NT_COUNT(*elem) > 1)
            collect_type_params(prog, params, PGS_TREE_NT_CHILD(*elem, 2), source);
        parent->create_constructor(name, std::move(params));
        if(PGS_TREE_NT_COUNT(*def) == 1) break;
        def = PGS_TREE_NT_CHILD(*def, 2);
    }
}
// Adds one top-level definition to `prog`.
//
// The kind of definition is decided from the first characters of the leading
// token (presumably the introducing keyword -- confirm against the grammar):
//   - first character 't': ignored for now;
//   - second character 'e': a function definition;
//   - anything else: a data type definition.
// In both handled cases child 1 is the name and the last child wraps the body
// (the body itself is child 1 of that wrapper).
// Throws error("cannot redefine function") if the function name is taken.
static void add_definition(program& prog, pgs_tree* def, const char* source) {
    const char* keyword = source + PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*def, 0));
    if(keyword[0] == 't') {
        // skip for now
        return;
    }

    std::string name = tree_str(PGS_TREE_NT_CHILD(*def, 1), source);
    pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*def, PGS_TREE_NT_COUNT(*def) - 1);

    if(keyword[1] == 'e') {
        if(prog.functions.count(name)) throw error("cannot redefine function");
        function new_function;
        collect_params(new_function.params, def, source);
        new_function.body = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);
        prog.functions[name] = std::move(new_function);
        return;
    }

    type_data* new_type = prog.type_mgr.create_data_type(name);
    collect_constructors(prog, new_type, PGS_TREE_NT_CHILD(*body_wrapper, 1), source);
}
// Creates a fresh program and feeds it every definition in the parse tree.
// The definition list is child 0 of the root; each list node holds one
// definition at child 0 and, when it has two children, the rest of the list
// at child 1.
static program_ptr build_program(pgs_tree* tree, const char* source) {
    program_ptr result(new program);

    pgs_tree* list = PGS_TREE_NT_CHILD(*tree, 0);
    while(true) {
        pgs_tree* definition = PGS_TREE_NT_CHILD(*list, 0);
        pgs_tree* next =
            (PGS_TREE_NT_COUNT(*list) == 2) ? PGS_TREE_NT_CHILD(*list, 1) : nullptr;
        add_definition(*result, definition, source);
        if(!next) break;
        list = next;
    }
    return result;
}
// Parses `s` into a type-checked program.
//
// Runs the generated parser over the string, converts the resulting tree
// into a program and calls program::check() on it.
// Throws error("failed to parse input string") when the parser reports
// anything other than PGS_NONE; errors from building or checking propagate.
program_ptr parse(std::string s) {
    pgs_state state;
    pgs_tree* into;
    pgs_state_init(&state);
    // NOTE(review): no cleanup call for `state` is visible in this file --
    // confirm whether the parser API requires one.
    if(pgs_do_all(&state, &into, s.c_str()) != PGS_NONE) {
        throw error("failed to parse input string");
    }

    // Release the parse tree on every exit path: build_program() and check()
    // can both throw, which previously skipped pgs_free_tree() and leaked it.
    struct tree_guard {
        pgs_tree* tree;
        ~tree_guard() { pgs_free_tree(tree); }
    } guard{into};

    program_ptr prog = build_program(into, s.c_str());
    prog->check();
    return prog;
}
// Sets up the built-in "Bool" data type with two parameterless constructors,
// "True" and "False".
program::program() {
    type_data* data = type_mgr.create_data_type("Bool");
    // Pass a fresh empty vector to each call instead of moving from the same
    // local twice (the second call would otherwise use a moved-from object).
    data->create_constructor("True", std::vector<type*>());
    data->create_constructor("False", std::vector<type*>());
}
// Type-checks every function in the program.
//
// Pass 1 gives each function a fresh type built from type parameters (one per
// argument plus one for the result) and records it in a shared base
// environment, so bodies can refer to any function regardless of order.
// Pass 2 type-checks each body in that function's own environment and
// unifies the result with the function's result type.
// Throws error("unable to unify function type") when unification fails.
void program::check() {
    // Each function has an environment, which will be used
    // as base for type checking.
    std::map<std::string, std::shared_ptr<type_env>> function_envs;
    std::map<std::string, type*> function_output_types;
    auto base_env = std::make_shared<type_env>();

    // First step is to collect all function types.
    for(auto& pair : functions) {
        // Create a local function environment, chained to the base one.
        auto function_env = std::make_shared<type_env>();
        function_env->set_parent(base_env.get());
        function_envs[pair.first] = function_env;

        // We'll be building up the function type, starting from a type
        // parameter that stands for the return type.
        type* return_type = type_mgr.create_type<type_parameter>();
        type* current_type = return_type;
        function_output_types[pair.first] = return_type;

        // Create a type parameter for every argument and bind it in the
        // local environment. Walking the arguments last-to-first wraps the
        // type from the inside out, so the first argument ends up outermost.
        // Reverse iterators avoid the signed/unsigned index comparison.
        auto& params = pair.second.params;
        for(auto it = params.rbegin(); it != params.rend(); ++it) {
            type* variable_param = type_mgr.create_type<type_parameter>();
            function_env->set_type(*it, variable_param);
            current_type =
                type_mgr.create_type<type_func>(variable_param, current_type);
        }

        // Store function in env.
        base_env->set_type(pair.first, current_type);
    }

    // Also add internal functions: "eq" takes two Int arguments and
    // produces a Bool.
    type* eq_return_type = type_mgr.require_type("Bool");
    type* eq_param_type = type_mgr.require_type("Int");
    type* eq_app_1 = type_mgr.create_type<type_func>(eq_param_type, eq_return_type);
    type* eq_app_2 = type_mgr.create_type<type_func>(eq_param_type, eq_app_1);
    base_env->set_type("eq", eq_app_2);

    // We also want to gather all the constructor calls.
    type_mgr.register_constructors(base_env);

    // Now that we have collected the functions, check their bodies.
    for(auto& pair : functions) {
        type* body_type =
            pair.second.body->typecheck(type_mgr, function_envs[pair.first]);
        if(!function_output_types[pair.first]->unify_with(body_type))
            throw error("unable to unify function type");
    }
}
// Emits the instruction sequence for the built-in binary operator `o` and
// stores it in `into` under the name returned by op_supercombinator(o).
// The sequence is: push(1), eval, push(1), eval, op(o), update(2), pop(2).
template <binop o>
static void generate_binop(instruction_manager& mgr, std::map<std::string, std::vector<instruction*>>& into) {
    std::vector<instruction*> code;
    auto emit = [&code](instruction* instr) { code.push_back(instr); };

    emit(mgr.add_instruction<instruction_push>(1));
    emit(mgr.add_instruction<instruction_eval>());
    emit(mgr.add_instruction<instruction_push>(1));
    emit(mgr.add_instruction<instruction_eval>());
    emit(mgr.add_instruction<instruction_op>(o));
    emit(mgr.add_instruction<instruction_update>(2));
    emit(mgr.add_instruction<instruction_pop>(2));

    into[op_supercombinator(o)] = std::move(code);
}
// Registers the instruction sequences of all built-in supercombinators: the
// four arithmetic operators, plus "eq", whose sequence is
// push(1), eval, push(1), eval, eq, update(2), pop(2).
static void register_internal(instruction_manager& mgr, std::map<std::string, std::vector<instruction*>>& into) {
    generate_binop<binop::add>(mgr, into);
    generate_binop<binop::subtract>(mgr, into);
    generate_binop<binop::times>(mgr, into);
    generate_binop<binop::divide>(mgr, into);

    // Elements of a braced list are evaluated left to right, so the
    // instructions are created in the order they are listed.
    std::vector<instruction*> eq_code = {
        mgr.add_instruction<instruction_push>(1),
        mgr.add_instruction<instruction_eval>(),
        mgr.add_instruction<instruction_push>(1),
        mgr.add_instruction<instruction_eval>(),
        mgr.add_instruction<instruction_eq>(),
        mgr.add_instruction<instruction_update>(2),
        mgr.add_instruction<instruction_pop>(2),
    };
    into["eq"] = std::move(eq_code);
}
// Compiles the whole program into per-name instruction sequences in `into`.
//
// Built-ins and constructor supercombinators are registered first. Each user
// function is then compiled in an environment chain that starts at a zero
// offset and gets one variable layer per parameter, added last-to-first so
// the first parameter ends up as the innermost layer. Every function body is
// followed by update(n), pop(n) and unwind, where n is the parameter count.
void program::compile(instruction_manager& mgr, std::map<std::string, std::vector<instruction*>>& into) {
    register_internal(mgr, into);
    type_mgr.register_constructor_supercombs(mgr, into);

    for(auto& entry : functions) {
        auto& params = entry.second.params;
        size_t arity = params.size();

        std::shared_ptr<compile_env> env = std::make_shared<compile_env_offset>(0);
        for(auto it = params.rbegin(); it != params.rend(); ++it) {
            auto layer = std::make_shared<compile_env_var>(*it);
            layer->set_parent(env);
            env = layer;
        }

        std::vector<instruction*> code;
        entry.second.body->compile(mgr, code, env);
        code.push_back(mgr.add_instruction<instruction_update>(arity));
        code.push_back(mgr.add_instruction<instruction_pop>(arity));
        code.push_back(mgr.add_instruction<instruction_unwind>());
        into[entry.first] = std::move(code);
    }
}
void program::gen_llvm() {
llvm_init();
llvm_context ctx;
instruction_manager mgr;
std::map<std::string, std::vector<instruction*>> gcode;
compile(mgr, gcode);
for(auto& pair : gcode) {
int arity = functions.count(pair.first) ? functions[pair.first].params.size() : type_mgr.constructor_arity(pair.first);
if(arity == -1) arity = 2;
ctx.add_supercombinator(pair.first, arity);
}
for(auto& pair : gcode) {
std::vector<instruction*>& comb_gcode = pair.second;
llvm::Function* current_function = ctx.get_supercombinator_function(pair.first);
ctx.set_current_function(current_function);
llvm::BasicBlock* new_block =
llvm::BasicBlock::Create(context, "entry", current_function);
builder.SetInsertPoint(new_block);
for(auto& op : comb_gcode) {
op->gen_llvm(ctx);
}
builder.CreateRetVoid();
}
llvm::verifyModule(module, &llvm::outs());
llvm_generate("lily.o");
module.print(llvm::outs(), NULL);
}
}