// Front end and driver code for namespace lily: converts a pgs parse tree into
// AST / pattern / program objects, type-checks the program, compiles every
// function into a vector of instructions, and emits LLVM output.
//
// NOTE(review): the physical layout of this file looks damaged and should be
// restored from the original before it is built:
//   * many definitions share one physical line, so each inline `//` comment now
//     extends over all code that follows it on the same line, and the
//     `#include` directives no longer start their own lines;
//   * angle-bracket spans appear to be missing: `template static ...` has no
//     parameter list, `std::vector`, `std::map>`, `std::unique_ptr(...)`,
//     `std::make_shared(...)` and `mgr.add_instruction(...)` carry no template
//     arguments, the calls to expr_let / generate_binop name no arguments, and
//     one `#include` has no target.
// The comments below describe only what the visible text establishes.
//
// Definitions that begin on the next physical line:
//   tree_str(tree, source)
//     Returns the characters of `source` in the half-open range given by
//     PGS_TREE_T_FROM(*tree) .. PGS_TREE_T_TO(*tree).
//   expr_app_bottom(base, source)
//     If `base` has exactly three children, converts child 1 with expr_tree
//     (presumably a bracketed sub-expression - TODO confirm against the grammar).
//     Otherwise inspects child 0: when the character at its start offset is a
//     digit the result is an ast_num built with atoi from that offset, else an
//     ast_var holding the child's text.
//     NOTE(review): isdigit is given a plain char; confirm the input cannot
//     contain bytes that are negative as char.
//   expr_app(app, source)
//     While `app` has more than one child: creates an ast_app, stores it in the
//     current slot, fills its right side from child 1 via expr_app_bottom, and
//     continues with child 0, using the new node's left member as the next slot.
//     The last node's child 0 fills the innermost left slot. Returns the
//     outermost node.
//   expr_mul(mul, source)   (its body continues on the following physical line)
//     One child: delegates to expr_app on child 0. Otherwise children 0 / 1 / 2
//     are left operand, operator and right operand; the operator is
//     binop::times when the character at the operator's start offset is '*' and
//     binop::divide otherwise. The left side recurses through expr_mul, the
//     right side goes through expr_app, and the result is an ast_op.
extern "C" { #include "parser.h" } #include "parser.hpp" #include "pattern.hpp" #include "type_checker.hpp" #include "llvm.hpp" #include "llvm/IR/Verifier.h" #include namespace lily { std::string tree_str(pgs_tree* tree, const char* source) { size_t from = PGS_TREE_T_FROM(*tree); size_t to = PGS_TREE_T_TO(*tree); return std::string(source + from, to - from); } static ast_ptr expr_tree(pgs_tree*, const char*); static ast_ptr expr_app_bottom(pgs_tree* base, const char* source) { if(PGS_TREE_NT_COUNT(*base) == 3) { return expr_tree(PGS_TREE_NT_CHILD(*base, 1), source); } else { pgs_tree* child = PGS_TREE_NT_CHILD(*base, 0); if(isdigit(source[PGS_TREE_T_FROM(*child)])) { return ast_ptr(new ast_num(atoi(source + PGS_TREE_T_FROM(*child)))); } else { return ast_ptr(new ast_var(tree_str(child, source))); } } } static ast_ptr expr_app(pgs_tree* app, const char* source) { ast_ptr into; ast_ptr* into_ptr = &into; while(PGS_TREE_NT_COUNT(*app) > 1) { ast_app* new_app = new ast_app(nullptr, nullptr); *into_ptr = ast_ptr(new_app); into_ptr = &new_app->left; new_app->right = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 1), source); app = PGS_TREE_NT_CHILD(*app, 0); } *into_ptr = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 0), source); return into; } static ast_ptr expr_mul(pgs_tree* mul, const char* source) { if(PGS_TREE_NT_COUNT(*mul) == 1) { return expr_app(PGS_TREE_NT_CHILD(*mul, 0), source); } else { pgs_tree* left = PGS_TREE_NT_CHILD(*mul, 0); pgs_tree* right = PGS_TREE_NT_CHILD(*mul, 2); pgs_tree* op = PGS_TREE_NT_CHILD(*mul, 1); enum binop o = source[PGS_TREE_T_FROM(*op)] == '*' ? 
// (The line below first finishes expr_mul.) Definitions that begin on it:
//   expr_add(add, source)
//     Same shape as expr_mul one level up: one child delegates to expr_mul;
//     otherwise '+' selects binop::add and anything else binop::subtract, the
//     left side recurses through expr_add and the right side goes through
//     expr_mul.
//   expr_let(let, source)   (template; its parameter list is absent from the text)
//     Uses `Offset` to index children of `let`: child Offset+1 is the bound
//     name, child 1 of child Offset+3 is the bound expression and child 1 of
//     child Offset+5 is the body, both converted with expr_tree. Returns a new
//     `T` built from (name, bound expression, body).
//   pattern_tree(tree, source)
//     One child: the child's text becomes a pattern_cons when its first
//     character is upper case, otherwise a pattern_var. More children: child 0
//     names a pattern_cons and child 2 starts a list whose nodes each contribute
//     the text of their child 0 to `vnames`, following child 2 while a node has
//     more than one child.
//     NOTE(review): isupper is given a plain char, and new_str[0] is read
//     without an emptiness check - confirm tokens are never empty.
//   expr_branch(c, source)   (its body continues on the following physical line)
//     Builds an ast_case::branch from child 0 (pattern, via pattern_tree) and
//     child 1 of child 2 (expression, via expr_tree).
binop::times : binop::divide; ast_ptr aleft = expr_mul(left, source); ast_ptr aright = expr_app(right, source); return ast_ptr(new ast_op(o, std::move(aleft), std::move(aright))); } } static ast_ptr expr_add(pgs_tree* add, const char* source) { if(PGS_TREE_NT_COUNT(*add) == 1) { return expr_mul(PGS_TREE_NT_CHILD(*add, 0), source); } else { pgs_tree* left = PGS_TREE_NT_CHILD(*add, 0); pgs_tree* right = PGS_TREE_NT_CHILD(*add, 2); pgs_tree* op = PGS_TREE_NT_CHILD(*add, 1); enum binop o = source[PGS_TREE_T_FROM(*op)] == '+' ? binop::add : binop::subtract; ast_ptr aleft = expr_add(left, source); ast_ptr aright = expr_mul(right, source); return ast_ptr(new ast_op(o, std::move(aleft), std::move(aright))); } } template static ast_ptr expr_let(pgs_tree* let, const char* source) { std::string name = tree_str(PGS_TREE_NT_CHILD(*let, Offset + 1), source); ast_ptr eq_expr = expr_tree(PGS_TREE_NT_CHILD(*PGS_TREE_NT_CHILD(*let, Offset + 3), 1), source); ast_ptr in_expr = expr_tree(PGS_TREE_NT_CHILD(*PGS_TREE_NT_CHILD(*let, Offset + 5), 1), source); return ast_ptr(new T(name, std::move(eq_expr), std::move(in_expr))); } static pattern_ptr pattern_tree(pgs_tree* tree, const char* source) { if(PGS_TREE_NT_COUNT(*tree) == 1) { std::string new_str = tree_str(PGS_TREE_NT_CHILD(*tree, 0), source); if(isupper(new_str[0])) return pattern_ptr(new pattern_cons(new_str)); else return pattern_ptr(new pattern_var(new_str)); } else { std::string cons_name = tree_str(PGS_TREE_NT_CHILD(*tree, 0), source); pattern_cons* new_pattern = new pattern_cons(cons_name); pattern_ptr ptr = pattern_ptr(new_pattern); pgs_tree* patterns = PGS_TREE_NT_CHILD(*tree, 2); while(PGS_TREE_NT_COUNT(*patterns) > 1) { new_pattern->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*patterns, 0), source)); patterns = PGS_TREE_NT_CHILD(*patterns, 2); } new_pattern->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*patterns, 0), source)); return ptr; } } static ast_case::branch expr_branch(pgs_tree* c, const char* source) { 
// (The line below first finishes expr_branch.) Definitions that begin on it:
//   expr_case(c, source)
//     Creates an ast_case whose `of` is child 1 converted with expr_tree.
//     Child 4 starts the branch list: while a list node has exactly two
//     children, child 0 is converted with expr_branch and child 1 is the rest;
//     the final node's child 0 is converted as the last branch.
//   expr_tree(body, source)
//     Looks at child 0 of `body` and dispatches on PGS_TREE_NT of that child to
//     expr_add, expr_let (for both the LET and LETREC nonterminals; whatever
//     distinguished the two calls is not present in the text) or expr_case.
//     Throws error("unknown expression type") for any other nonterminal.
//   collect_params(into, def, source)   (continues on the following physical line)
//     Only acts when `def` has exactly five children: child 2 starts a list
//     whose nodes each contribute the text of their child 0 to `into`; a node
//     with two children links to the rest of the list through child 1.
ast_case::branch branch; branch.pattern = pattern_tree(PGS_TREE_NT_CHILD(*c, 0), source); branch.expr = expr_tree(PGS_TREE_NT_CHILD(*PGS_TREE_NT_CHILD(*c, 2), 1), source); return branch; } static ast_ptr expr_case(pgs_tree* c, const char* source) { ast_case* new_case = new ast_case(); ast_ptr case_ptr = ast_ptr(new_case); new_case->of = expr_tree(PGS_TREE_NT_CHILD(*c, 1), source); pgs_tree* branches = PGS_TREE_NT_CHILD(*c, 4); while(PGS_TREE_NT_COUNT(*branches) == 2) { pgs_tree* branch = PGS_TREE_NT_CHILD(*branches, 0); branches = PGS_TREE_NT_CHILD(*branches, 1); new_case->branches.push_back(expr_branch(branch, source)); } new_case->branches.push_back(expr_branch(PGS_TREE_NT_CHILD(*branches, 0), source)); return case_ptr; } static ast_ptr expr_tree(pgs_tree* body, const char* source) { pgs_tree* expr = PGS_TREE_NT_CHILD(*body, 0); int type = PGS_TREE_NT(*expr); if(type == PGS_NONTERMINAL_EXPR_ADD) { return expr_add(expr, source); } else if(type == PGS_NONTERMINAL_EXPR_LET) { return expr_let(expr, source); } else if(type == PGS_NONTERMINAL_EXPR_LETREC) { return expr_let(expr, source); } else if(type == PGS_NONTERMINAL_EXPR_CASE) { return expr_case(expr, source); } throw error("unknown expression type"); } static void collect_params(std::vector& into, pgs_tree* def, const char* source) { if(PGS_TREE_NT_COUNT(*def) == 5) { pgs_tree* params = PGS_TREE_NT_CHILD(*def, 2); do { pgs_tree* param = PGS_TREE_NT_CHILD(*params, 0); params = (PGS_TREE_NT_COUNT(*params) == 2) ? 
// (The line below first finishes collect_params.) Definitions that begin on it:
//   type_tree(prog, type, source)
//     Takes the text of child 0 and returns prog.type_mgr.require_type(text).
//   collect_type_params(prog, into, params, source)
//     Appends type_tree(child 0) for every list node, following child 2 until a
//     node with exactly one child is reached.
//   collect_constructors(prog, parent, def, source)
//     For every list node: child 0 is a constructor element whose child 0 is
//     the name and, when the element has more than one child, whose child 2
//     lists parameter types. Calls parent->create_constructor(name, params) and
//     follows child 2 until a node with exactly one child is reached.
//     NOTE(review): the local `id` is initialised and never read.
//   add_definition(prog, def, source)   (continues on the following physical line)
//     Dispatches on the source text at the start of child 0: first character
//     't' is ignored; second character 'e' is treated as a function definition;
//     anything else as a data type definition.
//     Function: child 1 is the name, error("cannot redefine function") is thrown
//     if prog.functions already has it, parameters come from collect_params and
//     the body from child 1 of the last child.
//     Data type: child 1 is the name passed to type_mgr.create_data_type, and
//     child 1 of the last child is handed to collect_constructors.
PGS_TREE_NT_CHILD(*params, 1) : nullptr; size_t from = PGS_TREE_T_FROM(*param); size_t to = PGS_TREE_T_TO(*param); std::string new_string = std::string(source + from, to - from); into.push_back(new_string); } while(params); } } static type* type_tree(program& prog, pgs_tree* type, const char* source) { std::string str = tree_str(PGS_TREE_NT_CHILD(*type, 0), source); return prog.type_mgr.require_type(str); } static void collect_type_params(program& prog, std::vector& into, pgs_tree* params, const char* source) { while(true) { pgs_tree* param = PGS_TREE_NT_CHILD(*params, 0); into.push_back(type_tree(prog, param, source)); if(PGS_TREE_NT_COUNT(*params) == 1) break; params = PGS_TREE_NT_CHILD(*params, 2); } } static void collect_constructors(program& prog, type_data* parent, pgs_tree* def, const char* source) { int id = 0; while(true) { pgs_tree* elem = PGS_TREE_NT_CHILD(*def, 0); std::vector params; std::string name = tree_str(PGS_TREE_NT_CHILD(*elem, 0), source); if(PGS_TREE_NT_COUNT(*elem) > 1) collect_type_params(prog, params, PGS_TREE_NT_CHILD(*elem, 2), source); parent->create_constructor(name, std::move(params)); if(PGS_TREE_NT_COUNT(*def) == 1) break; def = PGS_TREE_NT_CHILD(*def, 2); } } static void add_definition(program& prog, pgs_tree* def, const char* source) { size_t str_from = PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*def, 0)); if(source[str_from]== 't') { // skip for now } else if(source[str_from + 1] == 'e') { std::string function_name = tree_str(PGS_TREE_NT_CHILD(*def, 1), source); if(prog.functions.count(function_name)) throw error("cannot redefine function"); function new_function; collect_params(new_function.params, def, source); new_function.body = expr_tree(PGS_TREE_NT_CHILD(*PGS_TREE_NT_CHILD(*def, PGS_TREE_NT_COUNT(*def) - 1), 1), source); prog.functions[function_name] = std::move(new_function); } else { std::string data_name = tree_str(PGS_TREE_NT_CHILD(*def, 1), source); type_data* new_type = prog.type_mgr.create_data_type(data_name); 
// (The line below first finishes add_definition.) Definitions that begin on it:
//   build_program(tree, source)
//     Allocates a program, then walks the list under child 0 of `tree`: child 0
//     of each node is a definition passed to add_definition, and a node with two
//     children links to the rest through child 1.
//   parse(s)
//     Initialises a pgs_state, runs pgs_do_all on s.c_str(), throws
//     error("failed to parse input string") unless it returns PGS_NONE, then
//     builds the program, calls check() on it, frees the tree with
//     pgs_free_tree and returns the program.
//     NOTE(review): if build_program or check throws, pgs_free_tree is not
//     reached. No release call for `state` is visible here - confirm whether
//     the pgs API requires one.
//   program::program()
//     Registers the data type "Bool" with constructors "True" and "False".
//     NOTE(review): `params` is passed through std::move twice; the second call
//     reads a moved-from vector.
//   program::check()   (continues over the following two physical lines)
//     First pass, per function: creates an environment whose parent is the
//     shared base environment and a fresh return type; then, visiting
//     parameters from last to first, creates a fresh type for each, records it
//     in the function's environment under the parameter name, and combines it
//     with the type built so far via type_mgr.create_type(param, current). The
//     final type is stored in the base environment under the function's name.
//     NOTE(review): the parameter loop compares an int index with size().
  collect_constructors(prog, new_type, PGS_TREE_NT_CHILD(*PGS_TREE_NT_CHILD(*def, PGS_TREE_NT_COUNT(*def) - 1), 1), source); } } static program_ptr build_program(pgs_tree* tree, const char* source) { program_ptr prog = std::unique_ptr(new program); pgs_tree* program = PGS_TREE_NT_CHILD(*tree, 0); do { pgs_tree* definition = PGS_TREE_NT_CHILD(*program, 0); program = (PGS_TREE_NT_COUNT(*program) == 2) ? PGS_TREE_NT_CHILD(*program, 1) : nullptr; add_definition(*prog, definition, source); } while(program); return prog; } program_ptr parse(std::string s) { pgs_state state; pgs_tree* into; pgs_error err; pgs_state_init(&state); if((err = pgs_do_all(&state, &into, s.c_str())) != PGS_NONE) { throw error("failed to parse input string"); } program_ptr prog = build_program(into, s.c_str()); prog->check(); pgs_free_tree(into); return prog; } program::program() { type_data* data = type_mgr.create_data_type("Bool"); std::vector params; data->create_constructor("True", std::move(params)); data->create_constructor("False", std::move(params)); } void program::check() { // Each function has an environment, which will be used // as base for type checking. std::map> function_envs; std::map function_output_types; auto base_env = std::make_shared(); // First step is to collect all function types. for(auto& pair : functions) { // Create a local function environment. auto function_env = std::make_shared(); function_env->set_parent(base_env.get()); function_envs[pair.first] = function_env; // We'll be building up the function type. 
// (program::check continues.) After the per-function pass the line below also
// registers "eq" in the base environment with a type assembled from
// require_type("Int"), require_type("Int") and require_type("Bool") through two
// nested create_type calls (presumably Int -> Int -> Bool - TODO confirm the
// meaning of the two-argument create_type), and then calls
// type_mgr.register_constructors(base_env).
// Create the return type parameter type* return_type = type_mgr.create_type(); type* current_type = return_type; function_output_types[pair.first] = return_type; // Create type parameters for every variable // We also want to place them in a local environment for(int i = 0; i < pair.second.params.size(); i++) { const std::string& str = pair.second.params[pair.second.params.size() - i - 1]; type* variable_param = type_mgr.create_type(); function_env->set_type(str, variable_param); current_type = type_mgr.create_type(variable_param, current_type); } // Store function in env. base_env->set_type(pair.first, current_type); } // Also add internal functions type* eq_return_type = type_mgr.require_type("Bool"); type* eq_param_type = type_mgr.require_type("Int"); type* eq_app_1 = type_mgr.create_type(eq_param_type, eq_return_type); type* eq_app_2 = type_mgr.create_type(eq_param_type, eq_app_1); base_env->set_type("eq", eq_app_2); // We also want to gather all the constructor calls. type_mgr.register_constructors(base_env); // Now that we have collected the functions, check their bodies. 
// (The line below finishes program::check: each function body is type-checked
// in its own environment, and error("unable to unify function type") is thrown
// when unify_with on the recorded return type returns false for the body type.)
// Definitions that begin on it:
//   generate_binop(mgr, into)   (template; its parameter list is absent from the text)
//     Appends seven instructions obtained from mgr.add_instruction - called with
//     1, nothing, 1, nothing, `o`, 2 and 2 - and stores the sequence under
//     op_supercombinator(o). The instruction kinds are not visible in the text.
//   register_internal(mgr, into)
//     Calls generate_binop four times (the arguments that told the calls apart
//     are not visible) and stores a further seven-instruction sequence under
//     the key "eq".
//   program::compile(mgr, into)
//     Registers the internal sequences and constructor supercombinators, then
//     for each function: starts from an environment constructed with 0, wraps
//     it once per parameter (last parameter first) in an environment built from
//     the parameter name whose parent is the previous environment, compiles the
//     body into a new instruction vector, appends three more instructions (two
//     built with the parameter count, one with no argument) and stores the
//     vector under the function's name.
//   program::gen_llvm()   (continues on the following physical line)
//     Calls llvm_init, creates an llvm_context and an instruction_manager, and
//     fills `gcode` through compile().
for(auto& pair : functions) { type* body_type = pair.second.body->typecheck(type_mgr, function_envs[pair.first]); if(!function_output_types[pair.first]->unify_with(body_type)) throw error("unable to unify function type"); } } template static void generate_binop(instruction_manager& mgr, std::map>& into) { std::vector dest; dest.push_back(mgr.add_instruction(1)); dest.push_back(mgr.add_instruction()); dest.push_back(mgr.add_instruction(1)); dest.push_back(mgr.add_instruction()); dest.push_back(mgr.add_instruction(o)); dest.push_back(mgr.add_instruction(2)); dest.push_back(mgr.add_instruction(2)); into[op_supercombinator(o)] = std::move(dest); } static void register_internal(instruction_manager& mgr, std::map>& into) { generate_binop(mgr, into); generate_binop(mgr, into); generate_binop(mgr, into); generate_binop(mgr, into); std::vector dest; dest.push_back(mgr.add_instruction(1)); dest.push_back(mgr.add_instruction()); dest.push_back(mgr.add_instruction(1)); dest.push_back(mgr.add_instruction()); dest.push_back(mgr.add_instruction()); dest.push_back(mgr.add_instruction(2)); dest.push_back(mgr.add_instruction(2)); into["eq"] = std::move(dest); } void program::compile(instruction_manager& mgr, std::map>& into) { register_internal(mgr, into); type_mgr.register_constructor_supercombs(mgr, into); for(auto& pair : functions) { std::shared_ptr fresh_env = std::make_shared(0); size_t count = pair.second.params.size(); for(size_t i = 0; i < count; i++) { auto new_env = std::make_shared(pair.second.params[count - i - 1]); new_env->set_parent(fresh_env); fresh_env = new_env; } std::vector destination; pair.second.body->compile(mgr, destination, fresh_env); destination.push_back(mgr.add_instruction(count)); destination.push_back(mgr.add_instruction(count)); destination.push_back(mgr.add_instruction()); into[pair.first] = std::move(destination); } } void program::gen_llvm() { llvm_init(); llvm_context ctx; instruction_manager mgr; std::map> gcode; compile(mgr, gcode); for(auto& 
// (program::gen_llvm continues.) First loop: every compiled name is declared
// through ctx.add_supercombinator with an arity taken from the function's
// parameter count when the name is a user function, otherwise from
// type_mgr.constructor_arity; a result of -1 is replaced by 2.
// NOTE(review): the -1 fallback presumably covers the two-operand internal
// entries - confirm no other arity can reach it.
// Second loop: for each entry, selects its function in `ctx`, creates an
// "entry" basic block, points `builder` at it, emits every instruction with
// gen_llvm(ctx) and ends the block with CreateRetVoid.
// Finally calls llvm::verifyModule, llvm_generate("lily.o") and module.print.
// NOTE(review): `context`, `builder` and `module` are not declared in the
// visible text (presumably provided by llvm.hpp - confirm), and the value
// returned by llvm::verifyModule is discarded, so generation proceeds
// regardless of what verification reports.
pair : gcode) { int arity = functions.count(pair.first) ? functions[pair.first].params.size() : type_mgr.constructor_arity(pair.first); if(arity == -1) arity = 2; ctx.add_supercombinator(pair.first, arity); } for(auto& pair : gcode) { std::vector& comb_gcode = pair.second; llvm::Function* current_function = ctx.get_supercombinator_function(pair.first); ctx.set_current_function(current_function); llvm::BasicBlock* new_block = llvm::BasicBlock::Create(context, "entry", current_function); builder.SetInsertPoint(new_block); for(auto& op : comb_gcode) { op->gen_llvm(ctx); } builder.CreateRetVoid(); } llvm::verifyModule(module, &llvm::outs()); llvm_generate("lily.o"); module.print(llvm::outs(), NULL); } }