367 lines
15 KiB
C++
367 lines
15 KiB
C++
extern "C" {
|
|
#include "parser.h"
|
|
}
|
|
#include "parser.hpp"
|
|
#include "pattern.hpp"
|
|
#include "type_checker.hpp"
|
|
#include "llvm.hpp"
|
|
#include "llvm/IR/Verifier.h"
|
|
#include <memory>
|
|
|
|
namespace lily {
|
|
// Returns a copy of the source text covered by a parse tree node.
//
// The node's start and end offsets are read with PGS_TREE_T_FROM and
// PGS_TREE_T_TO; the result holds the (to - from) characters of `source`
// starting at offset `from`.
std::string tree_str(pgs_tree* tree, const char* source) {
    const size_t begin = PGS_TREE_T_FROM(*tree);
    const size_t end = PGS_TREE_T_TO(*tree);
    const char* first = source + begin;
    return std::string(first, end - begin);
}
|
|
|
|
static ast_ptr expr_tree(pgs_tree*, const char*);
|
|
|
|
// Converts the innermost operand of an application into an AST node.
//
// A node with exactly two children wraps a nested expression in child 1
// (presumably a parenthesized expression -- confirm against the grammar) and
// is handed back to expr_tree. Otherwise child 0 is a single token: if its
// text starts with a digit it becomes an ast_num, else an ast_var named after
// the token's text.
static ast_ptr expr_app_bottom(pgs_tree* base, const char* source) {
    if(PGS_TREE_NT_COUNT(*base) == 2) {
        return expr_tree(PGS_TREE_NT_CHILD(*base, 1), source);
    }

    pgs_tree* child = PGS_TREE_NT_CHILD(*base, 0);
    const char* text = source + PGS_TREE_T_FROM(*child);

    // The <cctype> classifiers are undefined for negative arguments other
    // than EOF, so a plain (possibly signed) char must go through
    // unsigned char first.
    if(isdigit(static_cast<unsigned char>(*text))) {
        // NOTE(review): atoi gives no way to detect an out-of-range literal.
        return ast_ptr(new ast_num(atoi(text)));
    }
    return ast_ptr(new ast_var(tree_str(child, source)));
}
|
|
|
|
// Builds the AST for a function application chain.
//
// While the current node has more than one child, an ast_app is created whose
// right operand comes from child 1; its left operand is left empty and filled
// in by the next step, which continues with child 0. When a node with a
// single child is reached, that child is converted with expr_app_bottom and
// stored in the remaining empty slot (or becomes the result itself when no
// application node was created).
static ast_ptr expr_app(pgs_tree* app, const char* source) {
    ast_ptr root;
    // Always points at the slot that still needs a value.
    ast_ptr* slot = &root;

    for(; PGS_TREE_NT_COUNT(*app) > 1; app = PGS_TREE_NT_CHILD(*app, 0)) {
        ast_app* node = new ast_app(nullptr, nullptr);
        *slot = ast_ptr(node);
        slot = &node->left;
        node->right = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 1), source);
    }

    *slot = expr_app_bottom(PGS_TREE_NT_CHILD(*app, 0), source);
    return root;
}
|
|
|
|
// Builds the AST for a multiplicative expression.
//
// A single-child node is just an application and is forwarded to expr_app.
// Otherwise the children are (left, operator, right): the left side is
// another multiplicative expression, the right side an application, and the
// operator is binop::times when its text starts with '*', binop::divide
// otherwise.
static ast_ptr expr_mul(pgs_tree* mul, const char* source) {
    if(PGS_TREE_NT_COUNT(*mul) != 1) {
        pgs_tree* op_node = PGS_TREE_NT_CHILD(*mul, 1);

        binop kind = binop::divide;
        if(source[PGS_TREE_T_FROM(*op_node)] == '*') {
            kind = binop::times;
        }

        ast_ptr lhs = expr_mul(PGS_TREE_NT_CHILD(*mul, 0), source);
        ast_ptr rhs = expr_app(PGS_TREE_NT_CHILD(*mul, 2), source);
        return ast_ptr(new ast_op(kind, std::move(lhs), std::move(rhs)));
    }

    return expr_app(PGS_TREE_NT_CHILD(*mul, 0), source);
}
|
|
|
|
// Builds the AST for an additive expression.
//
// A single-child node is just a multiplicative expression and is forwarded to
// expr_mul. Otherwise the children are (left, operator, right): the left side
// is another additive expression, the right side a multiplicative one, and
// the operator is binop::add when its text starts with '+', binop::subtract
// otherwise.
static ast_ptr expr_add(pgs_tree* add, const char* source) {
    if(PGS_TREE_NT_COUNT(*add) != 1) {
        pgs_tree* op_node = PGS_TREE_NT_CHILD(*add, 1);

        binop kind = binop::subtract;
        if(source[PGS_TREE_T_FROM(*op_node)] == '+') {
            kind = binop::add;
        }

        ast_ptr lhs = expr_add(PGS_TREE_NT_CHILD(*add, 0), source);
        ast_ptr rhs = expr_mul(PGS_TREE_NT_CHILD(*add, 2), source);
        return ast_ptr(new ast_op(kind, std::move(lhs), std::move(rhs)));
    }

    return expr_mul(PGS_TREE_NT_CHILD(*add, 0), source);
}
|
|
|
|
template <typename T, int Offset>
|
|
static ast_ptr expr_let(pgs_tree* let, const char* source) {
|
|
std::string name = tree_str(PGS_TREE_NT_CHILD(*let, Offset + 1), source);
|
|
ast_ptr eq_expr = expr_tree(PGS_TREE_NT_CHILD(*PGS_TREE_NT_CHILD(*let, Offset + 3), 1), source);
|
|
ast_ptr in_expr = expr_tree(PGS_TREE_NT_CHILD(*PGS_TREE_NT_CHILD(*let, Offset + 5), 1), source);
|
|
return ast_ptr(new T(name, std::move(eq_expr), std::move(in_expr)));
|
|
}
|
|
|
|
// Converts a pattern parse node into a pattern object.
//
// Child 0 always holds the leading name. A node with only that child is a
// bare name: an uppercase first letter makes it a constructor pattern
// (pattern_cons), anything else a variable pattern (pattern_var). A node with
// more children is a constructor pattern with arguments: child 2 is a list
// whose first child is a variable name and, while the list node has more than
// one child, whose child 2 is the rest of the list. Every name found is
// appended to the constructor pattern's vnames.
static pattern_ptr pattern_tree(pgs_tree* tree, const char* source) {
    std::string head = tree_str(PGS_TREE_NT_CHILD(*tree, 0), source);

    if(PGS_TREE_NT_COUNT(*tree) == 1) {
        // The <cctype> classifiers are undefined for negative arguments other
        // than EOF, so convert the char through unsigned char first.
        if(isupper(static_cast<unsigned char>(head[0]))) {
            return pattern_ptr(new pattern_cons(head));
        }
        return pattern_ptr(new pattern_var(head));
    }

    pattern_cons* cons = new pattern_cons(head);
    // Take ownership right away so the node is released if anything below throws.
    pattern_ptr owner = pattern_ptr(cons);

    pgs_tree* names = PGS_TREE_NT_CHILD(*tree, 2);
    while(PGS_TREE_NT_COUNT(*names) > 1) {
        cons->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*names, 0), source));
        names = PGS_TREE_NT_CHILD(*names, 2);
    }
    // The final list node carries only the last name.
    cons->vnames.push_back(tree_str(PGS_TREE_NT_CHILD(*names, 0), source));

    return owner;
}
|
|
|
|
// Converts one case branch into an ast_case::branch.
//
// Child 0 is the pattern; child 2 wraps the branch body, whose expression is
// that wrapper's child 1.
static ast_case::branch expr_branch(pgs_tree* c, const char* source) {
    ast_case::branch result;

    pgs_tree* pattern_node = PGS_TREE_NT_CHILD(*c, 0);
    result.pattern = pattern_tree(pattern_node, source);

    pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*c, 2);
    result.expr = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);

    return result;
}
|
|
|
|
// Builds an ast_case from a case-expression parse node.
//
// Child 1 is the expression being matched and child 4 the branch list. A list
// node with two children holds a branch (child 0) followed by the rest of the
// list (child 1); any other list node contributes its child 0 as the final
// branch. Branches are appended in source order.
static ast_ptr expr_case(pgs_tree* c, const char* source) {
    ast_case* node = new ast_case();
    // Take ownership right away so the node is released if anything below throws.
    ast_ptr owner = ast_ptr(node);

    node->of = expr_tree(PGS_TREE_NT_CHILD(*c, 1), source);

    pgs_tree* rest = PGS_TREE_NT_CHILD(*c, 4);
    for(; PGS_TREE_NT_COUNT(*rest) == 2; rest = PGS_TREE_NT_CHILD(*rest, 1)) {
        node->branches.push_back(expr_branch(PGS_TREE_NT_CHILD(*rest, 0), source));
    }
    node->branches.push_back(expr_branch(PGS_TREE_NT_CHILD(*rest, 0), source));

    return owner;
}
|
|
|
|
// Converts an expression wrapper node into an AST.
//
// The actual expression is child 0 of `body`; its nonterminal kind selects
// the builder (additive, let, letrec or case). Throws error for any other
// kind.
static ast_ptr expr_tree(pgs_tree* body, const char* source) {
    pgs_tree* inner = PGS_TREE_NT_CHILD(*body, 0);
    const int kind = PGS_TREE_NT(*inner);

    if(kind == PGS_NONTERMINAL_EXPR_ADD) {
        return expr_add(inner, source);
    }
    if(kind == PGS_NONTERMINAL_EXPR_LET) {
        return expr_let<ast_let, 0>(inner, source);
    }
    if(kind == PGS_NONTERMINAL_EXPR_LETREC) {
        return expr_let<ast_letrec, 1>(inner, source);
    }
    if(kind == PGS_NONTERMINAL_EXPR_CASE) {
        return expr_case(inner, source);
    }

    throw error("unknown expression type");
}
|
|
|
|
static void collect_params(std::vector<std::string>& into, pgs_tree* def, const char* source) {
|
|
if(PGS_TREE_NT_COUNT(*def) == 5) {
|
|
pgs_tree* params = PGS_TREE_NT_CHILD(*def, 2);
|
|
do {
|
|
pgs_tree* param = PGS_TREE_NT_CHILD(*params, 0);
|
|
params =
|
|
(PGS_TREE_NT_COUNT(*params) == 2) ? PGS_TREE_NT_CHILD(*params, 1) : nullptr;
|
|
|
|
size_t from = PGS_TREE_T_FROM(*param);
|
|
size_t to = PGS_TREE_T_TO(*param);
|
|
std::string new_string =
|
|
std::string(source + from, to - from);
|
|
|
|
into.push_back(new_string);
|
|
} while(params);
|
|
}
|
|
}
|
|
|
|
// Resolves a type parse node to a type object.
//
// The text of the node's child 0 is taken as the type name and looked up
// through the program's type manager with require_type.
static type* type_tree(program& prog, pgs_tree* type, const char* source) {
    pgs_tree* name_node = PGS_TREE_NT_CHILD(*type, 0);
    std::string type_name = tree_str(name_node, source);
    return prog.type_mgr.require_type(type_name);
}
|
|
|
|
// Appends the types named in a constructor's parameter list to `into`.
//
// Each list node holds a type as child 0; a node with exactly one child ends
// the list, otherwise the rest of the list is child 2. Types are appended in
// source order.
static void collect_type_params(program& prog, std::vector<type*>& into, pgs_tree* params, const char* source) {
    into.push_back(type_tree(prog, PGS_TREE_NT_CHILD(*params, 0), source));

    while(PGS_TREE_NT_COUNT(*params) != 1) {
        params = PGS_TREE_NT_CHILD(*params, 2);
        into.push_back(type_tree(prog, PGS_TREE_NT_CHILD(*params, 0), source));
    }
}
|
|
|
|
// Registers every constructor of a data type definition on `parent`.
//
// Each list node holds one constructor as child 0; a node with exactly one
// child ends the list, otherwise the rest of the list is child 2. For a
// constructor, child 0 is its name and, when it has more than one child,
// child 2 is its parameter type list. Constructors are created in source
// order through parent->create_constructor.
static void collect_constructors(program& prog, type_data* parent, pgs_tree* def, const char* source) {
    while(true) {
        pgs_tree* elem = PGS_TREE_NT_CHILD(*def, 0);
        std::string name = tree_str(PGS_TREE_NT_CHILD(*elem, 0), source);

        // Constructors without arguments keep an empty parameter list.
        std::vector<type*> params;
        if(PGS_TREE_NT_COUNT(*elem) > 1) {
            collect_type_params(prog, params, PGS_TREE_NT_CHILD(*elem, 2), source);
        }
        parent->create_constructor(name, std::move(params));

        if(PGS_TREE_NT_COUNT(*def) == 1) break;
        def = PGS_TREE_NT_CHILD(*def, 2);
    }
}
|
|
|
|
// Adds one top-level definition to the program.
//
// The kind of definition is decided from the text of child 0 (presumably the
// introducing keyword -- confirm against the grammar):
//   - first character 't': ignored for now;
//   - second character 'e': a function definition. Child 1 is its name, the
//     parameters are gathered by collect_params, and the body expression is
//     child 1 of the definition's last child. Throws error if a function
//     with that name already exists;
//   - anything else: a data type definition. Child 1 is its name and the
//     constructor list is child 1 of the definition's last child.
static void add_definition(program& prog, pgs_tree* def, const char* source) {
    const size_t keyword_at = PGS_TREE_T_FROM(*PGS_TREE_NT_CHILD(*def, 0));

    if(source[keyword_at] == 't') {
        // skip for now
        return;
    }

    if(source[keyword_at + 1] == 'e') {
        std::string function_name = tree_str(PGS_TREE_NT_CHILD(*def, 1), source);
        if(prog.functions.count(function_name) != 0) {
            throw error("cannot redefine function");
        }

        function fresh;
        collect_params(fresh.params, def, source);

        pgs_tree* body_wrapper = PGS_TREE_NT_CHILD(*def, PGS_TREE_NT_COUNT(*def) - 1);
        fresh.body = expr_tree(PGS_TREE_NT_CHILD(*body_wrapper, 1), source);

        prog.functions[function_name] = std::move(fresh);
        return;
    }

    std::string data_name = tree_str(PGS_TREE_NT_CHILD(*def, 1), source);
    type_data* new_type = prog.type_mgr.create_data_type(data_name);

    pgs_tree* constructors_wrapper = PGS_TREE_NT_CHILD(*def, PGS_TREE_NT_COUNT(*def) - 1);
    collect_constructors(prog, new_type, PGS_TREE_NT_CHILD(*constructors_wrapper, 1), source);
}
|
|
|
|
// Builds a program object from the root of a parse tree.
//
// Child 0 of the root is the definition list. Each list node holds a
// definition as child 0 and, when it has two children, the rest of the list
// as child 1. Definitions are added in source order through add_definition.
static program_ptr build_program(pgs_tree* tree, const char* source) {
    program_ptr result = std::make_unique<program>();

    pgs_tree* list = PGS_TREE_NT_CHILD(*tree, 0);
    bool more = true;
    while(more) {
        pgs_tree* definition = PGS_TREE_NT_CHILD(*list, 0);

        // Pick the continuation before handling the current definition.
        pgs_tree* next = nullptr;
        if(PGS_TREE_NT_COUNT(*list) == 2) {
            next = PGS_TREE_NT_CHILD(*list, 1);
        }

        add_definition(*result, definition, source);

        list = next;
        more = (list != nullptr);
    }

    return result;
}
|
|
|
|
// Parses program text and returns the type-checked program.
//
// Runs the generated parser over `s`, converts the resulting tree with
// build_program and then calls program::check on the result.
//
// Throws error("failed to parse input string") when the parser reports
// anything other than PGS_NONE; exceptions raised while building or checking
// the program propagate to the caller.
program_ptr parse(std::string s) {
    pgs_state state;
    pgs_tree* into = nullptr;

    pgs_state_init(&state);
    // NOTE(review): `state` is never torn down here -- confirm whether the
    // generated parser API requires a matching cleanup call.
    if(pgs_do_all(&state, &into, s.c_str()) != PGS_NONE) {
        throw error("failed to parse input string");
    }

    // Release the parse tree on every exit path. Previously it leaked when
    // build_program or check threw (e.g. on a redefined function or a type
    // error).
    struct tree_guard {
        pgs_tree* tree;
        ~tree_guard() { pgs_free_tree(tree); }
    } guard{into};

    program_ptr prog = build_program(guard.tree, s.c_str());
    prog->check();
    return prog;
}
|
|
|
|
void program::check() {
|
|
// Each function has an environment, which will be used
|
|
// as base for type checking.
|
|
std::map<std::string, std::shared_ptr<type_env>> function_envs;
|
|
std::map<std::string, type*> function_output_types;
|
|
auto base_env = std::make_shared<type_env>();
|
|
|
|
// First step is to collect all function types.
|
|
for(auto& pair : functions) {
|
|
// Create a local function environment.
|
|
auto function_env = std::make_shared<type_env>();
|
|
function_env->set_parent(base_env.get());
|
|
function_envs[pair.first] = function_env;
|
|
|
|
// We'll be building up the function type.
|
|
// Create the return type parameter
|
|
type* return_type = type_mgr.create_type<type_parameter>();
|
|
type* current_type = return_type;
|
|
function_output_types[pair.first] = return_type;
|
|
|
|
// Create type parameters for every variable
|
|
// We also want to place them in a local environment
|
|
for(int i = 0; i < pair.second.params.size(); i++) {
|
|
const std::string& str =
|
|
pair.second.params[pair.second.params.size() - i - 1];
|
|
type* variable_param = type_mgr.create_type<type_parameter>();
|
|
function_env->set_type(str, variable_param);
|
|
current_type =
|
|
type_mgr.create_type<type_func>(variable_param, current_type);
|
|
}
|
|
|
|
// Store function in env.
|
|
base_env->set_type(pair.first, current_type);
|
|
}
|
|
|
|
// We also want to gather all the constructor calls.
|
|
type_mgr.register_constructors(base_env);
|
|
|
|
// Now that we have collected the functions, check their bodies.
|
|
for(auto& pair : functions) {
|
|
type* body_type =
|
|
pair.second.body->check(type_mgr, function_envs[pair.first]);
|
|
if(!function_output_types[pair.first]->unify_with(body_type))
|
|
throw error("unable to unify function type");
|
|
}
|
|
}
|
|
|
|
template <binop o>
|
|
static void generate_binop(instruction_manager& mgr, std::map<std::string, std::vector<instruction*>>& into) {
|
|
std::vector<instruction*> dest;
|
|
dest.push_back(mgr.add_instruction<instruction_push>(1));
|
|
dest.push_back(mgr.add_instruction<instruction_eval>());
|
|
dest.push_back(mgr.add_instruction<instruction_push>(1));
|
|
dest.push_back(mgr.add_instruction<instruction_eval>());
|
|
dest.push_back(mgr.add_instruction<instruction_op>(o));
|
|
dest.push_back(mgr.add_instruction<instruction_update>(2));
|
|
dest.push_back(mgr.add_instruction<instruction_pop>(2));
|
|
into[op_supercombinator(o)] = std::move(dest);
|
|
}
|
|
|
|
// Registers the instruction sequences of the four built-in arithmetic
// operators (add, subtract, times, divide) in `into`, in that order.
static void register_internal(instruction_manager& mgr, std::map<std::string, std::vector<instruction*>>& into) {
    // Additive operators.
    generate_binop<binop::add>(mgr, into);
    generate_binop<binop::subtract>(mgr, into);

    // Multiplicative operators.
    generate_binop<binop::times>(mgr, into);
    generate_binop<binop::divide>(mgr, into);
}
|
|
|
|
// Compiles every function of the program into an instruction sequence.
//
// The built-in operators are registered first. For each function a chain of
// compile environments is built: a compile_env_offset(0) at the bottom, then
// one compile_env_var per parameter, added from the last parameter to the
// first so that the first parameter ends up outermost. The body is compiled
// in that environment and followed by update(n), pop(n) and unwind, where n
// is the parameter count. The result is stored in `into` under the
// function's name.
void program::compile(instruction_manager& mgr, std::map<std::string, std::vector<instruction*>>& into) {
    register_internal(mgr, into);

    for(auto& entry : functions) {
        auto& params = entry.second.params;
        size_t arity = params.size();

        std::shared_ptr<compile_env> env = std::make_shared<compile_env_offset>(0);
        for(size_t remaining = arity; remaining > 0; remaining--) {
            auto scoped = std::make_shared<compile_env_var>(params[remaining - 1]);
            scoped->set_parent(env);
            env = scoped;
        }

        std::vector<instruction*> code;
        entry.second.body->compile(mgr, code, env);
        code.push_back(mgr.add_instruction<instruction_update>(arity));
        code.push_back(mgr.add_instruction<instruction_pop>(arity));
        code.push_back(mgr.add_instruction<instruction_unwind>());

        into[entry.first] = std::move(code);
    }
}
|
|
|
|
void program::gen_llvm() {
|
|
llvm_init();
|
|
llvm_context ctx;
|
|
instruction_manager mgr;
|
|
std::map<std::string, std::vector<instruction*>> gcode;
|
|
|
|
compile(mgr, gcode);
|
|
|
|
for(auto& pair : gcode) {
|
|
int arity = functions.count(pair.first) ? functions[pair.first].params.size() : 2;
|
|
ctx.add_supercombinator(pair.first, arity);
|
|
}
|
|
|
|
for(auto& pair : gcode) {
|
|
std::vector<instruction*>& comb_gcode = pair.second;
|
|
llvm::Function* current_function = ctx.get_supercombinator_function(pair.first);
|
|
ctx.set_current_function(current_function);
|
|
llvm::BasicBlock* new_block =
|
|
llvm::BasicBlock::Create(context, "entry", current_function);
|
|
builder.SetInsertPoint(new_block);
|
|
for(auto& op : comb_gcode) {
|
|
op->gen_llvm(ctx);
|
|
}
|
|
builder.CreateRetVoid();
|
|
}
|
|
|
|
llvm::verifyModule(module, &llvm::outs());
|
|
llvm_generate("lily.o");
|
|
module.print(llvm::outs(), NULL);
|
|
}
|
|
}
|