diff --git a/09/CMakeLists.txt b/09/CMakeLists.txt new file mode 100644 index 0000000..1347f11 --- /dev/null +++ b/09/CMakeLists.txt @@ -0,0 +1,42 @@ +cmake_minimum_required(VERSION 3.1) +project(compiler) + +# Find all the required packages +find_package(BISON) +find_package(FLEX) +find_package(LLVM REQUIRED CONFIG) + +# Set up the flex and bison targets +bison_target(parser + ${CMAKE_CURRENT_SOURCE_DIR}/parser.y + ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp + COMPILE_FLAGS "-d") +flex_target(scanner + ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l + ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp) +add_flex_bison_dependency(scanner parser) + +# Find all the relevant LLVM components +llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen) + +# Create compiler executable +add_executable(compiler + ast.cpp ast.hpp definition.cpp + llvm_context.cpp llvm_context.hpp + type_env.cpp type_env.hpp + env.cpp env.hpp + type.cpp type.hpp + error.cpp error.hpp + binop.cpp binop.hpp + instruction.cpp instruction.hpp + ${BISON_parser_OUTPUTS} + ${FLEX_scanner_OUTPUTS} + main.cpp +) + +# Configure compiler executable +target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS}) +target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS}) +target_link_libraries(compiler ${LLVM_LIBS}) diff --git a/09/ast.cpp b/09/ast.cpp new file mode 100644 index 0000000..453a12b --- /dev/null +++ b/09/ast.cpp @@ -0,0 +1,264 @@ +#include "ast.hpp" +#include +#include "binop.hpp" +#include "error.hpp" + +static void print_indent(int n, std::ostream& to) { + while(n--) to << " "; +} + +type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) { + node_type = typecheck(mgr, env); + return node_type; +} + +void ast::resolve_common(const type_mgr& mgr) { + type_var* var; + type_ptr resolved_type = mgr.resolve(node_type, var); + if(var) throw type_error("ambiguously typed program"); + + resolve(mgr); + node_type = std::move(resolved_type); +} + +void ast_int::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "INT: " << value << std::endl; +} + +type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const { + return type_ptr(new type_base("Int")); +} + +void ast_int::resolve(const type_mgr& mgr) const { + +} + +void ast_int::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr(new instruction_pushint(value))); +} + +void ast_lid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "LID: " << id << std::endl; +} + +type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const { + return env.lookup(id); +} + +void ast_lid::resolve(const type_mgr& mgr) const { + +} + +void ast_lid::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr( + env->has_variable(id) ? + (instruction*) new instruction_push(env->get_offset(id)) : + (instruction*) new instruction_pushglobal(id))); +} + +void ast_uid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "UID: " << id << std::endl; +} + +type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const { + return env.lookup(id); +} + +void ast_uid::resolve(const type_mgr& mgr) const { + +} + +void ast_uid::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr(new instruction_pushglobal(id))); +} + +void ast_binop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "BINOP: " << op_name(op) << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + +type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const { + type_ptr ltype = left->typecheck_common(mgr, env); + type_ptr rtype = right->typecheck_common(mgr, env); + type_ptr ftype = env.lookup(op_name(op)); + if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op)); + + type_ptr return_type = mgr.new_type(); + type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type)); + type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one)); + + mgr.unify(arrow_two, ftype); + return return_type; +} + +void ast_binop::resolve(const type_mgr& mgr) const { + left->resolve_common(mgr); + right->resolve_common(mgr); +} + +void ast_binop::compile(const env_ptr& env, std::vector& into) const { + right->compile(env, into); + left->compile(env_ptr(new env_offset(1, env)), into); + + into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op)))); + into.push_back(instruction_ptr(new instruction_mkapp())); + into.push_back(instruction_ptr(new instruction_mkapp())); +} + +void ast_app::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "APP:" << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + +type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const { + type_ptr ltype = left->typecheck_common(mgr, env); + type_ptr rtype = right->typecheck_common(mgr, env); + + type_ptr return_type = mgr.new_type(); + type_ptr arrow = type_ptr(new type_arr(rtype, return_type)); + mgr.unify(arrow, ltype); + return return_type; +} + +void ast_app::resolve(const type_mgr& mgr) const { + left->resolve_common(mgr); + right->resolve_common(mgr); +} + +void ast_app::compile(const env_ptr& env, std::vector& into) const { + right->compile(env, into); + left->compile(env_ptr(new env_offset(1, env)), into); + into.push_back(instruction_ptr(new instruction_mkapp())); +} + +void ast_case::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "CASE: " << std::endl; + for(auto& branch : branches) { + print_indent(indent + 1, to); + branch->pat->print(to); + to << std::endl; + branch->expr->print(indent + 2, to); + } +} + +type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const { + type_var* var; + type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var); + type_ptr branch_type = mgr.new_type(); + + for(auto& branch : branches) { + type_env new_env = env.scope(); + branch->pat->match(case_type, mgr, new_env); + type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env); + mgr.unify(branch_type, curr_branch_type); + } + + case_type = mgr.resolve(case_type, var); + if(!dynamic_cast(case_type.get())) { + throw type_error("attempting case analysis of non-data type"); + } + + return branch_type; +} + +void ast_case::resolve(const type_mgr& mgr) const { + of->resolve_common(mgr); + for(auto& branch : branches) { + branch->expr->resolve_common(mgr); + } +} + +void ast_case::compile(const env_ptr& env, std::vector& into) const { + type_data* type = dynamic_cast(of->node_type.get()); + + of->compile(env, into); + into.push_back(instruction_ptr(new instruction_eval())); + + instruction_jump* jump_instruction = new instruction_jump(); + into.push_back(instruction_ptr(jump_instruction)); + for(auto& branch : branches) { + std::vector branch_instructions; + pattern_var* vpat; + pattern_constr* cpat; + + if((vpat = dynamic_cast(branch->pat.get()))) { + branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions); + + for(auto& constr_pair : type->constructors) { + if(jump_instruction->tag_mappings.find(constr_pair.second.tag) != + jump_instruction->tag_mappings.end()) + break; + + jump_instruction->tag_mappings[constr_pair.second.tag] = + jump_instruction->branches.size(); + } + jump_instruction->branches.push_back(std::move(branch_instructions)); + } else if((cpat = dynamic_cast(branch->pat.get()))) { + env_ptr new_env = env; + for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + + branch_instructions.push_back(instruction_ptr(new instruction_split( + cpat->params.size()))); + branch->expr->compile(new_env, branch_instructions); + branch_instructions.push_back(instruction_ptr(new instruction_slide( + cpat->params.size()))); + + int new_tag = type->constructors[cpat->constr].tag; + if(jump_instruction->tag_mappings.find(new_tag) != + jump_instruction->tag_mappings.end()) + throw type_error("technically not a type error: duplicate pattern"); + + jump_instruction->tag_mappings[new_tag] = + jump_instruction->branches.size(); + jump_instruction->branches.push_back(std::move(branch_instructions)); + } + } + + for(auto& constr_pair : type->constructors) { + if(jump_instruction->tag_mappings.find(constr_pair.second.tag) == + jump_instruction->tag_mappings.end()) + throw type_error("non-total pattern"); + } +} + +void pattern_var::print(std::ostream& to) const { + to << var; +} + +void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const { + env.bind(var, t); +} + +void pattern_constr::print(std::ostream& to) const { + to << constr; + for(auto& param : params) { + to << " " << param; + } +} + +void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const { + type_ptr constructor_type = env.lookup(constr); + if(!constructor_type) { + throw type_error(std::string("pattern using unknown constructor ") + constr); + } + + for(int i = 0; i < params.size(); i++) { + type_arr* arr = dynamic_cast(constructor_type.get()); + if(!arr) throw type_error("too many parameters in constructor pattern"); + + env.bind(params[i], arr->left); + constructor_type = arr->right; + } + + mgr.unify(t, constructor_type); +} diff --git a/09/ast.hpp b/09/ast.hpp new file mode 100644 index 0000000..c88fc99 --- /dev/null +++ b/09/ast.hpp @@ -0,0 +1,141 @@ +#pragma once +#include +#include +#include "type.hpp" +#include "type_env.hpp" +#include "binop.hpp" +#include "instruction.hpp" +#include "env.hpp" + +struct ast { + type_ptr node_type; + + virtual ~ast() = default; + + virtual void print(int indent, std::ostream& to) const = 0; + virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0; + virtual void resolve(const type_mgr& mgr) const = 0; + virtual void compile(const env_ptr& env, + std::vector& into) const = 0; + + type_ptr typecheck_common(type_mgr& mgr, const type_env& env); + void resolve_common(const type_mgr& mgr); +}; + +using ast_ptr = std::unique_ptr; + +struct pattern { + virtual ~pattern() = default; + + virtual void print(std::ostream& to) const = 0; + virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0; +}; + +using pattern_ptr = std::unique_ptr; + +struct branch { + pattern_ptr pat; + ast_ptr expr; + + branch(pattern_ptr p, ast_ptr a) + : pat(std::move(p)), expr(std::move(a)) {} +}; + +using branch_ptr = std::unique_ptr; + +struct ast_int : public ast { + int value; + + explicit ast_int(int v) + : value(v) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_lid : public ast { + std::string id; + + explicit ast_lid(std::string i) + : id(std::move(i)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_uid : public ast { + std::string id; + + explicit ast_uid(std::string i) + : id(std::move(i)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_binop : public ast { + binop op; + ast_ptr left; + ast_ptr right; + + ast_binop(binop o, ast_ptr l, ast_ptr r) + : op(o), left(std::move(l)), right(std::move(r)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_app : public ast { + ast_ptr left; + ast_ptr right; + + ast_app(ast_ptr l, ast_ptr r) + : left(std::move(l)), right(std::move(r)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_case : public ast { + ast_ptr of; + std::vector branches; + + ast_case(ast_ptr o, std::vector b) + : of(std::move(o)), branches(std::move(b)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct pattern_var : public pattern { + std::string var; + + pattern_var(std::string v) + : var(std::move(v)) {} + + void print(std::ostream &to) const; + void match(type_ptr t, type_mgr& mgr, type_env& env) const; +}; + +struct pattern_constr : public pattern { + std::string constr; + std::vector params; + + pattern_constr(std::string c, std::vector p) + : constr(std::move(c)), params(std::move(p)) {} + + void print(std::ostream &to) const; + void match(type_ptr t, type_mgr&, type_env& env) const; +}; diff --git a/09/binop.cpp b/09/binop.cpp new file mode 100644 index 0000000..3a5f0ca --- /dev/null +++ b/09/binop.cpp @@ -0,0 +1,21 @@ +#include "binop.hpp" + +std::string op_name(binop op) { + switch(op) { + case PLUS: return "+"; + case MINUS: return "-"; + case TIMES: return "*"; + case DIVIDE: return "/"; + } + return "??"; +} + +std::string op_action(binop op) { + switch(op) { + case PLUS: return "plus"; + case MINUS: return "minus"; + case TIMES: return "times"; + case DIVIDE: return "divide"; + } + return "??"; +} diff --git a/09/binop.hpp b/09/binop.hpp new file mode 100644 index 0000000..8d07858 --- /dev/null +++ b/09/binop.hpp @@ -0,0 +1,12 @@ +#pragma once +#include + +enum binop { + PLUS, + MINUS, + TIMES, + DIVIDE +}; + +std::string op_name(binop op); +std::string op_action(binop op); diff --git a/09/definition.cpp b/09/definition.cpp new file mode 100644 index 0000000..fe116c8 --- /dev/null +++ b/09/definition.cpp @@ -0,0 +1,116 @@ +#include "definition.hpp" +#include "error.hpp" +#include "ast.hpp" +#include "llvm_context.hpp" +#include +#include +#include + +void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) { + return_type = mgr.new_type(); + type_ptr full_type = return_type; + + for(auto it = params.rbegin(); it != params.rend(); it++) { + type_ptr param_type = mgr.new_type(); + full_type = type_ptr(new type_arr(param_type, full_type)); + param_types.push_back(param_type); + } + + env.bind(name, full_type); +} + +void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const { + type_env new_env = env.scope(); + auto param_it = params.begin(); + auto type_it = param_types.rbegin(); + + while(param_it != params.end() && type_it != param_types.rend()) { + new_env.bind(*param_it, *type_it); + param_it++; + type_it++; + } + + type_ptr body_type = body->typecheck_common(mgr, new_env); + mgr.unify(return_type, body_type); +} + +void definition_defn::resolve(const type_mgr& mgr) { + type_var* var; + body->resolve_common(mgr); + + return_type = mgr.resolve(return_type, var); + if(var) throw type_error("ambiguously typed program"); + for(auto& param_type : param_types) { + param_type = mgr.resolve(param_type, var); + if(var) throw type_error("ambiguously typed program"); + } +} + +void definition_defn::compile() { + env_ptr new_env = env_ptr(new env_offset(0, nullptr)); + for(auto it = params.rbegin(); it != params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + body->compile(new_env, instructions); + instructions.push_back(instruction_ptr(new instruction_update(params.size()))); + instructions.push_back(instruction_ptr(new instruction_pop(params.size()))); +} +void definition_defn::gen_llvm_first(llvm_context& ctx) { + generated_function = ctx.create_custom_function(name, params.size()); +} + +void definition_defn::gen_llvm_second(llvm_context& ctx) { + ctx.builder.SetInsertPoint(&generated_function->getEntryBlock()); + for(auto& instruction : instructions) { + instruction->gen_llvm(ctx, generated_function); + } + ctx.builder.CreateRetVoid(); +} + +void definition_data::typecheck_first(type_mgr& mgr, type_env& env) { + type_data* this_type = new type_data(name); + type_ptr return_type = type_ptr(this_type); + int next_tag = 0; + + for(auto& constructor : constructors) { + constructor->tag = next_tag; + this_type->constructors[constructor->name] = { next_tag++ }; + + type_ptr full_type = return_type; + for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) { + type_ptr type = type_ptr(new type_base(*it)); + full_type = type_ptr(new type_arr(type, full_type)); + } + + env.bind(constructor->name, full_type); + } +} + +void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const { + // Nothing +} + +void definition_data::resolve(const type_mgr& mgr) { + // Nothing +} + +void definition_data::compile() { + +} + +void definition_data::gen_llvm_first(llvm_context& ctx) { + for(auto& constructor : constructors) { + auto new_function = + ctx.create_custom_function(constructor->name, constructor->types.size()); + ctx.builder.SetInsertPoint(&new_function->getEntryBlock()); + ctx.create_pack(new_function, + ctx.create_size(constructor->types.size()), + ctx.create_i8(constructor->tag) + ); + ctx.builder.CreateRetVoid(); + } +} + +void definition_data::gen_llvm_second(llvm_context& ctx) { + // Nothing +} diff --git a/09/definition.hpp b/09/definition.hpp new file mode 100644 index 0000000..6004d2f --- /dev/null +++ b/09/definition.hpp @@ -0,0 +1,73 @@ +#pragma once +#include +#include +#include "instruction.hpp" +#include "llvm_context.hpp" +#include "type_env.hpp" + +struct ast; +using ast_ptr = std::unique_ptr; + +struct definition { + virtual ~definition() = default; + + virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0; + virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0; + virtual void resolve(const type_mgr& mgr) = 0; + virtual void compile() = 0; + virtual void gen_llvm_first(llvm_context& ctx) = 0; + virtual void gen_llvm_second(llvm_context& ctx) = 0; +}; + +using definition_ptr = std::unique_ptr; + +struct constructor { + std::string name; + std::vector types; + int8_t tag; + + constructor(std::string n, std::vector ts) + : name(std::move(n)), types(std::move(ts)) {} +}; + +using constructor_ptr = std::unique_ptr; + +struct definition_defn : public definition { + std::string name; + std::vector params; + ast_ptr body; + + type_ptr return_type; + std::vector param_types; + + std::vector instructions; + + llvm::Function* generated_function; + + definition_defn(std::string n, std::vector p, ast_ptr b) + : name(std::move(n)), params(std::move(p)), body(std::move(b)) { + + } + + void typecheck_first(type_mgr& mgr, type_env& env); + void typecheck_second(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr); + void compile(); + void gen_llvm_first(llvm_context& ctx); + void gen_llvm_second(llvm_context& ctx); +}; + +struct definition_data : public definition { + std::string name; + std::vector constructors; + + definition_data(std::string n, std::vector cs) + : name(std::move(n)), constructors(std::move(cs)) {} + + void typecheck_first(type_mgr& mgr, type_env& env); + void typecheck_second(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr); + void compile(); + void gen_llvm_first(llvm_context& ctx); + void gen_llvm_second(llvm_context& ctx); +}; diff --git a/09/env.cpp b/09/env.cpp new file mode 100644 index 0000000..818cf5d --- /dev/null +++ b/09/env.cpp @@ -0,0 +1,23 @@ +#include "env.hpp" + +int env_var::get_offset(const std::string& name) const { + if(name == this->name) return 0; + if(parent) return parent->get_offset(name) + 1; + throw 0; +} + +bool env_var::has_variable(const std::string& name) const { + if(name == this->name) return true; + if(parent) return parent->has_variable(name); + return false; +} + +int env_offset::get_offset(const std::string& name) const { + if(parent) return parent->get_offset(name) + offset; + throw 0; +} + +bool env_offset::has_variable(const std::string& name) const { + if(parent) return parent->has_variable(name); + return false; +} diff --git a/09/env.hpp b/09/env.hpp new file mode 100644 index 0000000..a8fbbec --- /dev/null +++ b/09/env.hpp @@ -0,0 +1,34 @@ +#pragma once +#include +#include + +struct env { + virtual ~env() = default; + + virtual int get_offset(const std::string& name) const = 0; + virtual bool has_variable(const std::string& name) const = 0; +}; + +using env_ptr = std::shared_ptr; + +struct env_var : public env { + std::string name; + env_ptr parent; + + env_var(std::string& n, env_ptr p) + : name(std::move(n)), parent(std::move(p)) {} + + int get_offset(const std::string& name) const; + bool has_variable(const std::string& name) const; +}; + +struct env_offset : public env { + int offset; + env_ptr parent; + + env_offset(int o, env_ptr p) + : offset(o), parent(std::move(p)) {} + + int get_offset(const std::string& name) const; + bool has_variable(const std::string& name) const; +}; diff --git a/09/error.cpp b/09/error.cpp new file mode 100644 index 0000000..f5125e3 --- /dev/null +++ b/09/error.cpp @@ -0,0 +1,5 @@ +#include "error.hpp" + +const char* type_error::what() const noexcept { + return "an error occured while checking the types of the program"; +} diff --git a/09/error.hpp b/09/error.hpp new file mode 100644 index 0000000..5bfbc7e --- /dev/null +++ b/09/error.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include "type.hpp" + +struct type_error : std::exception { + std::string description; + + type_error(std::string d) + : description(std::move(d)) {} + + const char* what() const noexcept override; +}; + +struct unification_error : public type_error { + type_ptr left; + type_ptr right; + + unification_error(type_ptr l, type_ptr r) + : left(std::move(l)), right(std::move(r)), + type_error("failed to unify types") {} +}; diff --git a/09/examples/bad1.txt b/09/examples/bad1.txt new file mode 100644 index 0000000..86d4bc4 --- /dev/null +++ b/09/examples/bad1.txt @@ -0,0 +1,2 @@ +data Bool = { True, False } +defn main = { 3 + True } diff --git a/09/examples/bad2.txt b/09/examples/bad2.txt new file mode 100644 index 0000000..def8785 --- /dev/null +++ b/09/examples/bad2.txt @@ -0,0 +1 @@ +defn main = { 1 2 3 4 5 } diff --git a/09/examples/bad3.txt b/09/examples/bad3.txt new file mode 100644 index 0000000..6f82b3d --- /dev/null +++ b/09/examples/bad3.txt @@ -0,0 +1,8 @@ +data List = { Nil, Cons Int List } + +defn head l = { + case l of { + Nil -> { 0 } + Cons x y z -> { x } + } +} diff --git a/09/examples/runtime1.c b/09/examples/runtime1.c new file mode 100644 index 0000000..0157051 --- /dev/null +++ b/09/examples/runtime1.c @@ -0,0 +1,31 @@ +#include "../runtime.h" + +void f_add(struct stack* s) { + struct node_num* left = (struct node_num*) eval(stack_peek(s, 0)); + struct node_num* right = (struct node_num*) eval(stack_peek(s, 1)); + stack_push(s, (struct node_base*) alloc_num(left->value + right->value)); +} + +void f_main(struct stack* s) { + // PushInt 320 + stack_push(s, (struct node_base*) alloc_num(320)); + + // PushInt 6 + stack_push(s, (struct node_base*) alloc_num(6)); + + // PushGlobal f_add (the function for +) + stack_push(s, (struct node_base*) alloc_global(f_add, 2)); + + struct node_base* left; + struct node_base* right; + + // MkApp + left = stack_pop(s); + right = stack_pop(s); + stack_push(s, (struct node_base*) alloc_app(left, right)); + + // MkApp + left = stack_pop(s); + right = stack_pop(s); + stack_push(s, (struct node_base*) alloc_app(left, right)); +} diff --git a/09/examples/works1.txt b/09/examples/works1.txt new file mode 100644 index 0000000..4e4e099 --- /dev/null +++ b/09/examples/works1.txt @@ -0,0 +1,3 @@ +defn main = { sum 320 6 } +defn sum x y = { x + y } + diff --git a/09/examples/works2.txt b/09/examples/works2.txt new file mode 100644 index 0000000..8332fde --- /dev/null +++ b/09/examples/works2.txt @@ -0,0 +1,3 @@ +defn add x y = { x + y } +defn double x = { add x x } +defn main = { double 163 } diff --git a/09/examples/works3.txt b/09/examples/works3.txt new file mode 100644 index 0000000..13514dd --- /dev/null +++ b/09/examples/works3.txt @@ -0,0 +1,8 @@ +data List = { Nil, Cons Int List } +defn length l = { + case l of { + Nil -> { 0 } + Cons x xs -> { 1 + length xs } + } +} +defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) } diff --git a/09/examples/works4.txt b/09/examples/works4.txt new file mode 100644 index 0000000..7674691 --- /dev/null +++ b/09/examples/works4.txt @@ -0,0 +1,16 @@ +data List = { Nil, Cons Int List } + +defn add x y = { x + y } +defn mul x y = { x * y } + +defn foldr f b l = { + case l of { + Nil -> { b } + Cons x xs -> { f x (foldr f b xs) } + } +} + +defn main = { + foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) + + foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +} diff --git a/09/examples/works5.txt b/09/examples/works5.txt new file mode 100644 index 0000000..3a8aff5 --- /dev/null +++ b/09/examples/works5.txt @@ -0,0 +1,17 @@ +data List = { Nil, Cons Int List } + +defn sumZip l m = { + case l of { + Nil -> { 0 } + Cons x xs -> { + case m of { + Nil -> { 0 } + Cons y ys -> { x + y + sumZip xs ys } + } + } + } +} + +defn ones = { Cons 1 ones } + +defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) } diff --git a/09/instruction.cpp b/09/instruction.cpp new file mode 100644 index 0000000..23f4258 --- /dev/null +++ b/09/instruction.cpp @@ -0,0 +1,177 @@ +#include "instruction.hpp" +#include "llvm_context.hpp" +#include +#include + +using namespace llvm; + +static void print_indent(int n, std::ostream& to) { + while(n--) to << " "; +} + +void instruction_pushint::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "PushInt(" << value << ")" << std::endl; +} + +void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_num(ctx.create_i32(value))); +} + +void instruction_pushglobal::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "PushGlobal(" << name << ")" << std::endl; +} + +void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const { + auto& global_f = ctx.custom_functions.at("f_" + name); + auto arity = ctx.create_i32(global_f->arity); + ctx.create_push(f, ctx.create_global(global_f->function, arity)); +} + +void instruction_push::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Push(" << offset << ")" << std::endl; +} + +void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset))); +} + +void instruction_pop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Pop(" << count << ")" << std::endl; +} + +void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_popn(f, ctx.create_size(count)); +} + +void instruction_mkapp::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "MkApp()" << std::endl; +} + +void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const { + auto left = ctx.create_pop(f); + auto right = ctx.create_pop(f); + ctx.create_push(f, ctx.create_app(left, right)); +} + +void instruction_update::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Update(" << offset << ")" << std::endl; +} + +void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_update(f, ctx.create_size(offset)); +} + +void instruction_pack::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Pack(" << tag << ", " << size << ")" << std::endl; +} + +void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag)); +} + +void instruction_split::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Split()" << std::endl; +} + +void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_split(f, ctx.create_size(size)); +} + +void instruction_jump::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Jump(" << std::endl; + for(auto& instruction_set : branches) { + for(auto& instruction : instruction_set) { + instruction->print(indent + 2, to); + } + to << std::endl; + } + print_indent(indent, to); + to << ")" << std::endl; +} + +void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const { + auto top_node = ctx.create_peek(f, ctx.create_size(0)); + auto tag = ctx.unwrap_data_tag(top_node); + auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f); + auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size()); + std::vector blocks; + + for(auto& branch : branches) { + auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f); + ctx.builder.SetInsertPoint(branch_block); + for(auto& instruction : branch) { + instruction->gen_llvm(ctx, f); + } + ctx.builder.CreateBr(safety_block); + blocks.push_back(branch_block); + } + + for(auto& mapping : tag_mappings) { + switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]); + } + + ctx.builder.SetInsertPoint(safety_block); +} + +void instruction_slide::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Slide(" << offset << ")" << std::endl; +} + +void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_slide(f, ctx.create_size(offset)); +} + +void instruction_binop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "BinOp(" << op_action(op) << ")" << std::endl; +} + +void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const { + auto left_int = ctx.unwrap_num(ctx.create_pop(f)); + auto right_int = ctx.unwrap_num(ctx.create_pop(f)); + llvm::Value* result; + switch(op) { + case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break; + case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break; + case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break; + case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break; + } + ctx.create_push(f, ctx.create_num(result)); +} + +void instruction_eval::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Eval()" << std::endl; +} + +void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_eval(ctx.create_pop(f))); +} + +void instruction_alloc::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Alloc(" << amount << ")" << std::endl; +} + +void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_alloc(f, ctx.create_size(amount)); +} + +void instruction_unwind::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Unwind()" << std::endl; +} + +void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const { + // Nothing +} diff --git a/09/instruction.hpp b/09/instruction.hpp new file mode 100644 index 0000000..abe2409 --- /dev/null +++ b/09/instruction.hpp @@ -0,0 +1,142 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include "binop.hpp" +#include "llvm_context.hpp" + +struct instruction { + virtual ~instruction() = default; + + virtual void print(int indent, std::ostream& to) const = 0; + virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0; +}; + +using instruction_ptr = std::unique_ptr; + +struct instruction_pushint : public instruction { + int value; + + instruction_pushint(int v) + : value(v) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_pushglobal : public instruction { + std::string name; + + instruction_pushglobal(std::string n) + : name(std::move(n)) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_push : public instruction { + int offset; + + instruction_push(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_pop : public instruction { + int count; + + instruction_pop(int c) + : count(c) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_mkapp : public instruction { + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_update : public instruction { + int offset; + + instruction_update(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_pack : public instruction { + int tag; + int size; + + instruction_pack(int t, int s) + : tag(t), size(s) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_split : public instruction { + int size; + + instruction_split(int s) + : size(s) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_jump : public instruction { + std::vector> branches; + std::map tag_mappings; + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_slide : public instruction { + int offset; + + instruction_slide(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_binop : public instruction { + binop op; + + instruction_binop(binop o) + : op(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_eval : public instruction { + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_alloc : public instruction { + int amount; + + instruction_alloc(int a) + : amount(a) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_unwind : public instruction { + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; diff --git a/09/llvm_context.cpp b/09/llvm_context.cpp new file mode 100644 index 0000000..b2982f9 --- /dev/null +++ b/09/llvm_context.cpp @@ -0,0 +1,252 @@ +#include "llvm_context.hpp" +#include + +using namespace llvm; + +void llvm_context::create_types() { + stack_type = StructType::create(ctx, "stack"); + stack_ptr_type = PointerType::getUnqual(stack_type); + tag_type = IntegerType::getInt8Ty(ctx); + struct_types["node_base"] = StructType::create(ctx, "node_base"); + struct_types["node_app"] = StructType::create(ctx, "node_app"); + struct_types["node_num"] = StructType::create(ctx, "node_num"); + struct_types["node_global"] = StructType::create(ctx, "node_global"); + struct_types["node_ind"] = StructType::create(ctx, "node_ind"); + struct_types["node_data"] = StructType::create(ctx, "node_data"); + node_ptr_type = PointerType::getUnqual(struct_types.at("node_base")); + function_type = FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false); + + struct_types.at("node_base")->setBody( + IntegerType::getInt32Ty(ctx) + ); + struct_types.at("node_app")->setBody( + struct_types.at("node_base"), + node_ptr_type, + node_ptr_type + ); + struct_types.at("node_num")->setBody( + struct_types.at("node_base"), + IntegerType::getInt32Ty(ctx) + ); + struct_types.at("node_global")->setBody( + struct_types.at("node_base"), + FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false) + ); + struct_types.at("node_ind")->setBody( + struct_types.at("node_base"), + node_ptr_type + ); + struct_types.at("node_data")->setBody( + struct_types.at("node_base"), + IntegerType::getInt8Ty(ctx), + PointerType::getUnqual(node_ptr_type) + ); +} + +void llvm_context::create_functions() { + auto void_type = Type::getVoidTy(ctx); + auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8); + functions["stack_init"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_init", + &module + ); + functions["stack_free"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_free", + &module + ); + functions["stack_push"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_push", + &module + ); + functions["stack_pop"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_pop", + &module + ); + functions["stack_peek"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_peek", + &module + ); + functions["stack_popn"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_popn", + &module + ); + functions["stack_slide"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_slide", + &module + ); + functions["stack_update"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_update", + &module + ); + functions["stack_alloc"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_alloc", + &module + ); + functions["stack_pack"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type, tag_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_pack", + &module + ); + functions["stack_split"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_split", + &module + ); + + auto int32_type = IntegerType::getInt32Ty(ctx); + functions["alloc_app"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_app", + &module + ); + functions["alloc_num"] = Function::Create( + FunctionType::get(node_ptr_type, { int32_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_num", + &module + ); + functions["alloc_global"] = Function::Create( + FunctionType::get(node_ptr_type, { function_type, int32_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_global", + &module + ); + functions["alloc_ind"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_ind", + &module + ); + + functions["eval"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "eval", + &module + ); +} + +ConstantInt* llvm_context::create_i8(int8_t i) { + return ConstantInt::get(ctx, APInt(8, i)); +} +ConstantInt* llvm_context::create_i32(int32_t i) { + return ConstantInt::get(ctx, APInt(32, i)); +} +ConstantInt* llvm_context::create_size(size_t i) { + return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i)); +} + +Value* llvm_context::create_pop(Function* f) { + auto pop_f = functions.at("stack_pop"); + return builder.CreateCall(pop_f, { f->arg_begin() }); +} +Value* llvm_context::create_peek(Function* f, Value* off) { + auto peek_f = functions.at("stack_peek"); + return builder.CreateCall(peek_f, { f->arg_begin(), off }); +} +void llvm_context::create_push(Function* f, Value* v) { + auto push_f = functions.at("stack_push"); + builder.CreateCall(push_f, { f->arg_begin(), v }); +} +void llvm_context::create_popn(Function* f, Value* off) { + auto popn_f = functions.at("stack_popn"); + builder.CreateCall(popn_f, { f->arg_begin(), off }); +} +void llvm_context::create_update(Function* f, Value* off) { + auto update_f = functions.at("stack_update"); + builder.CreateCall(update_f, { f->arg_begin(), off }); +} +void llvm_context::create_pack(Function* f, Value* c, Value* t) { + auto pack_f = functions.at("stack_pack"); + builder.CreateCall(pack_f, { f->arg_begin(), c, t }); +} +void llvm_context::create_split(Function* f, Value* c) { + auto split_f = functions.at("stack_split"); + builder.CreateCall(split_f, { f->arg_begin(), c }); +} +void llvm_context::create_slide(Function* f, Value* off) { + auto slide_f = functions.at("stack_slide"); + builder.CreateCall(slide_f, { f->arg_begin(), off }); +} +void llvm_context::create_alloc(Function* f, Value* n) { + auto alloc_f = functions.at("stack_alloc"); + builder.CreateCall(alloc_f, { f->arg_begin(), n }); +} + +Value* llvm_context::create_eval(Value* e) { + auto eval_f = functions.at("eval"); + return builder.CreateCall(eval_f, { e }); +} + +Value* llvm_context::unwrap_num(Value* v) { + auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num")); + auto cast = builder.CreatePointerCast(v, num_ptr_type); + auto offset_0 = create_i32(0); + auto offset_1 = create_i32(1); + auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 }); + return builder.CreateLoad(int_ptr); +} +Value* llvm_context::create_num(Value* v) { + auto alloc_num_f = functions.at("alloc_num"); + return builder.CreateCall(alloc_num_f, { v }); +} + +Value* llvm_context::unwrap_data_tag(Value* v) { + auto data_ptr_type = PointerType::getUnqual(struct_types.at("node_data")); + auto cast = builder.CreatePointerCast(v, data_ptr_type); + auto offset_0 = create_i32(0); + auto offset_1 = create_i32(1); + auto tag_ptr = builder.CreateGEP(cast, { offset_0, offset_1 }); + return builder.CreateLoad(tag_ptr); +} + +Value* llvm_context::create_global(Value* f, Value* a) { + auto alloc_global_f = functions.at("alloc_global"); + return builder.CreateCall(alloc_global_f, { f, a }); +} + +Value* llvm_context::create_app(Value* l, Value* r) { + auto alloc_app_f = functions.at("alloc_app"); + return builder.CreateCall(alloc_app_f, { l, r }); +} + +llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) { + auto void_type = llvm::Type::getVoidTy(ctx); + auto function_type = + llvm::FunctionType::get(void_type, { stack_ptr_type }, false); + auto new_function = llvm::Function::Create( + function_type, + llvm::Function::LinkageTypes::ExternalLinkage, + "f_" + name, + &module + ); + auto start_block = llvm::BasicBlock::Create(ctx, "entry", new_function); + + auto new_custom_f = custom_function_ptr(new custom_function()); + new_custom_f->arity = arity; + new_custom_f->function = new_function; + custom_functions["f_" + name] = std::move(new_custom_f); + + return new_function; +} diff --git a/09/llvm_context.hpp b/09/llvm_context.hpp new file mode 100644 index 0000000..ca75dca --- /dev/null +++ b/09/llvm_context.hpp @@ -0,0 +1,66 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +struct llvm_context { + struct custom_function { + llvm::Function* function; + int32_t arity; + }; + + using custom_function_ptr = std::unique_ptr; + + llvm::LLVMContext ctx; + llvm::IRBuilder<> builder; + llvm::Module module; + + std::map custom_functions; + std::map functions; + std::map struct_types; + + llvm::StructType* stack_type; + llvm::PointerType* stack_ptr_type; + llvm::PointerType* node_ptr_type; + llvm::IntegerType* tag_type; + llvm::FunctionType* function_type; + + llvm_context() + : builder(ctx), module("bloglang", ctx) { + create_types(); + create_functions(); + } + + void create_types(); + void create_functions(); + + llvm::ConstantInt* create_i8(int8_t); + llvm::ConstantInt* create_i32(int32_t); + llvm::ConstantInt* create_size(size_t); + + llvm::Value* create_pop(llvm::Function*); + llvm::Value* create_peek(llvm::Function*, llvm::Value*); + void create_push(llvm::Function*, llvm::Value*); + void create_popn(llvm::Function*, llvm::Value*); + void create_update(llvm::Function*, llvm::Value*); + void create_pack(llvm::Function*, llvm::Value*, llvm::Value*); + void create_split(llvm::Function*, llvm::Value*); + void create_slide(llvm::Function*, llvm::Value*); + void create_alloc(llvm::Function*, llvm::Value*); + + llvm::Value* create_eval(llvm::Value*); + + llvm::Value* unwrap_num(llvm::Value*); + llvm::Value* create_num(llvm::Value*); + + llvm::Value* unwrap_data_tag(llvm::Value*); + + llvm::Value* create_global(llvm::Value*, llvm::Value*); + + llvm::Value* create_app(llvm::Value*, llvm::Value*); + + llvm::Function* create_custom_function(std::string name, int32_t arity); +}; diff --git a/09/main.cpp b/09/main.cpp new file mode 100644 index 0000000..3f50c32 --- /dev/null +++ b/09/main.cpp @@ -0,0 +1,174 @@ +#include "ast.hpp" +#include +#include "binop.hpp" +#include "definition.hpp" +#include "instruction.hpp" +#include "llvm_context.hpp" +#include "parser.hpp" +#include "error.hpp" +#include "type.hpp" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetMachine.h" + +void yy::parser::error(const std::string& msg) { + std::cout << "An error occured: " << msg << std::endl; +} + +extern std::vector program; + +void typecheck_program( + const std::vector& prog, + type_mgr& mgr, type_env& env) { + type_ptr int_type = type_ptr(new type_base("Int")); + type_ptr binop_type = type_ptr(new type_arr( + int_type, + type_ptr(new type_arr(int_type, int_type)))); + + env.bind("+", binop_type); + env.bind("-", binop_type); + env.bind("*", binop_type); + env.bind("/", binop_type); + + for(auto& def : prog) { + def->typecheck_first(mgr, env); + } + + for(auto& def : prog) { + def->typecheck_second(mgr, env); + } + + for(auto& pair : env.names) { + std::cout << pair.first << ": "; + pair.second->print(mgr, std::cout); + std::cout << std::endl; + } + + for(auto& def : prog) { + def->resolve(mgr); + } +} + +void compile_program(const std::vector& prog) { + for(auto& def : prog) { + def->compile(); + + definition_defn* defn = dynamic_cast(def.get()); + if(!defn) continue; + for(auto& instruction : defn->instructions) { + instruction->print(0, std::cout); + } + std::cout << std::endl; + } +} + +void gen_llvm_internal_op(llvm_context& ctx, binop op) { + auto new_function = ctx.create_custom_function(op_action(op), 2); + std::vector instructions; + instructions.push_back(instruction_ptr(new instruction_push(1))); + instructions.push_back(instruction_ptr(new instruction_eval())); + instructions.push_back(instruction_ptr(new instruction_push(1))); + instructions.push_back(instruction_ptr(new instruction_eval())); + instructions.push_back(instruction_ptr(new instruction_binop(op))); + ctx.builder.SetInsertPoint(&new_function->getEntryBlock()); + for(auto& instruction : instructions) { + instruction->gen_llvm(ctx, new_function); + } + ctx.builder.CreateRetVoid(); +} + +void output_llvm(llvm_context& ctx, const std::string& filename) { + std::string targetTriple = llvm::sys::getDefaultTargetTriple(); + + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmParser(); + llvm::InitializeNativeTargetAsmPrinter(); + + std::string error; + const llvm::Target* target = + llvm::TargetRegistry::lookupTarget(targetTriple, error); + if (!target) { + std::cerr << error << std::endl; + } else { + std::string cpu = "generic"; + std::string features = ""; + llvm::TargetOptions options; + llvm::TargetMachine* targetMachine = + target->createTargetMachine(targetTriple, cpu, features, + options, llvm::Optional()); + + ctx.module.setDataLayout(targetMachine->createDataLayout()); + ctx.module.setTargetTriple(targetTriple); + + std::error_code ec; + llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None); + if (ec) { + throw 0; + } else { + llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile; + llvm::legacy::PassManager pm; + if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) { + throw 0; + } else { + pm.run(ctx.module); + file.close(); + } + } + } +} + +void gen_llvm(const std::vector& prog) { + llvm_context ctx; + gen_llvm_internal_op(ctx, PLUS); + gen_llvm_internal_op(ctx, MINUS); + gen_llvm_internal_op(ctx, TIMES); + gen_llvm_internal_op(ctx, DIVIDE); + + for(auto& definition : prog) { + definition->gen_llvm_first(ctx); + } + + for(auto& definition : prog) { + definition->gen_llvm_second(ctx); + } + ctx.module.print(llvm::outs(), nullptr); + output_llvm(ctx, "program.o"); +} + +int main() { + yy::parser parser; + type_mgr mgr; + type_env env; + + parser.parse(); + for(auto& definition : program) { + definition_defn* def = dynamic_cast(definition.get()); + if(!def) continue; + + std::cout << def->name; + for(auto& param : def->params) std::cout << " " << param; + std::cout << ":" << std::endl; + + def->body->print(1, std::cout); + } + try { + typecheck_program(program, mgr, env); + compile_program(program); + gen_llvm(program); + } catch(unification_error& err) { + std::cout << "failed to unify types: " << std::endl; + std::cout << " (1) \033[34m"; + err.left->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + std::cout << " (2) \033[32m"; + err.right->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + } catch(type_error& err) { + std::cout << "failed to type check program: " << err.description << std::endl; + } +} diff --git a/09/parser.y b/09/parser.y new file mode 100644 index 0000000..088648d --- /dev/null +++ b/09/parser.y @@ -0,0 +1,141 @@ +%{ +#include +#include +#include "ast.hpp" +#include "definition.hpp" +#include "parser.hpp" + +std::vector program; +extern yy::parser::symbol_type yylex(); + +%} + +%token PLUS +%token TIMES +%token MINUS +%token DIVIDE +%token INT +%token DEFN +%token DATA +%token CASE +%token OF +%token OCURLY +%token CCURLY +%token OPAREN +%token CPAREN +%token COMMA +%token ARROW +%token EQUAL +%token LID +%token UID + +%language "c++" +%define api.value.type variant +%define api.token.constructor + +%type > lowercaseParams uppercaseParams +%type > program definitions +%type > branches +%type > constructors +%type aAdd aMul case app appBase +%type definition defn data +%type branch +%type pattern +%type constructor + +%start program + +%% + +program + : definitions { program = std::move($1); } + ; + +definitions + : definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); } + | definition { $$ = std::vector(); $$.push_back(std::move($1)); } + ; + +definition + : defn { $$ = std::move($1); } + | data { $$ = std::move($1); } + ; + +defn + : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY + { $$ = definition_ptr( + new definition_defn(std::move($2), std::move($3), std::move($6))); } + ; + +lowercaseParams + : %empty { $$ = std::vector(); } + | lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); } + ; + +uppercaseParams + : %empty { $$ = std::vector(); } + | uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); } + ; + +aAdd + : aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); } + | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); } + | aMul { $$ = std::move($1); } + ; + +aMul + : aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); } + | aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); } + | app { $$ = std::move($1); } + ; + +app + : app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); } + | appBase { $$ = std::move($1); } + ; + +appBase + : INT { $$ = ast_ptr(new ast_int($1)); } + | LID { $$ = ast_ptr(new ast_lid(std::move($1))); } + | UID { $$ = ast_ptr(new ast_uid(std::move($1))); } + | OPAREN aAdd CPAREN { $$ = std::move($2); } + | case { $$ = std::move($1); } + ; + +case + : CASE aAdd OF OCURLY branches CCURLY + { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); } + ; + +branches + : branches branch { $$ = std::move($1); $$.push_back(std::move($2)); } + | branch { $$ = std::vector(); $$.push_back(std::move($1));} + ; + +branch + : pattern ARROW OCURLY aAdd CCURLY + { $$ = branch_ptr(new branch(std::move($1), std::move($4))); } + ; + +pattern + : LID { $$ = pattern_ptr(new pattern_var(std::move($1))); } + | UID lowercaseParams + { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); } + ; + +data + : DATA UID EQUAL OCURLY constructors CCURLY + { $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); } + ; + +constructors + : constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); } + | constructor + { $$ = std::vector(); $$.push_back(std::move($1)); } + ; + +constructor + : UID uppercaseParams + { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); } + ; + diff --git a/09/runtime.c b/09/runtime.c new file mode 100644 index 0000000..ebae7ae --- /dev/null +++ b/09/runtime.c @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include "runtime.h" + +struct node_base* alloc_node() { + struct node_base* new_node = malloc(sizeof(struct node_app)); + assert(new_node != NULL); + return new_node; +} + +struct node_app* alloc_app(struct node_base* l, struct node_base* r) { + struct node_app* node = (struct node_app*) alloc_node(); + node->base.tag = NODE_APP; + node->left = l; + node->right = r; + return node; +} + +struct node_num* alloc_num(int32_t n) { + struct node_num* node = (struct node_num*) alloc_node(); + node->base.tag = NODE_NUM; + node->value = n; + return node; +} + +struct node_global* alloc_global(void (*f)(struct stack*), int32_t a) { + struct node_global* node = (struct node_global*) alloc_node(); + node->base.tag = NODE_GLOBAL; + node->arity = a; + node->function = f; + return node; +} + +struct node_ind* alloc_ind(struct node_base* n) { + struct node_ind* node = (struct node_ind*) alloc_node(); + node->base.tag = NODE_IND; + node->next = n; + return node; +} + +void stack_init(struct stack* s) { + s->size = 4; + s->count = 0; + s->data = malloc(sizeof(*s->data) * s->size); + assert(s->data != NULL); +} + +void stack_free(struct stack* s) { + free(s->data); +} + +void stack_push(struct stack* s, struct node_base* n) { + while(s->count >= s->size) { + s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2)); + assert(s->data != NULL); + } + s->data[s->count++] = n; +} + +struct node_base* stack_pop(struct stack* s) { + assert(s->count > 0); + return s->data[--s->count]; +} + +struct node_base* stack_peek(struct stack* s, size_t o) { + assert(s->count > o); + return s->data[s->count - o - 1]; +} + +void stack_popn(struct stack* s, size_t n) { + assert(s->count >= n); + s->count -= n; +} + +void stack_slide(struct stack* s, size_t n) { + assert(s->count > n); + s->data[s->count - n - 1] = s->data[s->count - 1]; + s->count -= n; +} + +void stack_update(struct stack* s, size_t o) { + assert(s->count > o + 1); + struct node_ind* ind = (struct node_ind*) s->data[s->count - o - 2]; + ind->base.tag = NODE_IND; + ind->next = s->data[s->count -= 1]; +} + +void stack_alloc(struct stack* s, size_t o) { + while(o--) { + stack_push(s, (struct node_base*) alloc_ind(NULL)); + } +} + +void stack_pack(struct stack* s, size_t n, int8_t t) { + assert(s->count >= n); + + struct node_base** data = malloc(sizeof(*data) * n); + assert(data != NULL); + memcpy(data, &s->data[s->count - n], n * sizeof(*data)); + + struct node_data* new_node = (struct node_data*) alloc_node(); + new_node->array = data; + new_node->base.tag = NODE_DATA; + new_node->tag = t; + + stack_popn(s, n); + stack_push(s, (struct node_base*) new_node); +} + +void stack_split(struct stack* s, size_t n) { + struct node_data* node = (struct node_data*) stack_pop(s); + for(size_t i = 0; i < n; i++) { + stack_push(s, node->array[i]); + } +} + +void unwind(struct stack* s) { + while(1) { + struct node_base* peek = stack_peek(s, 0); + if(peek->tag == NODE_APP) { + struct node_app* n = (struct node_app*) peek; + stack_push(s, n->left); + } else if(peek->tag == NODE_GLOBAL) { + struct node_global* n = (struct node_global*) peek; + assert(s->count > n->arity); + + for(size_t i = 1; i <= n->arity; i++) { + s->data[s->count - i] + = ((struct node_app*) s->data[s->count - i - 1])->right; + } + + n->function(s); + } else if(peek->tag == NODE_IND) { + struct node_ind* n = (struct node_ind*) peek; + stack_pop(s); + stack_push(s, n->next); + } else { + break; + } + } +} + +struct node_base* eval(struct node_base* n) { + struct stack program_stack; + stack_init(&program_stack); + stack_push(&program_stack, n); + unwind(&program_stack); + struct node_base* result = stack_pop(&program_stack); + stack_free(&program_stack); + return result; +} + +extern void f_main(struct stack* s); + +void print_node(struct node_base* n) { + if(n->tag == NODE_APP) { + struct node_app* app = (struct node_app*) n; + print_node(app->left); + putchar(' '); + print_node(app->right); + } else if(n->tag == NODE_DATA) { + printf("(Packed)"); + } else if(n->tag == NODE_GLOBAL) { + struct node_global* global = (struct node_global*) n; + printf("(Global: %p)", global->function); + } else if(n->tag == NODE_IND) { + print_node(((struct node_ind*) n)->next); + } else if(n->tag == NODE_NUM) { + struct node_num* num = (struct node_num*) n; + printf("%d", num->value); + } +} + +int main(int argc, char** argv) { + struct node_global* first_node = alloc_global(f_main, 0); + struct node_base* result = eval((struct node_base*) first_node); + + printf("Result: "); + print_node(result); + putchar('\n'); +} diff --git a/09/runtime.h b/09/runtime.h new file mode 100644 index 0000000..baaaaae --- /dev/null +++ b/09/runtime.h @@ -0,0 +1,70 @@ +#pragma once +#include + +struct stack; + +enum node_tag { + NODE_APP, + NODE_NUM, + NODE_GLOBAL, + NODE_IND, + NODE_DATA +}; + +struct node_base { + enum node_tag tag; +}; + +struct node_app { + struct node_base base; + struct node_base* left; + struct node_base* right; +}; + +struct node_num { + struct node_base base; + int32_t value; +}; + +struct node_global { + struct node_base base; + int32_t arity; + void (*function)(struct stack*); +}; + +struct node_ind { + struct node_base base; + struct node_base* next; +}; + +struct node_data { + struct node_base base; + int8_t tag; + struct node_base** array; +}; + +struct node_base* alloc_node(); +struct node_app* alloc_app(struct node_base* l, struct node_base* r); +struct node_num* alloc_num(int32_t n); +struct node_global* alloc_global(void (*f)(struct stack*), int32_t a); +struct node_ind* alloc_ind(struct node_base* n); + +struct stack { + size_t size; + size_t count; + struct node_base** data; +}; + +void stack_init(struct stack* s); +void stack_free(struct stack* s); +void stack_push(struct stack* s, struct node_base* n); +struct node_base* stack_pop(struct stack* s); +struct node_base* stack_peek(struct stack* s, size_t o); +void stack_popn(struct stack* s, size_t n); +void stack_slide(struct stack* s, size_t n); +void stack_update(struct stack* s, size_t o); +void stack_alloc(struct stack* s, size_t o); +void stack_pack(struct stack* s, size_t n, int8_t t); +void stack_split(struct stack* s, size_t n); + +struct node_base* eval(struct node_base* n); diff --git a/09/scanner.l b/09/scanner.l new file mode 100644 index 0000000..c8a4429 --- /dev/null +++ b/09/scanner.l @@ -0,0 +1,35 @@ +%option noyywrap + +%{ +#include +#include "ast.hpp" +#include "definition.hpp" +#include "parser.hpp" + +#define YY_DECL yy::parser::symbol_type yylex() + +%} + +%% + +[ \n]+ {} +\+ { return yy::parser::make_PLUS(); } +\* { return yy::parser::make_TIMES(); } +- { return yy::parser::make_MINUS(); } +\/ { return yy::parser::make_DIVIDE(); } +[0-9]+ { return yy::parser::make_INT(atoi(yytext)); } +defn { return yy::parser::make_DEFN(); } +data { return yy::parser::make_DATA(); } +case { return yy::parser::make_CASE(); } +of { return yy::parser::make_OF(); } +\{ { return yy::parser::make_OCURLY(); } +\} { return yy::parser::make_CCURLY(); } +\( { return yy::parser::make_OPAREN(); } +\) { return yy::parser::make_CPAREN(); } +, { return yy::parser::make_COMMA(); } +-> { return yy::parser::make_ARROW(); } += { return yy::parser::make_EQUAL(); } +[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); } +[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); } + +%% diff --git a/09/type.cpp b/09/type.cpp new file mode 100644 index 0000000..f5868d5 --- /dev/null +++ b/09/type.cpp @@ -0,0 +1,99 @@ +#include "type.hpp" +#include +#include +#include "error.hpp" + +void type_var::print(const type_mgr& mgr, std::ostream& to) const { + auto it = mgr.types.find(name); + if(it != mgr.types.end()) { + it->second->print(mgr, to); + } else { + to << name; + } +} + +void type_base::print(const type_mgr& mgr, std::ostream& to) const { + to << name; +} + +void type_arr::print(const type_mgr& mgr, std::ostream& to) const { + left->print(mgr, to); + to << " -> ("; + right->print(mgr, to); + to << ")"; +} + +std::string type_mgr::new_type_name() { + int temp = last_id++; + std::string str = ""; + + while(temp != -1) { + str += (char) ('a' + (temp % 26)); + temp = temp / 26 - 1; + } + + std::reverse(str.begin(), str.end()); + return str; +} + +type_ptr type_mgr::new_type() { + return type_ptr(new type_var(new_type_name())); +} + +type_ptr type_mgr::new_arrow_type() { + return type_ptr(new type_arr(new_type(), new_type())); +} + +type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const { + type_var* cast; + + var = nullptr; + while((cast = dynamic_cast(t.get()))) { + auto it = types.find(cast->name); + + if(it == types.end()) { + var = cast; + break; + } + t = it->second; + } + + return t; +} + +void type_mgr::unify(type_ptr l, type_ptr r) { + type_var* lvar; + type_var* rvar; + type_arr* larr; + type_arr* rarr; + type_base* lid; + type_base* rid; + + l = resolve(l, lvar); + r = resolve(r, rvar); + + if(lvar) { + bind(lvar->name, r); + return; + } else if(rvar) { + bind(rvar->name, l); + return; + } else if((larr = dynamic_cast(l.get())) && + (rarr = dynamic_cast(r.get()))) { + unify(larr->left, rarr->left); + unify(larr->right, rarr->right); + return; + } else if((lid = dynamic_cast(l.get())) && + (rid = dynamic_cast(r.get()))) { + if(lid->name == rid->name) return; + } + + throw unification_error(l, r); +} + +void type_mgr::bind(const std::string& s, type_ptr t) { + type_var* other = dynamic_cast(t.get()); + + if(other && other->name == s) return; + types[s] = t; +} diff --git a/09/type.hpp b/09/type.hpp new file mode 100644 index 0000000..09e525f --- /dev/null +++ b/09/type.hpp @@ -0,0 +1,65 @@ +#pragma once +#include +#include + +struct type_mgr; + +struct type { + virtual ~type() = default; + + virtual void print(const type_mgr& mgr, std::ostream& to) const = 0; +}; + +using type_ptr = std::shared_ptr; + +struct type_var : public type { + std::string name; + + type_var(std::string n) + : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_base : public type { + std::string name; + + type_base(std::string n) + : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_data : public type_base { + struct constructor { + int tag; + }; + + std::map constructors; + + type_data(std::string n) + : type_base(std::move(n)) {} +}; + +struct type_arr : public type { + type_ptr left; + type_ptr right; + + type_arr(type_ptr l, type_ptr r) + : left(std::move(l)), right(std::move(r)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_mgr { + int last_id = 0; + std::map types; + + std::string new_type_name(); + type_ptr new_type(); + type_ptr new_arrow_type(); + + void unify(type_ptr l, type_ptr r); + type_ptr resolve(type_ptr t, type_var*& var) const; + void bind(const std::string& s, type_ptr t); +}; diff --git a/09/type_env.cpp b/09/type_env.cpp new file mode 100644 index 0000000..c11a759 --- /dev/null +++ b/09/type_env.cpp @@ -0,0 +1,16 @@ +#include "type_env.hpp" + +type_ptr type_env::lookup(const std::string& name) const { + auto it = names.find(name); + if(it != names.end()) return it->second; + if(parent) return parent->lookup(name); + return nullptr; +} + +void type_env::bind(const std::string& name, type_ptr t) { + names[name] = t; +} + +type_env type_env::scope() const { + return type_env(this); +} diff --git a/09/type_env.hpp b/09/type_env.hpp new file mode 100644 index 0000000..6470bdd --- /dev/null +++ b/09/type_env.hpp @@ -0,0 +1,16 @@ +#pragma once +#include +#include "type.hpp" + +struct type_env { + std::map names; + type_env const* parent = nullptr; + + type_env(type_env const* p) + : parent(p) {} + type_env() : type_env(nullptr) {} + + type_ptr lookup(const std::string& name) const; + void bind(const std::string& name, type_ptr t); + type_env scope() const; +};