diff --git a/code/compiler/11/CMakeLists.txt b/code/compiler/11/CMakeLists.txt new file mode 100644 index 0000000..a16b8f9 --- /dev/null +++ b/code/compiler/11/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.1) +project(compiler) + +# Find all the required packages +find_package(BISON) +find_package(FLEX) +find_package(LLVM REQUIRED CONFIG) + +# Set up the flex and bison targets +bison_target(parser + ${CMAKE_CURRENT_SOURCE_DIR}/parser.y + ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp + COMPILE_FLAGS "-d") +flex_target(scanner + ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l + ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp) +add_flex_bison_dependency(scanner parser) + +# Find all the relevant LLVM components +llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen) + +# Create compiler executable +add_executable(compiler + ast.cpp ast.hpp definition.cpp + llvm_context.cpp llvm_context.hpp + type_env.cpp type_env.hpp + env.cpp env.hpp + type.cpp type.hpp + error.cpp error.hpp + binop.cpp binop.hpp + instruction.cpp instruction.hpp + graph.cpp graph.hpp + ${BISON_parser_OUTPUTS} + ${FLEX_scanner_OUTPUTS} + main.cpp +) + +# Configure compiler executable +target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS}) +target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS}) +target_link_libraries(compiler ${LLVM_LIBS}) diff --git a/code/compiler/11/ast.cpp b/code/compiler/11/ast.cpp new file mode 100644 index 0000000..b55c347 --- /dev/null +++ b/code/compiler/11/ast.cpp @@ -0,0 +1,266 @@ +#include "ast.hpp" +#include +#include "binop.hpp" +#include "error.hpp" +#include "type_env.hpp" + +static void print_indent(int n, std::ostream& to) { + while(n--) to << " "; +} + +void ast_int::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "INT: " << value << std::endl; +} + +void 
ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set& into) { + this->env = env; +} + +type_ptr ast_int::typecheck(type_mgr& mgr) { + return type_ptr(new type_base("Int")); +} + +void ast_int::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr(new instruction_pushint(value))); +} + +void ast_lid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "LID: " << id << std::endl; +} + +void ast_lid::find_free(type_mgr& mgr, type_env_ptr& env, std::set& into) { + this->env = env; + if(env->lookup(id) == nullptr) into.insert(id); +} + +type_ptr ast_lid::typecheck(type_mgr& mgr) { + return env->lookup(id)->instantiate(mgr); +} + +void ast_lid::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr( + env->has_variable(id) ? + (instruction*) new instruction_push(env->get_offset(id)) : + (instruction*) new instruction_pushglobal(id))); +} + +void ast_uid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "UID: " << id << std::endl; +} + +void ast_uid::find_free(type_mgr& mgr, type_env_ptr& env, std::set& into) { + this->env = env; +} + +type_ptr ast_uid::typecheck(type_mgr& mgr) { + return env->lookup(id)->instantiate(mgr); +} + +void ast_uid::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr(new instruction_pushglobal(id))); +} + +void ast_binop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "BINOP: " << op_name(op) << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + +void ast_binop::find_free(type_mgr& mgr, type_env_ptr& env, std::set& into) { + this->env = env; + left->find_free(mgr, env, into); + right->find_free(mgr, env, into); +} + +type_ptr ast_binop::typecheck(type_mgr& mgr) { + type_ptr ltype = left->typecheck(mgr); + type_ptr rtype = right->typecheck(mgr); + type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr); + 
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op)); + + type_ptr return_type = mgr.new_type(); + type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type)); + type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one)); + + mgr.unify(arrow_two, ftype); + return return_type; +} + +void ast_binop::compile(const env_ptr& env, std::vector& into) const { + right->compile(env, into); + left->compile(env_ptr(new env_offset(1, env)), into); + + into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op)))); + into.push_back(instruction_ptr(new instruction_mkapp())); + into.push_back(instruction_ptr(new instruction_mkapp())); +} + +void ast_app::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "APP:" << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + +void ast_app::find_free(type_mgr& mgr, type_env_ptr& env, std::set& into) { + this->env = env; + left->find_free(mgr, env, into); + right->find_free(mgr, env, into); +} + +type_ptr ast_app::typecheck(type_mgr& mgr) { + type_ptr ltype = left->typecheck(mgr); + type_ptr rtype = right->typecheck(mgr); + + type_ptr return_type = mgr.new_type(); + type_ptr arrow = type_ptr(new type_arr(rtype, return_type)); + mgr.unify(arrow, ltype); + return return_type; +} + +void ast_app::compile(const env_ptr& env, std::vector& into) const { + right->compile(env, into); + left->compile(env_ptr(new env_offset(1, env)), into); + into.push_back(instruction_ptr(new instruction_mkapp())); +} + +void ast_case::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "CASE: " << std::endl; + for(auto& branch : branches) { + print_indent(indent + 1, to); + branch->pat->print(to); + to << std::endl; + branch->expr->print(indent + 2, to); + } +} + +void ast_case::find_free(type_mgr& mgr, type_env_ptr& env, std::set& into) { + this->env = env; + of->find_free(mgr, env, into); + for(auto& branch : branches) { + 
type_env_ptr new_env = type_scope(env); + branch->pat->insert_bindings(mgr, new_env); + branch->expr->find_free(mgr, new_env, into); + } +} + +type_ptr ast_case::typecheck(type_mgr& mgr) { + type_var* var; + type_ptr case_type = mgr.resolve(of->typecheck(mgr), var); + type_ptr branch_type = mgr.new_type(); + + for(auto& branch : branches) { + branch->pat->typecheck(case_type, mgr, branch->expr->env); + type_ptr curr_branch_type = branch->expr->typecheck(mgr); + mgr.unify(branch_type, curr_branch_type); + } + + input_type = mgr.resolve(case_type, var); + if(!dynamic_cast(input_type.get())) { + throw type_error("attempting case analysis of non-data type"); + } + + return branch_type; +} + +void ast_case::compile(const env_ptr& env, std::vector& into) const { + type_data* type = dynamic_cast(input_type.get()); + + of->compile(env, into); + into.push_back(instruction_ptr(new instruction_eval())); + + instruction_jump* jump_instruction = new instruction_jump(); + into.push_back(instruction_ptr(jump_instruction)); + for(auto& branch : branches) { + std::vector branch_instructions; + pattern_var* vpat; + pattern_constr* cpat; + + if((vpat = dynamic_cast(branch->pat.get()))) { + branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions); + + for(auto& constr_pair : type->constructors) { + if(jump_instruction->tag_mappings.find(constr_pair.second.tag) != + jump_instruction->tag_mappings.end()) + break; + + jump_instruction->tag_mappings[constr_pair.second.tag] = + jump_instruction->branches.size(); + } + jump_instruction->branches.push_back(std::move(branch_instructions)); + } else if((cpat = dynamic_cast(branch->pat.get()))) { + env_ptr new_env = env; + for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + + branch_instructions.push_back(instruction_ptr(new instruction_split( + cpat->params.size()))); + branch->expr->compile(new_env, branch_instructions); + 
branch_instructions.push_back(instruction_ptr(new instruction_slide( + cpat->params.size()))); + + int new_tag = type->constructors[cpat->constr].tag; + if(jump_instruction->tag_mappings.find(new_tag) != + jump_instruction->tag_mappings.end()) + throw type_error("technically not a type error: duplicate pattern"); + + jump_instruction->tag_mappings[new_tag] = + jump_instruction->branches.size(); + jump_instruction->branches.push_back(std::move(branch_instructions)); + } + } + + for(auto& constr_pair : type->constructors) { + if(jump_instruction->tag_mappings.find(constr_pair.second.tag) == + jump_instruction->tag_mappings.end()) + throw type_error("non-total pattern"); + } +} + +void pattern_var::print(std::ostream& to) const { + to << var; +} + +void pattern_var::insert_bindings(type_mgr& mgr, type_env_ptr& env) const { + env->bind(var, mgr.new_type()); +} + +void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const { + mgr.unify(env->lookup(var)->instantiate(mgr), t); +} + +void pattern_constr::print(std::ostream& to) const { + to << constr; + for(auto& param : params) { + to << " " << param; + } +} + +void pattern_constr::insert_bindings(type_mgr& mgr, type_env_ptr& env) const { + for(auto& param : params) { + env->bind(param, mgr.new_type()); + } +} + +void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const { + type_ptr constructor_type = env->lookup(constr)->instantiate(mgr); + if(!constructor_type) { + throw type_error(std::string("pattern using unknown constructor ") + constr); + } + + for(auto& param : params) { + type_arr* arr = dynamic_cast(constructor_type.get()); + if(!arr) throw type_error("too many parameters in constructor pattern"); + + mgr.unify(env->lookup(param)->instantiate(mgr), arr->left); + constructor_type = arr->right; + } + + mgr.unify(t, constructor_type); +} diff --git a/code/compiler/11/ast.hpp b/code/compiler/11/ast.hpp new file mode 100644 index 0000000..6c66636 --- /dev/null +++ 
b/code/compiler/11/ast.hpp @@ -0,0 +1,144 @@ +#pragma once +#include +#include +#include +#include "type.hpp" +#include "type_env.hpp" +#include "binop.hpp" +#include "instruction.hpp" +#include "env.hpp" + +struct ast { + type_env_ptr env; + + virtual ~ast() = default; + + virtual void print(int indent, std::ostream& to) const = 0; + virtual void find_free(type_mgr& mgr, + type_env_ptr& env, std::set& into) = 0; + virtual type_ptr typecheck(type_mgr& mgr) = 0; + virtual void compile(const env_ptr& env, + std::vector& into) const = 0; +}; + +using ast_ptr = std::unique_ptr; + +struct pattern { + virtual ~pattern() = default; + + virtual void print(std::ostream& to) const = 0; + virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const = 0; + virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0; +}; + +using pattern_ptr = std::unique_ptr; + +struct branch { + pattern_ptr pat; + ast_ptr expr; + + branch(pattern_ptr p, ast_ptr a) + : pat(std::move(p)), expr(std::move(a)) {} +}; + +using branch_ptr = std::unique_ptr; + +struct ast_int : public ast { + int value; + + explicit ast_int(int v) + : value(v) {} + + void print(int indent, std::ostream& to) const; + void find_free(type_mgr& mgr, type_env_ptr& env, std::set& into); + type_ptr typecheck(type_mgr& mgr); + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_lid : public ast { + std::string id; + + explicit ast_lid(std::string i) + : id(std::move(i)) {} + + void print(int indent, std::ostream& to) const; + void find_free(type_mgr& mgr, type_env_ptr& env, std::set& into); + type_ptr typecheck(type_mgr& mgr); + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_uid : public ast { + std::string id; + + explicit ast_uid(std::string i) + : id(std::move(i)) {} + + void print(int indent, std::ostream& to) const; + void find_free(type_mgr& mgr, type_env_ptr& env, std::set& into); + type_ptr typecheck(type_mgr& mgr); + void compile(const 
env_ptr& env, std::vector& into) const; +}; + +struct ast_binop : public ast { + binop op; + ast_ptr left; + ast_ptr right; + + ast_binop(binop o, ast_ptr l, ast_ptr r) + : op(o), left(std::move(l)), right(std::move(r)) {} + + void print(int indent, std::ostream& to) const; + void find_free(type_mgr& mgr, type_env_ptr& env, std::set& into); + type_ptr typecheck(type_mgr& mgr); + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_app : public ast { + ast_ptr left; + ast_ptr right; + + ast_app(ast_ptr l, ast_ptr r) + : left(std::move(l)), right(std::move(r)) {} + + void print(int indent, std::ostream& to) const; + void find_free(type_mgr& mgr, type_env_ptr& env, std::set& into); + type_ptr typecheck(type_mgr& mgr); + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_case : public ast { + ast_ptr of; + type_ptr input_type; + std::vector branches; + + ast_case(ast_ptr o, std::vector b) + : of(std::move(o)), branches(std::move(b)) {} + + void print(int indent, std::ostream& to) const; + void find_free(type_mgr& mgr, type_env_ptr& env, std::set& into); + type_ptr typecheck(type_mgr& mgr); + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct pattern_var : public pattern { + std::string var; + + pattern_var(std::string v) + : var(std::move(v)) {} + + void print(std::ostream &to) const; + void insert_bindings(type_mgr& mgr, type_env_ptr& env) const; + void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const; +}; + +struct pattern_constr : public pattern { + std::string constr; + std::vector params; + + pattern_constr(std::string c, std::vector p) + : constr(std::move(c)), params(std::move(p)) {} + + void print(std::ostream &to) const; + virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const; + virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const; +}; diff --git a/code/compiler/11/binop.cpp b/code/compiler/11/binop.cpp new file mode 100644 index 
0000000..3a5f0ca --- /dev/null +++ b/code/compiler/11/binop.cpp @@ -0,0 +1,21 @@ +#include "binop.hpp" + +std::string op_name(binop op) { + switch(op) { + case PLUS: return "+"; + case MINUS: return "-"; + case TIMES: return "*"; + case DIVIDE: return "/"; + } + return "??"; +} + +std::string op_action(binop op) { + switch(op) { + case PLUS: return "plus"; + case MINUS: return "minus"; + case TIMES: return "times"; + case DIVIDE: return "divide"; + } + return "??"; +} diff --git a/code/compiler/11/binop.hpp b/code/compiler/11/binop.hpp new file mode 100644 index 0000000..8d07858 --- /dev/null +++ b/code/compiler/11/binop.hpp @@ -0,0 +1,12 @@ +#pragma once +#include + +enum binop { + PLUS, + MINUS, + TIMES, + DIVIDE +}; + +std::string op_name(binop op); +std::string op_action(binop op); diff --git a/code/compiler/11/definition.cpp b/code/compiler/11/definition.cpp new file mode 100644 index 0000000..91b0178 --- /dev/null +++ b/code/compiler/11/definition.cpp @@ -0,0 +1,99 @@ +#include "definition.hpp" +#include "error.hpp" +#include "ast.hpp" +#include "instruction.hpp" +#include "llvm_context.hpp" +#include "type.hpp" +#include "type_env.hpp" +#include +#include +#include + +void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) { + this->env = env; + + var_env = type_scope(env); + return_type = mgr.new_type(); + full_type = return_type; + + for(auto it = params.rbegin(); it != params.rend(); it++) { + type_ptr param_type = mgr.new_type(); + full_type = type_ptr(new type_arr(param_type, full_type)); + var_env->bind(*it, param_type); + } + + body->find_free(mgr, var_env, free_variables); +} + +void definition_defn::insert_types(type_mgr& mgr) { + env->bind(name, full_type); +} + +void definition_defn::typecheck(type_mgr& mgr) { + type_ptr body_type = body->typecheck(mgr); + mgr.unify(return_type, body_type); +} + +void definition_defn::compile() { + env_ptr new_env = env_ptr(new env_offset(0, nullptr)); + for(auto it = params.rbegin(); it != 
params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + body->compile(new_env, instructions); + instructions.push_back(instruction_ptr(new instruction_update(params.size()))); + instructions.push_back(instruction_ptr(new instruction_pop(params.size()))); +} + +void definition_defn::declare_llvm(llvm_context& ctx) { + generated_function = ctx.create_custom_function(name, params.size()); +} + +void definition_defn::generate_llvm(llvm_context& ctx) { + ctx.builder.SetInsertPoint(&generated_function->getEntryBlock()); + for(auto& instruction : instructions) { + instruction->gen_llvm(ctx, generated_function); + } + ctx.builder.CreateRetVoid(); +} + +void definition_data::insert_types(type_mgr& mgr, type_env_ptr& env) { + this->env = env; + env->bind_type(name, type_ptr(new type_data(name))); +} + +void definition_data::insert_constructors() const { + type_ptr return_type = env->lookup_type(name); + type_data* this_type = static_cast(return_type.get()); + int next_tag = 0; + + for(auto& constructor : constructors) { + constructor->tag = next_tag; + this_type->constructors[constructor->name] = { next_tag++ }; + + type_ptr full_type = return_type; + for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) { + type_ptr type = env->lookup_type(*it); + if(!type) throw 0; + full_type = type_ptr(new type_arr(type, full_type)); + } + + env->bind(constructor->name, full_type); + } +} + +void definition_data::generate_llvm(llvm_context& ctx) { + for(auto& constructor : constructors) { + auto new_function = + ctx.create_custom_function(constructor->name, constructor->types.size()); + std::vector instructions; + instructions.push_back(instruction_ptr( + new instruction_pack(constructor->tag, constructor->types.size()) + )); + instructions.push_back(instruction_ptr(new instruction_update(0))); + ctx.builder.SetInsertPoint(&new_function->getEntryBlock()); + for (auto& instruction : instructions) { + instruction->gen_llvm(ctx, 
new_function); + } + ctx.builder.CreateRetVoid(); + } +} diff --git a/code/compiler/11/definition.hpp b/code/compiler/11/definition.hpp new file mode 100644 index 0000000..b72bed6 --- /dev/null +++ b/code/compiler/11/definition.hpp @@ -0,0 +1,67 @@ +#pragma once +#include +#include +#include +#include "instruction.hpp" +#include "llvm_context.hpp" +#include "type_env.hpp" + +struct ast; +using ast_ptr = std::unique_ptr; + +struct constructor { + std::string name; + std::vector types; + int8_t tag; + + constructor(std::string n, std::vector ts) + : name(std::move(n)), types(std::move(ts)) {} +}; + +using constructor_ptr = std::unique_ptr; + +struct definition_defn { + std::string name; + std::vector params; + ast_ptr body; + + type_env_ptr env; + type_env_ptr var_env; + std::set free_variables; + type_ptr full_type; + type_ptr return_type; + + std::vector instructions; + + llvm::Function* generated_function; + + definition_defn(std::string n, std::vector p, ast_ptr b) + : name(std::move(n)), params(std::move(p)), body(std::move(b)) { + + } + + void find_free(type_mgr& mgr, type_env_ptr& env); + void insert_types(type_mgr& mgr); + void typecheck(type_mgr& mgr); + void compile(); + void declare_llvm(llvm_context& ctx); + void generate_llvm(llvm_context& ctx); +}; + +using definition_defn_ptr = std::unique_ptr; + +struct definition_data { + std::string name; + std::vector constructors; + + type_env_ptr env; + + definition_data(std::string n, std::vector cs) + : name(std::move(n)), constructors(std::move(cs)) {} + + void insert_types(type_mgr& mgr, type_env_ptr& env); + void insert_constructors() const; + void generate_llvm(llvm_context& ctx); +}; + +using definition_data_ptr = std::unique_ptr; diff --git a/code/compiler/11/env.cpp b/code/compiler/11/env.cpp new file mode 100644 index 0000000..818cf5d --- /dev/null +++ b/code/compiler/11/env.cpp @@ -0,0 +1,23 @@ +#include "env.hpp" + +int env_var::get_offset(const std::string& name) const { + if(name == this->name) 
return 0; + if(parent) return parent->get_offset(name) + 1; + throw 0; +} + +bool env_var::has_variable(const std::string& name) const { + if(name == this->name) return true; + if(parent) return parent->has_variable(name); + return false; +} + +int env_offset::get_offset(const std::string& name) const { + if(parent) return parent->get_offset(name) + offset; + throw 0; +} + +bool env_offset::has_variable(const std::string& name) const { + if(parent) return parent->has_variable(name); + return false; +} diff --git a/code/compiler/11/env.hpp b/code/compiler/11/env.hpp new file mode 100644 index 0000000..a8fbbec --- /dev/null +++ b/code/compiler/11/env.hpp @@ -0,0 +1,34 @@ +#pragma once +#include +#include + +struct env { + virtual ~env() = default; + + virtual int get_offset(const std::string& name) const = 0; + virtual bool has_variable(const std::string& name) const = 0; +}; + +using env_ptr = std::shared_ptr; + +struct env_var : public env { + std::string name; + env_ptr parent; + + env_var(std::string& n, env_ptr p) + : name(std::move(n)), parent(std::move(p)) {} + + int get_offset(const std::string& name) const; + bool has_variable(const std::string& name) const; +}; + +struct env_offset : public env { + int offset; + env_ptr parent; + + env_offset(int o, env_ptr p) + : offset(o), parent(std::move(p)) {} + + int get_offset(const std::string& name) const; + bool has_variable(const std::string& name) const; +}; diff --git a/code/compiler/11/error.cpp b/code/compiler/11/error.cpp new file mode 100644 index 0000000..f5125e3 --- /dev/null +++ b/code/compiler/11/error.cpp @@ -0,0 +1,5 @@ +#include "error.hpp" + +const char* type_error::what() const noexcept { + return "an error occured while checking the types of the program"; +} diff --git a/code/compiler/11/error.hpp b/code/compiler/11/error.hpp new file mode 100644 index 0000000..5bfbc7e --- /dev/null +++ b/code/compiler/11/error.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include "type.hpp" + +struct type_error 
: std::exception { + std::string description; + + type_error(std::string d) + : description(std::move(d)) {} + + const char* what() const noexcept override; +}; + +struct unification_error : public type_error { + type_ptr left; + type_ptr right; + + unification_error(type_ptr l, type_ptr r) + : left(std::move(l)), right(std::move(r)), + type_error("failed to unify types") {} +}; diff --git a/code/compiler/11/examples/bad1.txt b/code/compiler/11/examples/bad1.txt new file mode 100644 index 0000000..86d4bc4 --- /dev/null +++ b/code/compiler/11/examples/bad1.txt @@ -0,0 +1,2 @@ +data Bool = { True, False } +defn main = { 3 + True } diff --git a/code/compiler/11/examples/bad2.txt b/code/compiler/11/examples/bad2.txt new file mode 100644 index 0000000..def8785 --- /dev/null +++ b/code/compiler/11/examples/bad2.txt @@ -0,0 +1 @@ +defn main = { 1 2 3 4 5 } diff --git a/code/compiler/11/examples/bad3.txt b/code/compiler/11/examples/bad3.txt new file mode 100644 index 0000000..6f82b3d --- /dev/null +++ b/code/compiler/11/examples/bad3.txt @@ -0,0 +1,8 @@ +data List = { Nil, Cons Int List } + +defn head l = { + case l of { + Nil -> { 0 } + Cons x y z -> { x } + } +} diff --git a/code/compiler/11/examples/if.txt b/code/compiler/11/examples/if.txt new file mode 100644 index 0000000..c00df35 --- /dev/null +++ b/code/compiler/11/examples/if.txt @@ -0,0 +1,8 @@ +data Bool = { True, False } +defn if c t e = { + case c of { + True -> { t } + False -> { e } + } +} +defn main = { if (if True False True) 11 3 } diff --git a/code/compiler/11/examples/mutual_recursion.txt b/code/compiler/11/examples/mutual_recursion.txt new file mode 100644 index 0000000..7fabf43 --- /dev/null +++ b/code/compiler/11/examples/mutual_recursion.txt @@ -0,0 +1,25 @@ +data Bool = { True, False } +data List = { Nil, Cons Int List } + +defn if c t e = { + case c of { + True -> { t } + False -> { e } + } +} + +defn oddEven l e = { + case l of { + Nil -> { e } + Cons x xs -> { evenOdd xs e } + } +} + +defn 
evenOdd l e = { + case l of { + Nil -> { e } + Cons x xs -> { oddEven xs e } + } +} + +defn main = { if (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) True) (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) 1) 3 } diff --git a/code/compiler/11/examples/primes.txt b/code/compiler/11/examples/primes.txt new file mode 100644 index 0000000..8421849 --- /dev/null +++ b/code/compiler/11/examples/primes.txt @@ -0,0 +1,122 @@ +data List = { Nil, Cons Nat List } +data Bool = { True, False } +data Nat = { O, S Nat } + +defn if c t e = { + case c of { + True -> { t } + False -> { e } + } +} + +defn toInt n = { + case n of { + O -> { 0 } + S np -> { 1 + toInt np } + } +} + +defn lte n m = { + case m of { + O -> { + case n of { + O -> { True } + S np -> { False } + } + } + S mp -> { + case n of { + O -> { True } + S np -> { lte np mp } + } + } + } +} + +defn minus n m = { + case m of { + O -> { n } + S mp -> { + case n of { + O -> { O } + S np -> { + minus np mp + } + } + } + } +} + +defn mod n m = { + if (lte m n) (mod (minus n m) m) n +} + +defn notDivisibleBy n m = { + case (mod m n) of { + O -> { False } + S mp -> { True } + } +} + +defn filter f l = { + case l of { + Nil -> { Nil } + Cons x xs -> { if (f x) (Cons x (filter f xs)) (filter f xs) } + } +} + +defn map f l = { + case l of { + Nil -> { Nil } + Cons x xs -> { Cons (f x) (map f xs) } + } +} + +defn nats = { + Cons (S (S O)) (map S nats) +} + +defn primesRec l = { + case l of { + Nil -> { Nil } + Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) } + } +} + +defn primes = { + primesRec nats +} + +defn take n l = { + case l of { + Nil -> { Nil } + Cons x xs -> { + case n of { + O -> { Nil } + S np -> { Cons x (take np xs) } + } + } + } +} + +defn head l = { + case l of { + Nil -> { O } + Cons x xs -> { x } + } +} + +defn reverseAcc a l = { + case l of { + Nil -> { a } + Cons x xs -> { reverseAcc (Cons x a) xs } + } +} + +defn reverse l = { + reverseAcc Nil l +} + +defn main = { + toInt (head (reverse (take ((S (S (S (S 
(S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes))) +} diff --git a/code/compiler/11/examples/runtime1.c b/code/compiler/11/examples/runtime1.c new file mode 100644 index 0000000..0157051 --- /dev/null +++ b/code/compiler/11/examples/runtime1.c @@ -0,0 +1,31 @@ +#include "../runtime.h" + +void f_add(struct stack* s) { + struct node_num* left = (struct node_num*) eval(stack_peek(s, 0)); + struct node_num* right = (struct node_num*) eval(stack_peek(s, 1)); + stack_push(s, (struct node_base*) alloc_num(left->value + right->value)); +} + +void f_main(struct stack* s) { + // PushInt 320 + stack_push(s, (struct node_base*) alloc_num(320)); + + // PushInt 6 + stack_push(s, (struct node_base*) alloc_num(6)); + + // PushGlobal f_add (the function for +) + stack_push(s, (struct node_base*) alloc_global(f_add, 2)); + + struct node_base* left; + struct node_base* right; + + // MkApp + left = stack_pop(s); + right = stack_pop(s); + stack_push(s, (struct node_base*) alloc_app(left, right)); + + // MkApp + left = stack_pop(s); + right = stack_pop(s); + stack_push(s, (struct node_base*) alloc_app(left, right)); +} diff --git a/code/compiler/11/examples/works1.txt b/code/compiler/11/examples/works1.txt new file mode 100644 index 0000000..a2d647f --- /dev/null +++ b/code/compiler/11/examples/works1.txt @@ -0,0 +1,2 @@ +defn main = { sum 320 6 } +defn sum x y = { x + y } diff --git a/code/compiler/11/examples/works2.txt b/code/compiler/11/examples/works2.txt new file mode 100644 index 0000000..8332fde --- /dev/null +++ b/code/compiler/11/examples/works2.txt @@ -0,0 +1,3 @@ +defn add x y = { x + y } +defn double x = { add x x } +defn main = { 
double 163 } diff --git a/code/compiler/11/examples/works3.txt b/code/compiler/11/examples/works3.txt new file mode 100644 index 0000000..2123089 --- /dev/null +++ b/code/compiler/11/examples/works3.txt @@ -0,0 +1,9 @@ +data List = { Nil, Cons Int List } +data Bool = { True, False } +defn length l = { + case l of { + Nil -> { 0 } + Cons x xs -> { 1 + length xs } + } +} +defn main = { length (Cons True (Cons False (Cons True Nil))) } diff --git a/code/compiler/11/examples/works4.txt b/code/compiler/11/examples/works4.txt new file mode 100644 index 0000000..7674691 --- /dev/null +++ b/code/compiler/11/examples/works4.txt @@ -0,0 +1,16 @@ +data List = { Nil, Cons Int List } + +defn add x y = { x + y } +defn mul x y = { x * y } + +defn foldr f b l = { + case l of { + Nil -> { b } + Cons x xs -> { f x (foldr f b xs) } + } +} + +defn main = { + foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) + + foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +} diff --git a/code/compiler/11/examples/works5.txt b/code/compiler/11/examples/works5.txt new file mode 100644 index 0000000..3a8aff5 --- /dev/null +++ b/code/compiler/11/examples/works5.txt @@ -0,0 +1,17 @@ +data List = { Nil, Cons Int List } + +defn sumZip l m = { + case l of { + Nil -> { 0 } + Cons x xs -> { + case m of { + Nil -> { 0 } + Cons y ys -> { x + y + sumZip xs ys } + } + } + } +} + +defn ones = { Cons 1 ones } + +defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) } diff --git a/code/compiler/11/graph.cpp b/code/compiler/11/graph.cpp new file mode 100644 index 0000000..c648acd --- /dev/null +++ b/code/compiler/11/graph.cpp @@ -0,0 +1,114 @@ +#include "graph.hpp" + +std::set function_graph::compute_transitive_edges() { + std::set transitive_edges; + transitive_edges.insert(edges.begin(), edges.end()); + for(auto& connector : adjacency_lists) { + for(auto& from : adjacency_lists) { + edge to_connector { from.first, connector.first }; + for(auto& to : adjacency_lists) { + edge full_jump { from.first, 
to.first }; + if(transitive_edges.find(full_jump) != transitive_edges.end()) continue; + + edge from_connector { connector.first, to.first }; + if(transitive_edges.find(to_connector) != transitive_edges.end() && + transitive_edges.find(from_connector) != transitive_edges.end()) + transitive_edges.insert(std::move(full_jump)); + } + } + } + return transitive_edges; +} + +void function_graph::create_groups( + const std::set& transitive_edges, + std::map& group_ids, + std::map& group_data_map) { + group_id id_counter = 0; + for(auto& vertex : adjacency_lists) { + if(group_ids.find(vertex.first) != group_ids.end()) + continue; + data_ptr new_group(new group_data); + new_group->functions.insert(vertex.first); + group_data_map[id_counter] = new_group; + group_ids[vertex.first] = id_counter; + for(auto& other_vertex : adjacency_lists) { + if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() && + transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) { + group_ids[other_vertex.first] = id_counter; + new_group->functions.insert(other_vertex.first); + } + } + id_counter++; + } +} + +void function_graph::create_edges( + std::map& group_ids, + std::map& group_data_map) { + std::set> group_edges; + for(auto& vertex : adjacency_lists) { + auto vertex_id = group_ids[vertex.first]; + auto& vertex_data = group_data_map[vertex_id]; + for(auto& other_vertex : vertex.second) { + auto other_id = group_ids[other_vertex]; + if(vertex_id == other_id) continue; + if(group_edges.find({vertex_id, other_id}) != group_edges.end()) + continue; + group_edges.insert({vertex_id, other_id}); + vertex_data->adjacency_list.insert(other_id); + group_data_map[other_id]->indegree++; + } + } +} + +std::vector function_graph::generate_order( + std::map& group_ids, + std::map& group_data_map) { + std::queue id_queue; + std::vector output; + for(auto& group : group_data_map) { + if(group.second->indegree == 0) id_queue.push(group.first); + } + 
+ while(!id_queue.empty()) { + auto new_id = id_queue.front(); + auto& group_data = group_data_map[new_id]; + group_ptr output_group(new group); + output_group->members = std::move(group_data->functions); + id_queue.pop(); + + for(auto& adjacent_group : group_data->adjacency_list) { + if(--group_data_map[adjacent_group]->indegree == 0) + id_queue.push(adjacent_group); + } + + output.push_back(std::move(output_group)); + } + + return output; +} + +std::set& function_graph::add_function(const function& f) { + auto adjacency_list_it = adjacency_lists.find(f); + if(adjacency_list_it != adjacency_lists.end()) { + return adjacency_list_it->second; + } else { + return adjacency_lists[f] = { }; + } +} + +void function_graph::add_edge(const function& from, const function& to) { + add_function(from).insert(to); + edges.insert({ from, to }); +} + +std::vector function_graph::compute_order() { + std::set transitive_edges = compute_transitive_edges(); + std::map group_ids; + std::map group_data_map; + + create_groups(transitive_edges, group_ids, group_data_map); + create_edges(group_ids, group_data_map); + return generate_order(group_ids, group_data_map); +} diff --git a/code/compiler/11/graph.hpp b/code/compiler/11/graph.hpp new file mode 100644 index 0000000..2db8d7c --- /dev/null +++ b/code/compiler/11/graph.hpp @@ -0,0 +1,52 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using function = std::string; + +struct group { + std::set members; +}; + +using group_ptr = std::unique_ptr; + +class function_graph { + using group_id = size_t; + + struct group_data { + std::set functions; + std::set adjacency_list; + size_t indegree; + }; + + using data_ptr = std::shared_ptr; + using edge = std::pair; + using group_edge = std::pair; + + std::map> adjacency_lists; + std::set edges; + + std::set compute_transitive_edges(); + void create_groups( + const std::set&, + std::map&, + std::map&); + void create_edges( + std::map&, + 
std::map&); + std::vector generate_order( + std::map&, + std::map&); + + public: + std::set& add_function(const function& f); + void add_edge(const function& from, const function& to); + std::vector compute_order(); +}; diff --git a/code/compiler/11/instruction.cpp b/code/compiler/11/instruction.cpp new file mode 100644 index 0000000..c2b050a --- /dev/null +++ b/code/compiler/11/instruction.cpp @@ -0,0 +1,177 @@ +#include "instruction.hpp" +#include "llvm_context.hpp" +#include +#include + +using namespace llvm; + +static void print_indent(int n, std::ostream& to) { + while(n--) to << " "; +} + +void instruction_pushint::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "PushInt(" << value << ")" << std::endl; +} + +void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_num(f, ctx.create_i32(value))); +} + +void instruction_pushglobal::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "PushGlobal(" << name << ")" << std::endl; +} + +void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const { + auto& global_f = ctx.custom_functions.at("f_" + name); + auto arity = ctx.create_i32(global_f->arity); + ctx.create_push(f, ctx.create_global(f, global_f->function, arity)); +} + +void instruction_push::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Push(" << offset << ")" << std::endl; +} + +void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset))); +} + +void instruction_pop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Pop(" << count << ")" << std::endl; +} + +void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_popn(f, ctx.create_size(count)); +} + +void instruction_mkapp::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "MkApp()" << 
std::endl; +} + +void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const { + auto left = ctx.create_pop(f); + auto right = ctx.create_pop(f); + ctx.create_push(f, ctx.create_app(f, left, right)); +} + +void instruction_update::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Update(" << offset << ")" << std::endl; +} + +void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_update(f, ctx.create_size(offset)); +} + +void instruction_pack::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Pack(" << tag << ", " << size << ")" << std::endl; +} + +void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag)); +} + +void instruction_split::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Split()" << std::endl; +} + +void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_split(f, ctx.create_size(size)); +} + +void instruction_jump::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Jump(" << std::endl; + for(auto& instruction_set : branches) { + for(auto& instruction : instruction_set) { + instruction->print(indent + 2, to); + } + to << std::endl; + } + print_indent(indent, to); + to << ")" << std::endl; +} + +void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const { + auto top_node = ctx.create_peek(f, ctx.create_size(0)); + auto tag = ctx.unwrap_data_tag(top_node); + auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f); + auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size()); + std::vector blocks; + + for(auto& branch : branches) { + auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f); + ctx.builder.SetInsertPoint(branch_block); + for(auto& instruction : branch) { + instruction->gen_llvm(ctx, f); + } + ctx.builder.CreateBr(safety_block); + 
blocks.push_back(branch_block); + } + + for(auto& mapping : tag_mappings) { + switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]); + } + + ctx.builder.SetInsertPoint(safety_block); +} + +void instruction_slide::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Slide(" << offset << ")" << std::endl; +} + +void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_slide(f, ctx.create_size(offset)); +} + +void instruction_binop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "BinOp(" << op_action(op) << ")" << std::endl; +} + +void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const { + auto left_int = ctx.unwrap_num(ctx.create_pop(f)); + auto right_int = ctx.unwrap_num(ctx.create_pop(f)); + llvm::Value* result; + switch(op) { + case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break; + case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break; + case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break; + case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break; + } + ctx.create_push(f, ctx.create_num(f, result)); +} + +void instruction_eval::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Eval()" << std::endl; +} + +void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_unwind(f); +} + +void instruction_alloc::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Alloc(" << amount << ")" << std::endl; +} + +void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_alloc(f, ctx.create_size(amount)); +} + +void instruction_unwind::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Unwind()" << std::endl; +} + +void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const { + // Nothing +} diff --git a/code/compiler/11/instruction.hpp 
b/code/compiler/11/instruction.hpp new file mode 100644 index 0000000..abe2409 --- /dev/null +++ b/code/compiler/11/instruction.hpp @@ -0,0 +1,142 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include "binop.hpp" +#include "llvm_context.hpp" + +struct instruction { + virtual ~instruction() = default; + + virtual void print(int indent, std::ostream& to) const = 0; + virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0; +}; + +using instruction_ptr = std::unique_ptr; + +struct instruction_pushint : public instruction { + int value; + + instruction_pushint(int v) + : value(v) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_pushglobal : public instruction { + std::string name; + + instruction_pushglobal(std::string n) + : name(std::move(n)) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_push : public instruction { + int offset; + + instruction_push(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_pop : public instruction { + int count; + + instruction_pop(int c) + : count(c) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_mkapp : public instruction { + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_update : public instruction { + int offset; + + instruction_update(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_pack : public instruction { + int tag; + int size; + + instruction_pack(int t, int s) + : tag(t), size(s) {} + + void 
print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_split : public instruction { + int size; + + instruction_split(int s) + : size(s) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_jump : public instruction { + std::vector> branches; + std::map tag_mappings; + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_slide : public instruction { + int offset; + + instruction_slide(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_binop : public instruction { + binop op; + + instruction_binop(binop o) + : op(o) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_eval : public instruction { + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_alloc : public instruction { + int amount; + + instruction_alloc(int a) + : amount(a) {} + + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; + +struct instruction_unwind : public instruction { + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; +}; diff --git a/code/compiler/11/llvm_context.cpp b/code/compiler/11/llvm_context.cpp new file mode 100644 index 0000000..45dcb50 --- /dev/null +++ b/code/compiler/11/llvm_context.cpp @@ -0,0 +1,278 @@ +#include "llvm_context.hpp" +#include + +using namespace llvm; + +void llvm_context::create_types() { + stack_type = StructType::create(ctx, "stack"); + gmachine_type = StructType::create(ctx, "gmachine"); + stack_ptr_type = 
PointerType::getUnqual(stack_type); + gmachine_ptr_type = PointerType::getUnqual(gmachine_type); + tag_type = IntegerType::getInt8Ty(ctx); + struct_types["node_base"] = StructType::create(ctx, "node_base"); + struct_types["node_app"] = StructType::create(ctx, "node_app"); + struct_types["node_num"] = StructType::create(ctx, "node_num"); + struct_types["node_global"] = StructType::create(ctx, "node_global"); + struct_types["node_ind"] = StructType::create(ctx, "node_ind"); + struct_types["node_data"] = StructType::create(ctx, "node_data"); + node_ptr_type = PointerType::getUnqual(struct_types.at("node_base")); + function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false); + + gmachine_type->setBody( + stack_ptr_type, + node_ptr_type, + IntegerType::getInt64Ty(ctx), + IntegerType::getInt64Ty(ctx) + ); + struct_types.at("node_base")->setBody( + IntegerType::getInt32Ty(ctx), + IntegerType::getInt8Ty(ctx), + node_ptr_type + ); + struct_types.at("node_app")->setBody( + struct_types.at("node_base"), + node_ptr_type, + node_ptr_type + ); + struct_types.at("node_num")->setBody( + struct_types.at("node_base"), + IntegerType::getInt32Ty(ctx) + ); + struct_types.at("node_global")->setBody( + struct_types.at("node_base"), + FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false) + ); + struct_types.at("node_ind")->setBody( + struct_types.at("node_base"), + node_ptr_type + ); + struct_types.at("node_data")->setBody( + struct_types.at("node_base"), + IntegerType::getInt8Ty(ctx), + PointerType::getUnqual(node_ptr_type) + ); +} + +void llvm_context::create_functions() { + auto void_type = Type::getVoidTy(ctx); + auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8); + functions["stack_init"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_init", + &module + ); + functions["stack_free"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type }, 
false), + Function::LinkageTypes::ExternalLinkage, + "stack_free", + &module + ); + functions["stack_push"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_push", + &module + ); + functions["stack_pop"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_pop", + &module + ); + functions["stack_peek"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_peek", + &module + ); + functions["stack_popn"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_popn", + &module + ); + functions["gmachine_slide"] = Function::Create( + FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "gmachine_slide", + &module + ); + functions["gmachine_update"] = Function::Create( + FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "gmachine_update", + &module + ); + functions["gmachine_alloc"] = Function::Create( + FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "gmachine_alloc", + &module + ); + functions["gmachine_pack"] = Function::Create( + FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false), + Function::LinkageTypes::ExternalLinkage, + "gmachine_pack", + &module + ); + functions["gmachine_split"] = Function::Create( + FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "gmachine_split", + &module + ); + functions["gmachine_track"] = Function::Create( + FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false), + 
Function::LinkageTypes::ExternalLinkage, + "gmachine_track", + &module + ); + + auto int32_type = IntegerType::getInt32Ty(ctx); + functions["alloc_app"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_app", + &module + ); + functions["alloc_num"] = Function::Create( + FunctionType::get(node_ptr_type, { int32_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_num", + &module + ); + functions["alloc_global"] = Function::Create( + FunctionType::get(node_ptr_type, { function_type, int32_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_global", + &module + ); + functions["alloc_ind"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_ind", + &module + ); + + functions["unwind"] = Function::Create( + FunctionType::get(void_type, { gmachine_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "unwind", + &module + ); +} + +ConstantInt* llvm_context::create_i8(int8_t i) { + return ConstantInt::get(ctx, APInt(8, i)); +} +ConstantInt* llvm_context::create_i32(int32_t i) { + return ConstantInt::get(ctx, APInt(32, i)); +} +ConstantInt* llvm_context::create_size(size_t i) { + return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i)); +} + +Value* llvm_context::create_pop(Function* f) { + auto pop_f = functions.at("stack_pop"); + return builder.CreateCall(pop_f, { unwrap_gmachine_stack_ptr(f->arg_begin()) }); +} +Value* llvm_context::create_peek(Function* f, Value* off) { + auto peek_f = functions.at("stack_peek"); + return builder.CreateCall(peek_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off }); +} +void llvm_context::create_push(Function* f, Value* v) { + auto push_f = functions.at("stack_push"); + builder.CreateCall(push_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), v }); +} +void llvm_context::create_popn(Function* f, Value* off) { + auto 
popn_f = functions.at("stack_popn"); + builder.CreateCall(popn_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off }); +} +void llvm_context::create_update(Function* f, Value* off) { + auto update_f = functions.at("gmachine_update"); + builder.CreateCall(update_f, { f->arg_begin(), off }); +} +void llvm_context::create_pack(Function* f, Value* c, Value* t) { + auto pack_f = functions.at("gmachine_pack"); + builder.CreateCall(pack_f, { f->arg_begin(), c, t }); +} +void llvm_context::create_split(Function* f, Value* c) { + auto split_f = functions.at("gmachine_split"); + builder.CreateCall(split_f, { f->arg_begin(), c }); +} +void llvm_context::create_slide(Function* f, Value* off) { + auto slide_f = functions.at("gmachine_slide"); + builder.CreateCall(slide_f, { f->arg_begin(), off }); +} +void llvm_context::create_alloc(Function* f, Value* n) { + auto alloc_f = functions.at("gmachine_alloc"); + builder.CreateCall(alloc_f, { f->arg_begin(), n }); +} +Value* llvm_context::create_track(Function* f, Value* v) { + auto track_f = functions.at("gmachine_track"); + return builder.CreateCall(track_f, { f->arg_begin(), v }); +} + +void llvm_context::create_unwind(Function* f) { + auto unwind_f = functions.at("unwind"); + builder.CreateCall(unwind_f, { f->args().begin() }); +} + +Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) { + auto offset_0 = create_i32(0); + return builder.CreateGEP(g, { offset_0, offset_0 }); +} + +Value* llvm_context::unwrap_num(Value* v) { + auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num")); + auto cast = builder.CreatePointerCast(v, num_ptr_type); + auto offset_0 = create_i32(0); + auto offset_1 = create_i32(1); + auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 }); + return builder.CreateLoad(int_ptr); +} +Value* llvm_context::create_num(Function* f, Value* v) { + auto alloc_num_f = functions.at("alloc_num"); + auto alloc_num_call = builder.CreateCall(alloc_num_f, { v }); + return create_track(f, 
alloc_num_call); +} + +Value* llvm_context::unwrap_data_tag(Value* v) { + auto data_ptr_type = PointerType::getUnqual(struct_types.at("node_data")); + auto cast = builder.CreatePointerCast(v, data_ptr_type); + auto offset_0 = create_i32(0); + auto offset_1 = create_i32(1); + auto tag_ptr = builder.CreateGEP(cast, { offset_0, offset_1 }); + return builder.CreateLoad(tag_ptr); +} + +Value* llvm_context::create_global(Function* f, Value* gf, Value* a) { + auto alloc_global_f = functions.at("alloc_global"); + auto alloc_global_call = builder.CreateCall(alloc_global_f, { gf, a }); + return create_track(f, alloc_global_call); +} + +Value* llvm_context::create_app(Function* f, Value* l, Value* r) { + auto alloc_app_f = functions.at("alloc_app"); + auto alloc_app_call = builder.CreateCall(alloc_app_f, { l, r }); + return create_track(f, alloc_app_call); +} + +llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) { + auto void_type = llvm::Type::getVoidTy(ctx); + auto new_function = llvm::Function::Create( + function_type, + llvm::Function::LinkageTypes::ExternalLinkage, + "f_" + name, + &module + ); + auto start_block = llvm::BasicBlock::Create(ctx, "entry", new_function); + + auto new_custom_f = custom_function_ptr(new custom_function()); + new_custom_f->arity = arity; + new_custom_f->function = new_function; + custom_functions["f_" + name] = std::move(new_custom_f); + + return new_function; +} diff --git a/code/compiler/11/llvm_context.hpp b/code/compiler/11/llvm_context.hpp new file mode 100644 index 0000000..fbe4cc1 --- /dev/null +++ b/code/compiler/11/llvm_context.hpp @@ -0,0 +1,72 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +struct llvm_context { + struct custom_function { + llvm::Function* function; + int32_t arity; + }; + + using custom_function_ptr = std::unique_ptr; + + llvm::LLVMContext ctx; + llvm::IRBuilder<> builder; + llvm::Module module; + + std::map custom_functions; + std::map 
functions; + std::map struct_types; + + llvm::StructType* stack_type; + llvm::StructType* gmachine_type; + llvm::PointerType* stack_ptr_type; + llvm::PointerType* gmachine_ptr_type; + llvm::PointerType* node_ptr_type; + llvm::IntegerType* tag_type; + llvm::FunctionType* function_type; + + llvm_context() + : builder(ctx), module("bloglang", ctx) { + create_types(); + create_functions(); + } + + void create_types(); + void create_functions(); + + llvm::ConstantInt* create_i8(int8_t); + llvm::ConstantInt* create_i32(int32_t); + llvm::ConstantInt* create_size(size_t); + + llvm::Value* create_pop(llvm::Function*); + llvm::Value* create_peek(llvm::Function*, llvm::Value*); + void create_push(llvm::Function*, llvm::Value*); + void create_popn(llvm::Function*, llvm::Value*); + void create_update(llvm::Function*, llvm::Value*); + void create_pack(llvm::Function*, llvm::Value*, llvm::Value*); + void create_split(llvm::Function*, llvm::Value*); + void create_slide(llvm::Function*, llvm::Value*); + void create_alloc(llvm::Function*, llvm::Value*); + llvm::Value* create_track(llvm::Function*, llvm::Value*); + + void create_unwind(llvm::Function*); + + llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*); + + llvm::Value* unwrap_num(llvm::Value*); + llvm::Value* create_num(llvm::Function*, llvm::Value*); + + llvm::Value* unwrap_data_tag(llvm::Value*); + + llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*); + + llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*); + + llvm::Function* create_custom_function(std::string name, int32_t arity); +}; diff --git a/code/compiler/11/main.cpp b/code/compiler/11/main.cpp new file mode 100644 index 0000000..bc181d6 --- /dev/null +++ b/code/compiler/11/main.cpp @@ -0,0 +1,204 @@ +#include "ast.hpp" +#include +#include "binop.hpp" +#include "definition.hpp" +#include "graph.hpp" +#include "instruction.hpp" +#include "llvm_context.hpp" +#include "parser.hpp" +#include "error.hpp" +#include "type.hpp" 
+#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetMachine.h" + +void yy::parser::error(const std::string& msg) { + std::cout << "An error occured: " << msg << std::endl; +} + +extern std::map defs_data; +extern std::map defs_defn; + +void typecheck_program( + const std::map& defs_data, + const std::map& defs_defn, + type_mgr& mgr, type_env_ptr& env) { + type_ptr int_type = type_ptr(new type_base("Int")); + env->bind_type("Int", int_type); + + type_ptr binop_type = type_ptr(new type_arr( + int_type, + type_ptr(new type_arr(int_type, int_type)))); + env->bind("+", binop_type); + env->bind("-", binop_type); + env->bind("*", binop_type); + env->bind("/", binop_type); + + for(auto& def_data : defs_data) { + def_data.second->insert_types(mgr, env); + } + for(auto& def_data : defs_data) { + def_data.second->insert_constructors(); + } + + function_graph dependency_graph; + + for(auto& def_defn : defs_defn) { + def_defn.second->find_free(mgr, env); + dependency_graph.add_function(def_defn.second->name); + + for(auto& dependency : def_defn.second->free_variables) { + if(defs_defn.find(dependency) == defs_defn.end()) + throw 0; + dependency_graph.add_edge(def_defn.second->name, dependency); + } + } + + std::vector groups = dependency_graph.compute_order(); + for(auto it = groups.rbegin(); it != groups.rend(); it++) { + auto& group = *it; + for(auto& def_defnn_name : group->members) { + auto& def_defn = defs_defn.find(def_defnn_name)->second; + def_defn->insert_types(mgr); + } + for(auto& def_defnn_name : group->members) { + auto& def_defn = defs_defn.find(def_defnn_name)->second; + def_defn->typecheck(mgr); + } + for(auto& def_defnn_name : group->members) { + env->generalize(def_defnn_name, mgr); + } + } + + for(auto& pair : 
env->names) { + std::cout << pair.first << ": "; + pair.second->print(mgr, std::cout); + std::cout << std::endl; + } +} + +void compile_program(const std::map& defs_defn) { + for(auto& def_defn : defs_defn) { + def_defn.second->compile(); + + for(auto& instruction : def_defn.second->instructions) { + instruction->print(0, std::cout); + } + std::cout << std::endl; + } +} + +void gen_llvm_internal_op(llvm_context& ctx, binop op) { + auto new_function = ctx.create_custom_function(op_action(op), 2); + std::vector instructions; + instructions.push_back(instruction_ptr(new instruction_push(1))); + instructions.push_back(instruction_ptr(new instruction_eval())); + instructions.push_back(instruction_ptr(new instruction_push(1))); + instructions.push_back(instruction_ptr(new instruction_eval())); + instructions.push_back(instruction_ptr(new instruction_binop(op))); + instructions.push_back(instruction_ptr(new instruction_update(2))); + instructions.push_back(instruction_ptr(new instruction_pop(2))); + ctx.builder.SetInsertPoint(&new_function->getEntryBlock()); + for(auto& instruction : instructions) { + instruction->gen_llvm(ctx, new_function); + } + ctx.builder.CreateRetVoid(); +} + +void output_llvm(llvm_context& ctx, const std::string& filename) { + std::string targetTriple = llvm::sys::getDefaultTargetTriple(); + + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmParser(); + llvm::InitializeNativeTargetAsmPrinter(); + + std::string error; + const llvm::Target* target = + llvm::TargetRegistry::lookupTarget(targetTriple, error); + if (!target) { + std::cerr << error << std::endl; + } else { + std::string cpu = "generic"; + std::string features = ""; + llvm::TargetOptions options; + llvm::TargetMachine* targetMachine = + target->createTargetMachine(targetTriple, cpu, features, + options, llvm::Optional()); + + ctx.module.setDataLayout(targetMachine->createDataLayout()); + ctx.module.setTargetTriple(targetTriple); + + std::error_code ec; + 
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None); + if (ec) { + throw 0; + } else { + llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile; + llvm::legacy::PassManager pm; + if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) { + throw 0; + } else { + pm.run(ctx.module); + file.close(); + } + } + } +} + +void gen_llvm( + const std::map& defs_data, + const std::map& defs_defn) { + llvm_context ctx; + gen_llvm_internal_op(ctx, PLUS); + gen_llvm_internal_op(ctx, MINUS); + gen_llvm_internal_op(ctx, TIMES); + gen_llvm_internal_op(ctx, DIVIDE); + + for(auto& def_data : defs_data) { + def_data.second->generate_llvm(ctx); + } + for(auto& def_defn : defs_defn) { + def_defn.second->declare_llvm(ctx); + } + for(auto& def_defn : defs_defn) { + def_defn.second->generate_llvm(ctx); + } + + ctx.module.print(llvm::outs(), nullptr); + output_llvm(ctx, "program.o"); +} + +int main() { + yy::parser parser; + type_mgr mgr; + type_env_ptr env(new type_env); + + parser.parse(); + for(auto& def_defn : defs_defn) { + std::cout << def_defn.second->name; + for(auto& param : def_defn.second->params) std::cout << " " << param; + std::cout << ":" << std::endl; + + def_defn.second->body->print(1, std::cout); + } + try { + typecheck_program(defs_data, defs_defn, mgr, env); + compile_program(defs_defn); + gen_llvm(defs_data, defs_defn); + } catch(unification_error& err) { + std::cout << "failed to unify types: " << std::endl; + std::cout << " (1) \033[34m"; + err.left->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + std::cout << " (2) \033[32m"; + err.right->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + } catch(type_error& err) { + std::cout << "failed to type check program: " << err.description << std::endl; + } +} diff --git a/code/compiler/11/parser.y b/code/compiler/11/parser.y new file mode 100644 index 0000000..57f4be9 --- /dev/null +++ b/code/compiler/11/parser.y @@ -0,0 +1,144 @@ +%{ +#include +#include 
+#include +#include "ast.hpp" +#include "definition.hpp" +#include "parser.hpp" + +std::map defs_data; +std::map defs_defn; /* both maps are keyed by definition name and are filled by the actions of the definition rule */ + +extern yy::parser::symbol_type yylex(); + +%} + +%token PLUS +%token TIMES +%token MINUS +%token DIVIDE +%token INT +%token DEFN +%token DATA +%token CASE +%token OF +%token OCURLY +%token CCURLY +%token OPAREN +%token CPAREN +%token COMMA +%token ARROW +%token EQUAL +%token LID +%token UID + +%language "c++" +%define api.value.type variant +%define api.token.constructor + +%type > lowercaseParams uppercaseParams +%type > branches +%type > constructors +%type aAdd aMul case app appBase +%type data +%type defn +%type branch +%type pattern +%type constructor + +%start program + +%% + +program + : definitions { } + ; + +definitions + : definitions definition { } + | definition { } + ; + +definition + : defn { /* the name is copied out first because $1 is moved from in the next statement; a later definition with the same name replaces the earlier map entry */ auto name = $1->name; defs_defn[name] = std::move($1); } + | data { auto name = $1->name; defs_data[name] = std::move($1); } + ; + +defn + : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY + { $$ = definition_defn_ptr( + new definition_defn(std::move($2), std::move($3), std::move($6))); } + ; + +lowercaseParams + : %empty { $$ = std::vector(); } + | lowercaseParams LID { /* parameters are appended in source order */ $$ = std::move($1); $$.push_back(std::move($2)); } + ; + +uppercaseParams + : %empty { $$ = std::vector(); } + | uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); } + ; + +aAdd + : aAdd PLUS aMul { /* the rule recurses on its left operand, so a chain of PLUS and MINUS nests to the left; the operands are aMul, so TIMES and DIVIDE group more tightly */ $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); } + | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); } + | aMul { $$ = std::move($1); } + ; + +aMul + : aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); } + | aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); } + | app { $$ = std::move($1); } + ; + +app + : app appBase { /* application also nests to the left: f x y becomes an ast_app whose left child is the ast_app of f and x */ $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); } + | appBase { $$ = std::move($1); } + ; + +appBase + : INT
{ $$ = ast_ptr(new ast_int($1)); } + | LID { $$ = ast_ptr(new ast_lid(std::move($1))); } + | UID { $$ = ast_ptr(new ast_uid(std::move($1))); } + | OPAREN aAdd CPAREN { $$ = std::move($2); } + | case { $$ = std::move($1); } + ; + +case + : CASE aAdd OF OCURLY branches CCURLY + { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); } + ; + +branches + : branches branch { $$ = std::move($1); $$.push_back(std::move($2)); } + | branch { $$ = std::vector(); $$.push_back(std::move($1));} + ; + +branch + : pattern ARROW OCURLY aAdd CCURLY + { $$ = branch_ptr(new branch(std::move($1), std::move($4))); } + ; + +pattern + : LID { $$ = pattern_ptr(new pattern_var(std::move($1))); } + | UID lowercaseParams + { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); } + ; + +data + : DATA UID EQUAL OCURLY constructors CCURLY + { $$ = definition_data_ptr(new definition_data(std::move($2), std::move($5))); } + ; + +constructors + : constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); } + | constructor + { $$ = std::vector(); $$.push_back(std::move($1)); } + ; + +constructor + : UID uppercaseParams + { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); } + ; + diff --git a/code/compiler/11/runtime.c b/code/compiler/11/runtime.c new file mode 100644 index 0000000..7b8a7c3 --- /dev/null +++ b/code/compiler/11/runtime.c @@ -0,0 +1,269 @@ +#include +#include +#include +#include +#include "runtime.h" + +struct node_base* alloc_node() { /* Allocates one node with gc_next and gc_reachable cleared; the tag is left for the caller to set. Every node kind in this file goes through this allocator, so the block is always sizeof(struct node_app). The node is not registered with any gmachine here. */ + struct node_base* new_node = malloc(sizeof(struct node_app)); + assert(new_node != NULL); /* must come before the first write through the pointer */ + new_node->gc_next = NULL; + new_node->gc_reachable = 0; + return new_node; +} + +struct node_app* alloc_app(struct node_base* l, struct node_base* r) { /* Allocates an application node whose left and right children are l and r. */ + struct node_app* node = (struct node_app*) alloc_node(); + node->base.tag = NODE_APP; + node->left = l; + node->right = r; + return node; +} + +struct node_num* alloc_num(int32_t n) { + struct node_num* node = (struct node_num*) alloc_node(); +
node->base.tag = NODE_NUM; + node->value = n; + return node; +} + +struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) { /* Allocates a global node for function f with arity a. */ + struct node_global* node = (struct node_global*) alloc_node(); + node->base.tag = NODE_GLOBAL; + node->arity = a; + node->function = f; + return node; +} + +struct node_ind* alloc_ind(struct node_base* n) { /* Allocates an indirection node pointing at n; n is stored as given, including NULL. */ + struct node_ind* node = (struct node_ind*) alloc_node(); + node->base.tag = NODE_IND; + node->next = n; + return node; +} + +void free_node_direct(struct node_base* n) { /* Releases memory owned by the node other than the node itself: only data nodes own anything (their field array). The caller frees n. */ + if(n->tag == NODE_DATA) { + free(((struct node_data*) n)->array); + } +} + +void gc_visit_node(struct node_base* n) { /* Mark phase: sets gc_reachable on n and, recursively, on everything reachable from it. Already marked nodes stop the recursion, so cycles terminate. */ + if(n == NULL || n->gc_reachable) return; /* gmachine_alloc tracks indirections created with alloc_ind(NULL), so a NULL target can be reached here and is treated as nothing to mark */ + n->gc_reachable = 1; + + if(n->tag == NODE_APP) { + struct node_app* app = (struct node_app*) n; + gc_visit_node(app->left); + gc_visit_node(app->right); + } if(n->tag == NODE_IND) { + struct node_ind* ind = (struct node_ind*) n; + gc_visit_node(ind->next); + } if(n->tag == NODE_DATA) { + struct node_data* data = (struct node_data*) n; + struct node_base** to_visit = data->array; + while(*to_visit) { /* the field array is NULL-terminated (see gmachine_pack) */ + gc_visit_node(*to_visit); + to_visit++; + } + } +} + +void stack_init(struct stack* s) { /* Starts with room for 4 entries and none in use. */ + s->size = 4; + s->count = 0; + s->data = malloc(sizeof(*s->data) * s->size); + assert(s->data != NULL); +} + +void stack_free(struct stack* s) { + free(s->data); +} + +void stack_push(struct stack* s, struct node_base* n) { /* Appends n, doubling the capacity first whenever the stack is full. */ + while(s->count >= s->size) { + s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2)); + assert(s->data != NULL); + } + s->data[s->count++] = n; +} + +struct node_base* stack_pop(struct stack* s) { /* Removes and returns the top entry; asserts that the stack is not empty. */ + assert(s->count > 0); + return s->data[--s->count]; +} + +struct node_base* stack_peek(struct stack* s, size_t o) { /* Returns the entry o positions below the top without removing it; o == 0 is the top. */ + assert(s->count > o); + return s->data[s->count - o - 1]; +} + +void stack_popn(struct stack* s, size_t n) { /* Discards the top n entries. */ + assert(s->count >= n); + s->count -= n; +} + +void gmachine_init(struct gmachine* g) { + stack_init(&g->stack); + g->gc_nodes = NULL; + g->gc_node_count = 0; +
g->gc_node_threshold = 128; +} + +void gmachine_free(struct gmachine* g) { /* Frees the stack storage and every node still on the list of tracked nodes. */ + stack_free(&g->stack); + struct node_base* to_free = g->gc_nodes; + struct node_base* next; + + while(to_free) { + next = to_free->gc_next; /* read the link before the node is freed */ + free_node_direct(to_free); + free(to_free); + to_free = next; + } +} + +void gmachine_slide(struct gmachine* g, size_t n) { /* Keeps the top entry and removes the n entries directly beneath it. */ + assert(g->stack.count > n); + g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1]; + g->stack.count -= n; +} + +void gmachine_update(struct gmachine* g, size_t o) { /* Pops the top entry and overwrites, in place, the node that is then o positions below the new top so that it becomes an indirection to the popped node. */ + assert(g->stack.count > o + 1); + struct node_ind* ind = + (struct node_ind*) g->stack.data[g->stack.count - o - 2]; + ind->base.tag = NODE_IND; + ind->next = g->stack.data[g->stack.count -= 1]; /* the decrement happens first, so this reads the old top */ +} + +void gmachine_alloc(struct gmachine* g, size_t o) { /* Pushes o freshly tracked indirection nodes whose target is NULL. */ + while(o--) { + stack_push(&g->stack, + gmachine_track(g, (struct node_base*) alloc_ind(NULL))); + } +} + +void gmachine_pack(struct gmachine* g, size_t n, int8_t t) { /* Replaces the top n entries with one tracked data node carrying tag t. The entries are copied into a NULL-terminated array, deepest entry first. */ + assert(g->stack.count >= n); + + struct node_base** data = malloc(sizeof(*data) * (n + 1)); + assert(data != NULL); + memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data)); + data[n] = NULL; + + struct node_data* new_node = (struct node_data*) alloc_node(); + new_node->array = data; + new_node->base.tag = NODE_DATA; + new_node->tag = t; + + stack_popn(&g->stack, n); + stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node)); +} + +void gmachine_split(struct gmachine* g, size_t n) { /* Pops the top entry, treats it as a data node and pushes its first n fields in array order. The node's tag and field count are not checked here. */ + struct node_data* node = (struct node_data*) stack_pop(&g->stack); + for(size_t i = 0; i < n; i++) { + stack_push(&g->stack, node->array[i]); + } +} + +struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) { /* Puts b at the head of the list of tracked nodes and returns it. When the node count reaches the threshold a collection runs and the threshold becomes twice the number of surviving nodes. */ + g->gc_node_count++; + b->gc_next = g->gc_nodes; + g->gc_nodes = b; + + if(g->gc_node_count >= g->gc_node_threshold) { + uint64_t nodes_before = g->gc_node_count; /* NOTE(review): never read afterwards */ + gc_visit_node(b); /* b is marked explicitly: the callers in this file track a node before pushing it, so it is not yet on the stack that gmachine_gc scans */ + gmachine_gc(g); + g->gc_node_threshold = g->gc_node_count * 2; + } + + return b; +} + +void gmachine_gc(struct
gmachine* g) { /* Marks everything reachable from the stack, then walks the list of tracked nodes: marked nodes have their mark cleared and stay, unmarked nodes are unlinked and freed. */ + for(size_t i = 0; i < g->stack.count; i++) { + gc_visit_node(g->stack.data[i]); + } + + struct node_base** head_ptr = &g->gc_nodes; /* points at the link that refers to the current node, so unlinking needs no separate previous pointer */ + while(*head_ptr) { + if((*head_ptr)->gc_reachable) { + (*head_ptr)->gc_reachable = 0; + head_ptr = &(*head_ptr)->gc_next; + } else { + struct node_base* to_free = *head_ptr; + *head_ptr = to_free->gc_next; + free_node_direct(to_free); + free(to_free); + g->gc_node_count--; + } + } +} + +void unwind(struct gmachine* g) { /* Repeatedly inspects the top of the stack: an application pushes its left child, a global has its arguments put in place and its function called, an indirection is replaced by its target. Any other node kind ends the loop. */ + struct stack* s = &g->stack; + + while(1) { + struct node_base* peek = stack_peek(s, 0); + if(peek->tag == NODE_APP) { + struct node_app* n = (struct node_app*) peek; + stack_push(s, n->left); + } else if(peek->tag == NODE_GLOBAL) { + struct node_global* n = (struct node_global*) peek; + assert(s->count > n->arity); + + for(size_t i = 1; i <= n->arity; i++) { /* each of the top arity slots is overwritten with the right child of the entry directly beneath it, which is cast to an application node without a tag check */ + s->data[s->count - i] + = ((struct node_app*) s->data[s->count - i - 1])->right; + } + + n->function(g); + } else if(peek->tag == NODE_IND) { + struct node_ind* n = (struct node_ind*) peek; + stack_pop(s); + stack_push(s, n->next); + } else { + break; + } + } +} + +extern void f_main(struct gmachine* s); + +void print_node(struct node_base* n) { /* Writes a textual form of n to standard output: applications print both children separated by a space, indirections print their target, numbers print their value; data nodes print only the text (Packed). */ + if(n->tag == NODE_APP) { + struct node_app* app = (struct node_app*) n; + print_node(app->left); + putchar(' '); + print_node(app->right); + } else if(n->tag == NODE_DATA) { + printf("(Packed)"); + } else if(n->tag == NODE_GLOBAL) { + struct node_global* global = (struct node_global*) n; + printf("(Global: %p)", global->function); + } else if(n->tag == NODE_IND) { + print_node(((struct node_ind*) n)->next); + } else if(n->tag == NODE_NUM) { + struct node_num* num = (struct node_num*) n; + printf("%d", num->value); + } +} + +int main(int argc, char** argv) { /* Runtime entry point: wraps f_main in a zero-arity global node, unwinds it on a fresh machine and prints whatever node is left on top of the stack. */ + struct gmachine gmachine; + struct node_global* first_node = alloc_global(f_main, 0); + struct node_base* result; + + gmachine_init(&gmachine); + gmachine_track(&gmachine, (struct node_base*) first_node); + stack_push(&gmachine.stack, (struct
node_base*) first_node); + unwind(&gmachine); + result = stack_pop(&gmachine.stack); + printf("Result: "); + print_node(result); + putchar('\n'); + gmachine_free(&gmachine); +} diff --git a/code/compiler/11/runtime.h b/code/compiler/11/runtime.h new file mode 100644 index 0000000..030a27c --- /dev/null +++ b/code/compiler/11/runtime.h @@ -0,0 +1,84 @@ +#pragma once +#include + +struct gmachine; + +enum node_tag { /* identifies which node_* struct a node_base pointer really refers to */ + NODE_APP, + NODE_NUM, + NODE_GLOBAL, + NODE_IND, + NODE_DATA +}; + +struct node_base { /* header placed first in every node struct below */ + enum node_tag tag; + int8_t gc_reachable; /* mark flag set by gc_visit_node and cleared again by gmachine_gc */ + struct node_base* gc_next; /* next node in the owning gmachine's list of tracked nodes */ +}; + +struct node_app { /* application of left to right */ + struct node_base base; + struct node_base* left; + struct node_base* right; +}; + +struct node_num { + struct node_base base; + int32_t value; +}; + +struct node_global { /* a function together with the number of arguments unwind supplies before calling it */ + struct node_base base; + int32_t arity; + void (*function)(struct gmachine*); +}; + +struct node_ind { /* indirection to another node */ + struct node_base base; + struct node_base* next; +}; + +struct node_data { + struct node_base base; + int8_t tag; /* tag value passed to gmachine_pack; distinct from base.tag */ + struct node_base** array; /* NULL-terminated array of fields, built by gmachine_pack and released by free_node_direct */ +}; + +struct node_base* alloc_node(); +struct node_app* alloc_app(struct node_base* l, struct node_base* r); +struct node_num* alloc_num(int32_t n); +struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a); +struct node_ind* alloc_ind(struct node_base* n); +void free_node_direct(struct node_base*); +void gc_visit_node(struct node_base*); + +struct stack { /* growable array of node pointers; the top of the stack is data[count - 1] */ + size_t size; /* allocated capacity, in entries */ + size_t count; /* entries currently in use */ + struct node_base** data; +}; + +void stack_init(struct stack* s); +void stack_free(struct stack* s); +void stack_push(struct stack* s, struct node_base* n); +struct node_base* stack_pop(struct stack* s); +struct node_base* stack_peek(struct stack* s, size_t o); +void stack_popn(struct stack* s, size_t n); + +struct gmachine { + struct stack stack; + struct node_base* gc_nodes; /* head of the list of tracked nodes, linked through gc_next */ + int64_t gc_node_count; /* number of nodes on that list */ + int64_t gc_node_threshold; /* gmachine_track starts a collection once the count reaches this value */ +}; + +void gmachine_init(struct gmachine* g); +void gmachine_free(struct gmachine* g); +void gmachine_slide(struct gmachine* g,
size_t n); +void gmachine_update(struct gmachine* g, size_t o); +void gmachine_alloc(struct gmachine* g, size_t o); +void gmachine_pack(struct gmachine* g, size_t n, int8_t t); +void gmachine_split(struct gmachine* g, size_t n); +struct node_base* gmachine_track(struct gmachine* g, struct node_base* b); /* returns b, so a call can be nested inside stack_push */ +void gmachine_gc(struct gmachine* g); diff --git a/code/compiler/11/scanner.l b/code/compiler/11/scanner.l new file mode 100644 index 0000000..c8a4429 --- /dev/null +++ b/code/compiler/11/scanner.l @@ -0,0 +1,35 @@ +%option noyywrap + +%{ +#include +#include "ast.hpp" +#include "definition.hpp" +#include "parser.hpp" + +#define YY_DECL yy::parser::symbol_type yylex() /* gives the generated scanner the signature that parser.y declares for yylex */ + +%} + +%% + +[ \n]+ {/* spaces and newlines produce no token; no other whitespace character is matched by this rule */} +\+ { return yy::parser::make_PLUS(); } +\* { return yy::parser::make_TIMES(); } +- { return yy::parser::make_MINUS(); } +\/ { return yy::parser::make_DIVIDE(); } +[0-9]+ { /* the digits are converted with atoi */ return yy::parser::make_INT(atoi(yytext)); } +defn { return yy::parser::make_DEFN(); } +data { return yy::parser::make_DATA(); } +case { return yy::parser::make_CASE(); } +of { return yy::parser::make_OF(); } +\{ { return yy::parser::make_OCURLY(); } +\} { return yy::parser::make_CCURLY(); } +\( { return yy::parser::make_OPAREN(); } +\) { return yy::parser::make_CPAREN(); } +, { return yy::parser::make_COMMA(); } +-> { return yy::parser::make_ARROW(); } += { return yy::parser::make_EQUAL(); } +[a-z][a-zA-Z]* { /* identifiers are letters only; this rule comes after the keyword rules, so the exact words defn, data, case and of are returned as keywords */ return yy::parser::make_LID(std::string(yytext)); } +[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); } + +%% diff --git a/code/compiler/11/test.cpp b/code/compiler/11/test.cpp new file mode 100644 index 0000000..7972fa3 --- /dev/null +++ b/code/compiler/11/test.cpp @@ -0,0 +1,23 @@ +#include "graph.hpp" + +int main() { /* Builds a small function_graph by hand (a three-node cycle f, g, h; a two-node cycle i, j; and extra edges from j and x) and prints every group returned by compute_order() with its members. */ + function_graph graph; + graph.add_edge("f", "g"); + graph.add_edge("g", "h"); + graph.add_edge("h", "f"); + + graph.add_edge("i", "j"); + graph.add_edge("j", "i"); + + graph.add_edge("j", "f"); + + graph.add_edge("x", "f"); + graph.add_edge("x", "i"); + + for(auto&
group : graph.compute_order()) { + std::cout << "Group: " << std::endl; + for(auto& member : group->members) { + std::cout << member << std::endl; + } + } +} diff --git a/code/compiler/11/type.cpp b/code/compiler/11/type.cpp new file mode 100644 index 0000000..d355e87 --- /dev/null +++ b/code/compiler/11/type.cpp @@ -0,0 +1,148 @@ +#include "type.hpp" +#include +#include +#include +#include "error.hpp" + +void type_scheme::print(const type_mgr& mgr, std::ostream& to) const { + if(forall.size() != 0) { + to << "forall "; + for(auto& var : forall) { + to << var << " "; + } + to << ". "; + } + monotype->print(mgr, to); +} + +type_ptr substitute(const type_mgr& mgr, const std::map& subst, const type_ptr& t) { + type_var* var; + type_ptr resolved = mgr.resolve(t, var); + if(var) { + auto subst_it = subst.find(var->name); + if(subst_it == subst.end()) return resolved; + return subst_it->second; + } else if(type_arr* arr = dynamic_cast(t.get())) { + auto left_result = substitute(mgr, subst, arr->left); + auto right_result = substitute(mgr, subst, arr->right); + if(left_result == arr->left && right_result == arr->right) return t; + return type_ptr(new type_arr(left_result, right_result)); + } + return t; +} + +type_ptr type_scheme::instantiate(type_mgr& mgr) const { + if(forall.size() == 0) return monotype; + std::map subst; + for(auto& var : forall) { + subst[var] = mgr.new_type(); + } + return substitute(mgr, subst, monotype); +} + +void type_var::print(const type_mgr& mgr, std::ostream& to) const { + auto it = mgr.types.find(name); + if(it != mgr.types.end()) { + it->second->print(mgr, to); + } else { + to << name; + } +} + +void type_base::print(const type_mgr& mgr, std::ostream& to) const { + to << name; +} + +void type_arr::print(const type_mgr& mgr, std::ostream& to) const { + left->print(mgr, to); + to << " -> ("; + right->print(mgr, to); + to << ")"; +} + +std::string type_mgr::new_type_name() { + int temp = last_id++; + std::string str = ""; + + while(temp != -1) 
{ /* letters are produced least significant first, hence the reverse below; ids 0 to 25 give a to z and id 26 gives aa */ + str += (char) ('a' + (temp % 26)); + temp = temp / 26 - 1; + } + + std::reverse(str.begin(), str.end()); + return str; +} + +type_ptr type_mgr::new_type() { /* Returns a type variable with a freshly generated name. */ + return type_ptr(new type_var(new_type_name())); +} + +type_ptr type_mgr::new_arrow_type() { /* Returns an arrow type between two fresh type variables. */ + return type_ptr(new type_arr(new_type(), new_type())); +} + +type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const { /* Follows the bindings recorded in types, starting at t, and returns the first type that is not a bound variable. var is set to that type when it is an unbound variable and to nullptr otherwise. */ + type_var* cast; + + var = nullptr; + while((cast = dynamic_cast(t.get()))) { + auto it = types.find(cast->name); + + if(it == types.end()) { + var = cast; + break; + } + t = it->second; + } + + return t; +} + +void type_mgr::unify(type_ptr l, type_ptr r) { /* Makes l and r equal by binding type variables, or throws unification_error carrying the two resolved types. An unbound variable on either side is bound to the other side; two arrows are unified side by side; two base types must have the same name. NOTE(review): a variable is bound without checking whether it occurs inside the other type. */ + type_var* lvar; + type_var* rvar; + type_arr* larr; + type_arr* rarr; + type_base* lid; + type_base* rid; + + l = resolve(l, lvar); + r = resolve(r, rvar); + + if(lvar) { + bind(lvar->name, r); + return; + } else if(rvar) { + bind(rvar->name, l); + return; + } else if((larr = dynamic_cast(l.get())) && + (rarr = dynamic_cast(r.get()))) { + unify(larr->left, rarr->left); + unify(larr->right, rarr->right); + return; + } else if((lid = dynamic_cast(l.get())) && + (rid = dynamic_cast(r.get()))) { + if(lid->name == rid->name) return; /* a name mismatch falls through to the throw below */ + } + + throw unification_error(l, r); +} + +void type_mgr::bind(const std::string& s, type_ptr t) { /* Records that the variable named s stands for t; binding a variable to itself is ignored. */ + type_var* other = dynamic_cast(t.get()); + + if(other && other->name == s) return; + types[s] = t; +} + +void type_mgr::find_free(const type_ptr& t, std::set& into) const { /* Adds to into the name of every unbound type variable found in t after resolution, descending through arrow types. */ + type_var* var; + type_ptr resolved = resolve(t, var); + + if(var) { + into.insert(var->name); + } else if(type_arr* arr = dynamic_cast(resolved.get())) { + find_free(arr->left, into); + find_free(arr->right, into); + } +} diff --git a/code/compiler/11/type.hpp b/code/compiler/11/type.hpp new file mode 100644 index 0000000..94c4f85 --- /dev/null +++ b/code/compiler/11/type.hpp @@ -0,0 +1,81 @@ +#pragma once +#include +#include +#include +#include +#include + +struct type_mgr; + +struct type { + virtual ~type() = default; + + virtual void
print(const type_mgr& mgr, std::ostream& to) const = 0; +}; + +using type_ptr = std::shared_ptr; + +struct type_scheme { /* a monotype together with the names of the type variables it is quantified over */ + std::vector forall; /* quantified variable names; instantiate() replaces each with a fresh variable */ + type_ptr monotype; + + type_scheme(type_ptr type) : forall(), monotype(std::move(type)) {} /* starts with no quantified variables */ + + void print(const type_mgr& mgr, std::ostream& to) const; + type_ptr instantiate(type_mgr& mgr) const; +}; + +using type_scheme_ptr = std::shared_ptr; + +struct type_var : public type { /* a type variable, identified by name; bindings are stored in type_mgr::types, not here */ + std::string name; + + type_var(std::string n) + : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_base : public type { /* a named type; unify treats two of these as equal when their names match */ + std::string name; + + type_base(std::string n) + : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_data : public type_base { /* a base type that additionally records a tag for each of its constructors */ + struct constructor { + int tag; + }; + + std::map constructors; + + type_data(std::string n) + : type_base(std::move(n)) {} +}; + +struct type_arr : public type { /* arrow type from left to right */ + type_ptr left; + type_ptr right; + + type_arr(type_ptr l, type_ptr r) + : left(std::move(l)), right(std::move(r)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_mgr { /* hands out fresh type variables and stores what each variable has been bound to */ + int last_id = 0; /* id from which the next generated variable name is derived */ + std::map types; /* bindings looked up by variable name */ + + std::string new_type_name(); + type_ptr new_type(); + type_ptr new_arrow_type(); + + void unify(type_ptr l, type_ptr r); + type_ptr resolve(type_ptr t, type_var*& var) const; + void bind(const std::string& s, type_ptr t); + void find_free(const type_ptr& t, std::set& into) const; +}; diff --git a/code/compiler/11/type_env.cpp b/code/compiler/11/type_env.cpp new file mode 100644 index 0000000..3e654be --- /dev/null +++ b/code/compiler/11/type_env.cpp @@ -0,0 +1,45 @@ +#include "type_env.hpp" +#include "type.hpp" + +type_scheme_ptr type_env::lookup(const std::string& name) const { /* Looks name up in this environment, then in each parent in turn; returns nullptr when no environment binds it. */ + auto it = names.find(name); + if(it != names.end()) return it->second; + if(parent) return parent->lookup(name); + return nullptr; +} + +type_ptr type_env::lookup_type(const std::string& name) const { + auto it =
type_names.find(name); /* same search order as lookup, but over type names */ + if(it != type_names.end()) return it->second; + if(parent) return parent->lookup_type(name); + return nullptr; +} + +void type_env::bind(const std::string& name, type_ptr t) { /* Binds name in this environment to a scheme with no quantified variables, replacing any existing binding here. */ + names[name] = type_scheme_ptr(new type_scheme(t)); +} + +void type_env::bind(const std::string& name, type_scheme_ptr t) { + names[name] = t; +} + +void type_env::bind_type(const std::string& type_name, type_ptr t) { /* Registers a type name; throws 0 when the name is already visible from this environment, including through a parent. */ + if(lookup_type(type_name) != nullptr) throw 0; + type_names[type_name] = t; +} + +void type_env::generalize(const std::string& name, type_mgr& mgr) { /* Quantifies the scheme bound to name over every type variable that is still free in its monotype. Throws 0 when name is not bound in this environment itself (parents are not searched) or when the scheme already has quantified variables. */ + auto names_it = names.find(name); + if(names_it == names.end()) throw 0; + if(names_it->second->forall.size() > 0) throw 0; + + std::set free_variables; + mgr.find_free(names_it->second->monotype, free_variables); + for(auto& free : free_variables) { + names_it->second->forall.push_back(free); + } +} + +type_env_ptr type_scope(type_env_ptr parent) { /* Creates a new, empty environment whose parent is the given one. */ + return type_env_ptr(new type_env(std::move(parent))); +} diff --git a/code/compiler/11/type_env.hpp b/code/compiler/11/type_env.hpp new file mode 100644 index 0000000..ca91ef7 --- /dev/null +++ b/code/compiler/11/type_env.hpp @@ -0,0 +1,26 @@ +#pragma once +#include +#include +#include "type.hpp" + +struct type_env; +using type_env_ptr = std::shared_ptr; + +struct type_env { /* one scope of name bindings, chained to an optional parent scope */ + type_env_ptr parent; /* searched by lookup and lookup_type when a name is not found here; may be null */ + std::map names; /* schemes of the names bound in this scope */ + std::map type_names; /* types registered in this scope through bind_type */ + + type_env(type_env_ptr p) : parent(std::move(p)) {} + type_env() : type_env(nullptr) {} + + type_scheme_ptr lookup(const std::string& name) const; + type_ptr lookup_type(const std::string& name) const; + void bind(const std::string& name, type_ptr t); + void bind(const std::string& name, type_scheme_ptr t); + void bind_type(const std::string& type_name, type_ptr t); + void generalize(const std::string& name, type_mgr& mgr); +}; + + +type_env_ptr type_scope(type_env_ptr parent);