From 7812b1064b8f925f0b50951177f224d3f92ee595 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Sat, 26 Oct 2019 20:30:29 -0700 Subject: [PATCH] Make progress on compiler posts --- code/compiler/07/CMakeLists.txt | 28 +++ code/compiler/07/ast.cpp | 262 +++++++++++++++++++++++++++ code/compiler/07/ast.hpp | 196 ++++++++++++++++++++ code/compiler/07/binop.cpp | 21 +++ code/compiler/07/binop.hpp | 12 ++ code/compiler/07/definition.cpp | 81 +++++++++ code/compiler/07/env.cpp | 23 +++ code/compiler/07/env.hpp | 34 ++++ code/compiler/07/error.cpp | 5 + code/compiler/07/error.hpp | 21 +++ code/compiler/07/examples/bad1.txt | 2 + code/compiler/07/examples/bad2.txt | 1 + code/compiler/07/examples/bad3.txt | 8 + code/compiler/07/examples/works1.txt | 2 + code/compiler/07/examples/works2.txt | 3 + code/compiler/07/examples/works3.txt | 7 + code/compiler/07/instruction.cpp | 78 ++++++++ code/compiler/07/instruction.hpp | 111 ++++++++++++ code/compiler/07/main.cpp | 88 +++++++++ code/compiler/07/parser.y | 140 ++++++++++++++ code/compiler/07/runtime.c | 50 +++++ code/compiler/07/scanner.l | 34 ++++ code/compiler/07/type.cpp | 99 ++++++++++ code/compiler/07/type.hpp | 65 +++++++ code/compiler/07/type_env.cpp | 16 ++ code/compiler/07/type_env.hpp | 16 ++ content/blog/07_compiler_runtime.md | 32 +++- 27 files changed, 1431 insertions(+), 4 deletions(-) create mode 100644 code/compiler/07/CMakeLists.txt create mode 100644 code/compiler/07/ast.cpp create mode 100644 code/compiler/07/ast.hpp create mode 100644 code/compiler/07/binop.cpp create mode 100644 code/compiler/07/binop.hpp create mode 100644 code/compiler/07/definition.cpp create mode 100644 code/compiler/07/env.cpp create mode 100644 code/compiler/07/env.hpp create mode 100644 code/compiler/07/error.cpp create mode 100644 code/compiler/07/error.hpp create mode 100644 code/compiler/07/examples/bad1.txt create mode 100644 code/compiler/07/examples/bad2.txt create mode 100644 code/compiler/07/examples/bad3.txt create mode 
100644 code/compiler/07/examples/works1.txt create mode 100644 code/compiler/07/examples/works2.txt create mode 100644 code/compiler/07/examples/works3.txt create mode 100644 code/compiler/07/instruction.cpp create mode 100644 code/compiler/07/instruction.hpp create mode 100644 code/compiler/07/main.cpp create mode 100644 code/compiler/07/parser.y create mode 100644 code/compiler/07/runtime.c create mode 100644 code/compiler/07/scanner.l create mode 100644 code/compiler/07/type.cpp create mode 100644 code/compiler/07/type.hpp create mode 100644 code/compiler/07/type_env.cpp create mode 100644 code/compiler/07/type_env.hpp diff --git a/code/compiler/07/CMakeLists.txt b/code/compiler/07/CMakeLists.txt new file mode 100644 index 0000000..5e5dab4 --- /dev/null +++ b/code/compiler/07/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.1) +project(compiler) + +find_package(BISON) +find_package(FLEX) +bison_target(parser + ${CMAKE_CURRENT_SOURCE_DIR}/parser.y + ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp + COMPILE_FLAGS "-d") +flex_target(scanner + ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l + ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp) +add_flex_bison_dependency(scanner parser) + +add_executable(compiler + ast.cpp ast.hpp definition.cpp + type_env.cpp type_env.hpp + env.cpp env.hpp + type.cpp type.hpp + error.cpp error.hpp + binop.cpp binop.hpp + instruction.cpp instruction.hpp + ${BISON_parser_OUTPUTS} + ${FLEX_scanner_OUTPUTS} + main.cpp +) +target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/code/compiler/07/ast.cpp b/code/compiler/07/ast.cpp new file mode 100644 index 0000000..00d92a3 --- /dev/null +++ b/code/compiler/07/ast.cpp @@ -0,0 +1,262 @@ +#include "ast.hpp" +#include +#include "error.hpp" + +static void print_indent(int n, std::ostream& to) { + while(n--) to << " "; +} + +type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) { + node_type = 
typecheck(mgr, env); + return node_type; +} + +void ast::resolve_common(const type_mgr& mgr) { + type_var* var; + type_ptr resolved_type = mgr.resolve(node_type, var); + if(var) throw type_error("ambiguously typed program"); + + resolve(mgr); + node_type = std::move(resolved_type); +} + +void ast_int::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "INT: " << value << std::endl; +} + +type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const { + return type_ptr(new type_base("Int")); +} + +void ast_int::resolve(const type_mgr& mgr) const { + +} + +void ast_int::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr(new instruction_pushint(value))); +} + +void ast_lid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "LID: " << id << std::endl; +} + +type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const { + return env.lookup(id); +} + +void ast_lid::resolve(const type_mgr& mgr) const { + +} + +void ast_lid::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr( + env->has_variable(id) ? 
+ (instruction*) new instruction_push(env->get_offset(id)) : + (instruction*) new instruction_pushglobal(id))); +} + +void ast_uid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "UID: " << id << std::endl; +} + +type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const { + return env.lookup(id); +} + +void ast_uid::resolve(const type_mgr& mgr) const { + +} + +void ast_uid::compile(const env_ptr& env, std::vector& into) const { + into.push_back(instruction_ptr(new instruction_pushglobal(id))); +} + +void ast_binop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "BINOP: " << op_name(op) << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + +type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const { + type_ptr ltype = left->typecheck_common(mgr, env); + type_ptr rtype = right->typecheck_common(mgr, env); + type_ptr ftype = env.lookup(op_name(op)); + if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op)); + + type_ptr return_type = mgr.new_type(); + type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type)); + type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one)); + + mgr.unify(arrow_two, ftype); + return return_type; +} + +void ast_binop::resolve(const type_mgr& mgr) const { + left->resolve_common(mgr); + right->resolve_common(mgr); +} + +void ast_binop::compile(const env_ptr& env, std::vector& into) const { + right->compile(env, into); + left->compile(env_ptr(new env_offset(1, env)), into); + + into.push_back(instruction_ptr(new instruction_pushglobal(op_name(op)))); + into.push_back(instruction_ptr(new instruction_mkapp())); + into.push_back(instruction_ptr(new instruction_mkapp())); +} + +void ast_app::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "APP:" << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + +type_ptr 
ast_app::typecheck(type_mgr& mgr, const type_env& env) const { + type_ptr ltype = left->typecheck_common(mgr, env); + type_ptr rtype = right->typecheck_common(mgr, env); + + type_ptr return_type = mgr.new_type(); + type_ptr arrow = type_ptr(new type_arr(rtype, return_type)); + mgr.unify(arrow, ltype); + return return_type; +} + +void ast_app::resolve(const type_mgr& mgr) const { + left->resolve_common(mgr); + right->resolve_common(mgr); +} + +void ast_app::compile(const env_ptr& env, std::vector& into) const { + right->compile(env, into); + left->compile(env_ptr(new env_offset(1, env)), into); + into.push_back(instruction_ptr(new instruction_mkapp())); +} + +void ast_case::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "CASE: " << std::endl; + for(auto& branch : branches) { + print_indent(indent + 1, to); + branch->pat->print(to); + to << std::endl; + branch->expr->print(indent + 2, to); + } +} + +type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const { + type_var* var; + type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var); + type_ptr branch_type = mgr.new_type(); + + for(auto& branch : branches) { + type_env new_env = env.scope(); + branch->pat->match(case_type, mgr, new_env); + type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env); + mgr.unify(branch_type, curr_branch_type); + } + + case_type = mgr.resolve(case_type, var); + if(!dynamic_cast(case_type.get())) { + throw type_error("attempting case analysis of non-data type"); + } + + return branch_type; +} + +void ast_case::resolve(const type_mgr& mgr) const { + of->resolve_common(mgr); + for(auto& branch : branches) { + branch->expr->resolve_common(mgr); + } +} + +void ast_case::compile(const env_ptr& env, std::vector& into) const { + type_data* type = dynamic_cast(of->node_type.get()); + + of->compile(env, into); + into.push_back(instruction_ptr(new instruction_eval())); + + instruction_jump* jump_instruction = new 
instruction_jump(); + into.push_back(instruction_ptr(jump_instruction)); + for(auto& branch : branches) { + std::vector branch_instructions; + pattern_var* vpat; + pattern_constr* cpat; + + if((vpat = dynamic_cast(branch->pat.get()))) { + branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions); + + for(auto& constr_pair : type->constructors) { + if(jump_instruction->tag_mappings.find(constr_pair.second.tag) != + jump_instruction->tag_mappings.end()) + break; + + jump_instruction->tag_mappings[constr_pair.second.tag] = + jump_instruction->branches.size(); + } + jump_instruction->branches.push_back(std::move(branch_instructions)); + } else if((cpat = dynamic_cast(branch->pat.get()))) { + env_ptr new_env = env; + for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + + branch_instructions.push_back(instruction_ptr(new instruction_split())); + branch->expr->compile(new_env, branch_instructions); + branch_instructions.push_back(instruction_ptr(new instruction_slide( + cpat->params.size()))); + + int new_tag = type->constructors[cpat->constr].tag; + if(jump_instruction->tag_mappings.find(new_tag) != + jump_instruction->tag_mappings.end()) + throw type_error("technically not a type error: duplicate pattern"); + + jump_instruction->tag_mappings[new_tag] = + jump_instruction->branches.size(); + jump_instruction->branches.push_back(std::move(branch_instructions)); + } + } + + for(auto& constr_pair : type->constructors) { + if(jump_instruction->tag_mappings.find(constr_pair.second.tag) == + jump_instruction->tag_mappings.end()) + throw type_error("non-total pattern"); + } +} + +void pattern_var::print(std::ostream& to) const { + to << var; +} + +void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const { + env.bind(var, t); +} + +void pattern_constr::print(std::ostream& to) const { + to << constr; + for(auto& param : params) { + to << " " << param; + } +} + +void 
pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const { + type_ptr constructor_type = env.lookup(constr); + if(!constructor_type) { + throw type_error(std::string("pattern using unknown constructor ") + constr); + } + + for(int i = 0; i < params.size(); i++) { + type_arr* arr = dynamic_cast(constructor_type.get()); + if(!arr) throw type_error("too many parameters in constructor pattern"); + + env.bind(params[i], arr->left); + constructor_type = arr->right; + } + + mgr.unify(t, constructor_type); +} diff --git a/code/compiler/07/ast.hpp b/code/compiler/07/ast.hpp new file mode 100644 index 0000000..c536d17 --- /dev/null +++ b/code/compiler/07/ast.hpp @@ -0,0 +1,196 @@ +#pragma once +#include +#include +#include "type.hpp" +#include "type_env.hpp" +#include "binop.hpp" +#include "instruction.hpp" +#include "env.hpp" + +struct ast { + type_ptr node_type; + + virtual ~ast() = default; + + virtual void print(int indent, std::ostream& to) const = 0; + virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0; + virtual void resolve(const type_mgr& mgr) const = 0; + virtual void compile(const env_ptr& env, + std::vector& into) const = 0; + + type_ptr typecheck_common(type_mgr& mgr, const type_env& env); + void resolve_common(const type_mgr& mgr); +}; + +using ast_ptr = std::unique_ptr; + +struct pattern { + virtual ~pattern() = default; + + virtual void print(std::ostream& to) const = 0; + virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0; +}; + +using pattern_ptr = std::unique_ptr; + +struct branch { + pattern_ptr pat; + ast_ptr expr; + + branch(pattern_ptr p, ast_ptr a) + : pat(std::move(p)), expr(std::move(a)) {} +}; + +using branch_ptr = std::unique_ptr; + +struct constructor { + std::string name; + std::vector types; + + constructor(std::string n, std::vector ts) + : name(std::move(n)), types(std::move(ts)) {} +}; + +using constructor_ptr = std::unique_ptr; + +struct definition { + virtual ~definition() = default; + 
+ virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0; + virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0; + virtual void resolve(const type_mgr& mgr) = 0; + virtual void compile() = 0; +}; + +using definition_ptr = std::unique_ptr; + +struct ast_int : public ast { + int value; + + explicit ast_int(int v) + : value(v) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_lid : public ast { + std::string id; + + explicit ast_lid(std::string i) + : id(std::move(i)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_uid : public ast { + std::string id; + + explicit ast_uid(std::string i) + : id(std::move(i)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_binop : public ast { + binop op; + ast_ptr left; + ast_ptr right; + + ast_binop(binop o, ast_ptr l, ast_ptr r) + : op(o), left(std::move(l)), right(std::move(r)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct ast_app : public ast { + ast_ptr left; + ast_ptr right; + + ast_app(ast_ptr l, ast_ptr r) + : left(std::move(l)), right(std::move(r)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, 
std::vector& into) const; +}; + +struct ast_case : public ast { + ast_ptr of; + std::vector branches; + + ast_case(ast_ptr o, std::vector b) + : of(std::move(o)), branches(std::move(b)) {} + + void print(int indent, std::ostream& to) const; + type_ptr typecheck(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr) const; + void compile(const env_ptr& env, std::vector& into) const; +}; + +struct pattern_var : public pattern { + std::string var; + + pattern_var(std::string v) + : var(std::move(v)) {} + + void print(std::ostream &to) const; + void match(type_ptr t, type_mgr& mgr, type_env& env) const; +}; + +struct pattern_constr : public pattern { + std::string constr; + std::vector params; + + pattern_constr(std::string c, std::vector p) + : constr(std::move(c)), params(std::move(p)) {} + + void print(std::ostream &to) const; + void match(type_ptr t, type_mgr&, type_env& env) const; +}; + +struct definition_defn : public definition { + std::string name; + std::vector params; + ast_ptr body; + + type_ptr return_type; + std::vector param_types; + + std::vector instructions; + + definition_defn(std::string n, std::vector p, ast_ptr b) + : name(std::move(n)), params(std::move(p)), body(std::move(b)) { + + } + + void typecheck_first(type_mgr& mgr, type_env& env); + void typecheck_second(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr); + void compile(); +}; + +struct definition_data : public definition { + std::string name; + std::vector constructors; + + definition_data(std::string n, std::vector cs) + : name(std::move(n)), constructors(std::move(cs)) {} + + void typecheck_first(type_mgr& mgr, type_env& env); + void typecheck_second(type_mgr& mgr, const type_env& env) const; + void resolve(const type_mgr& mgr); + void compile(); +}; diff --git a/code/compiler/07/binop.cpp b/code/compiler/07/binop.cpp new file mode 100644 index 0000000..3a5f0ca --- /dev/null +++ b/code/compiler/07/binop.cpp @@ -0,0 +1,21 @@ +#include 
"binop.hpp" + +std::string op_name(binop op) { + switch(op) { + case PLUS: return "+"; + case MINUS: return "-"; + case TIMES: return "*"; + case DIVIDE: return "/"; + } + return "??"; +} + +std::string op_action(binop op) { + switch(op) { + case PLUS: return "plus"; + case MINUS: return "minus"; + case TIMES: return "times"; + case DIVIDE: return "divide"; + } + return "??"; +} diff --git a/code/compiler/07/binop.hpp b/code/compiler/07/binop.hpp new file mode 100644 index 0000000..8d07858 --- /dev/null +++ b/code/compiler/07/binop.hpp @@ -0,0 +1,12 @@ +#pragma once +#include + +enum binop { + PLUS, + MINUS, + TIMES, + DIVIDE +}; + +std::string op_name(binop op); +std::string op_action(binop op); diff --git a/code/compiler/07/definition.cpp b/code/compiler/07/definition.cpp new file mode 100644 index 0000000..34c6f66 --- /dev/null +++ b/code/compiler/07/definition.cpp @@ -0,0 +1,81 @@ +#include "ast.hpp" +#include "error.hpp" + +void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) { + return_type = mgr.new_type(); + type_ptr full_type = return_type; + + for(auto it = params.rbegin(); it != params.rend(); it++) { + type_ptr param_type = mgr.new_type(); + full_type = type_ptr(new type_arr(param_type, full_type)); + param_types.push_back(param_type); + } + + env.bind(name, full_type); +} + +void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const { + type_env new_env = env.scope(); + auto param_it = params.begin(); + auto type_it = param_types.rbegin(); + + while(param_it != params.end() && type_it != param_types.rend()) { + new_env.bind(*param_it, *type_it); + param_it++; + type_it++; + } + + type_ptr body_type = body->typecheck_common(mgr, new_env); + mgr.unify(return_type, body_type); +} + +void definition_defn::resolve(const type_mgr& mgr) { + type_var* var; + body->resolve_common(mgr); + + return_type = mgr.resolve(return_type, var); + if(var) throw type_error("ambiguously typed program"); + for(auto& param_type : 
param_types) { + param_type = mgr.resolve(param_type, var); + if(var) throw type_error("ambiguously typed program"); + } +} + +void definition_defn::compile() { + env_ptr new_env = env_ptr(new env_offset(0, nullptr)); + for(auto it = params.rbegin(); it != params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + body->compile(new_env, instructions); + instructions.push_back(instruction_ptr(new instruction_update(params.size()))); +} + +void definition_data::typecheck_first(type_mgr& mgr, type_env& env) { + type_data* this_type = new type_data(name); + type_ptr return_type = type_ptr(this_type); + int next_tag = 0; + + for(auto& constructor : constructors) { + this_type->constructors[constructor->name] = { next_tag++ }; + + type_ptr full_type = return_type; + for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) { + type_ptr type = type_ptr(new type_base(*it)); + full_type = type_ptr(new type_arr(type, full_type)); + } + + env.bind(constructor->name, full_type); + } +} + +void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const { + // Nothing +} + +void definition_data::resolve(const type_mgr& mgr) { + // Nothing +} + +void definition_data::compile() { + +} diff --git a/code/compiler/07/env.cpp b/code/compiler/07/env.cpp new file mode 100644 index 0000000..818cf5d --- /dev/null +++ b/code/compiler/07/env.cpp @@ -0,0 +1,23 @@ +#include "env.hpp" + +int env_var::get_offset(const std::string& name) const { + if(name == this->name) return 0; + if(parent) return parent->get_offset(name) + 1; + throw 0; +} + +bool env_var::has_variable(const std::string& name) const { + if(name == this->name) return true; + if(parent) return parent->has_variable(name); + return false; +} + +int env_offset::get_offset(const std::string& name) const { + if(parent) return parent->get_offset(name) + offset; + throw 0; +} + +bool env_offset::has_variable(const std::string& name) const { + if(parent) return 
parent->has_variable(name); + return false; +} diff --git a/code/compiler/07/env.hpp b/code/compiler/07/env.hpp new file mode 100644 index 0000000..a8fbbec --- /dev/null +++ b/code/compiler/07/env.hpp @@ -0,0 +1,34 @@ +#pragma once +#include +#include + +struct env { + virtual ~env() = default; + + virtual int get_offset(const std::string& name) const = 0; + virtual bool has_variable(const std::string& name) const = 0; +}; + +using env_ptr = std::shared_ptr; + +struct env_var : public env { + std::string name; + env_ptr parent; + + env_var(std::string& n, env_ptr p) + : name(std::move(n)), parent(std::move(p)) {} + + int get_offset(const std::string& name) const; + bool has_variable(const std::string& name) const; +}; + +struct env_offset : public env { + int offset; + env_ptr parent; + + env_offset(int o, env_ptr p) + : offset(o), parent(std::move(p)) {} + + int get_offset(const std::string& name) const; + bool has_variable(const std::string& name) const; +}; diff --git a/code/compiler/07/error.cpp b/code/compiler/07/error.cpp new file mode 100644 index 0000000..f5125e3 --- /dev/null +++ b/code/compiler/07/error.cpp @@ -0,0 +1,5 @@ +#include "error.hpp" + +const char* type_error::what() const noexcept { + return "an error occured while checking the types of the program"; +} diff --git a/code/compiler/07/error.hpp b/code/compiler/07/error.hpp new file mode 100644 index 0000000..5bfbc7e --- /dev/null +++ b/code/compiler/07/error.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include "type.hpp" + +struct type_error : std::exception { + std::string description; + + type_error(std::string d) + : description(std::move(d)) {} + + const char* what() const noexcept override; +}; + +struct unification_error : public type_error { + type_ptr left; + type_ptr right; + + unification_error(type_ptr l, type_ptr r) + : left(std::move(l)), right(std::move(r)), + type_error("failed to unify types") {} +}; diff --git a/code/compiler/07/examples/bad1.txt 
b/code/compiler/07/examples/bad1.txt new file mode 100644 index 0000000..86d4bc4 --- /dev/null +++ b/code/compiler/07/examples/bad1.txt @@ -0,0 +1,2 @@ +data Bool = { True, False } +defn main = { 3 + True } diff --git a/code/compiler/07/examples/bad2.txt b/code/compiler/07/examples/bad2.txt new file mode 100644 index 0000000..def8785 --- /dev/null +++ b/code/compiler/07/examples/bad2.txt @@ -0,0 +1 @@ +defn main = { 1 2 3 4 5 } diff --git a/code/compiler/07/examples/bad3.txt b/code/compiler/07/examples/bad3.txt new file mode 100644 index 0000000..6f82b3d --- /dev/null +++ b/code/compiler/07/examples/bad3.txt @@ -0,0 +1,8 @@ +data List = { Nil, Cons Int List } + +defn head l = { + case l of { + Nil -> { 0 } + Cons x y z -> { x } + } +} diff --git a/code/compiler/07/examples/works1.txt b/code/compiler/07/examples/works1.txt new file mode 100644 index 0000000..bedb5d8 --- /dev/null +++ b/code/compiler/07/examples/works1.txt @@ -0,0 +1,2 @@ +defn main = { plus 320 6 } +defn plus x y = { x + y } diff --git a/code/compiler/07/examples/works2.txt b/code/compiler/07/examples/works2.txt new file mode 100644 index 0000000..8332fde --- /dev/null +++ b/code/compiler/07/examples/works2.txt @@ -0,0 +1,3 @@ +defn add x y = { x + y } +defn double x = { add x x } +defn main = { double 163 } diff --git a/code/compiler/07/examples/works3.txt b/code/compiler/07/examples/works3.txt new file mode 100644 index 0000000..cfffd20 --- /dev/null +++ b/code/compiler/07/examples/works3.txt @@ -0,0 +1,7 @@ +data List = { Nil, Cons Int List } +defn length l = { + case l of { + Nil -> { 0 } + Cons x xs -> { 1 + length xs } + } +} diff --git a/code/compiler/07/instruction.cpp b/code/compiler/07/instruction.cpp new file mode 100644 index 0000000..0272e20 --- /dev/null +++ b/code/compiler/07/instruction.cpp @@ -0,0 +1,78 @@ +#include "instruction.hpp" + +static void print_indent(int n, std::ostream& to) { + while(n--) to << " "; +} + +void instruction_pushint::print(int indent, std::ostream& to) 
const { + print_indent(indent, to); + to << "PushInt(" << value << ")" << std::endl; +} + +void instruction_pushglobal::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "PushGlobal(" << name << ")" << std::endl; +} + +void instruction_push::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Push(" << offset << ")" << std::endl; +} + +void instruction_mkapp::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "MkApp()" << std::endl; +} + +void instruction_update::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Update(" << offset << ")" << std::endl; +} + +void instruction_pack::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Pack(" << tag << ", " << size << ")" << std::endl; +} + +void instruction_split::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Split()" << std::endl; +} + +void instruction_jump::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Jump(" << std::endl; + for(auto& instruction_set : branches) { + for(auto& instruction : instruction_set) { + instruction->print(indent + 2, to); + } + to << std::endl; + } + print_indent(indent, to); + to << ")" << std::endl; +} + +void instruction_slide::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Slide(" << offset << ")" << std::endl; +} + +void instruction_binop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "BinOp(" << op_action(op) << ")" << std::endl; +} + +void instruction_eval::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Eval()" << std::endl; +} + +void instruction_alloc::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "Alloc(" << amount << ")" << std::endl; +} + +void instruction_unwind::print(int indent, std::ostream& to) const { + print_indent(indent, 
to); + to << "Unwind()" << std::endl; +} diff --git a/code/compiler/07/instruction.hpp b/code/compiler/07/instruction.hpp new file mode 100644 index 0000000..879cd05 --- /dev/null +++ b/code/compiler/07/instruction.hpp @@ -0,0 +1,111 @@ +#pragma once +#include +#include +#include +#include +#include +#include "binop.hpp" + +struct instruction { + virtual ~instruction() = default; + + virtual void print(int indent, std::ostream& to) const = 0; +}; + +using instruction_ptr = std::unique_ptr; + +struct instruction_pushint : public instruction { + int value; + + instruction_pushint(int v) + : value(v) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_pushglobal : public instruction { + std::string name; + + instruction_pushglobal(std::string n) + : name(std::move(n)) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_push : public instruction { + int offset; + + instruction_push(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_mkapp : public instruction { + void print(int indent, std::ostream& to) const; +}; + +struct instruction_update : public instruction { + int offset; + + instruction_update(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_pack : public instruction { + int tag; + int size; + + instruction_pack(int t, int s) + : tag(t), size(s) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_split : public instruction { + void print(int indent, std::ostream& to) const; +}; + +struct instruction_jump : public instruction { + std::vector> branches; + std::map tag_mappings; + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_slide : public instruction { + int offset; + + instruction_slide(int o) + : offset(o) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_binop : public instruction { + binop op; + + 
instruction_binop(binop o) + : op(o) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_eval : public instruction { + void print(int indent, std::ostream& to) const; +}; + +struct instruction_alloc : public instruction { + int amount; + + instruction_alloc(int a) + : amount(a) {} + + void print(int indent, std::ostream& to) const; +}; + +struct instruction_unwind : public instruction { + void print(int indent, std::ostream& to) const; +}; diff --git a/code/compiler/07/main.cpp b/code/compiler/07/main.cpp new file mode 100644 index 0000000..ffbd4e5 --- /dev/null +++ b/code/compiler/07/main.cpp @@ -0,0 +1,88 @@ +#include "ast.hpp" +#include +#include "parser.hpp" +#include "error.hpp" +#include "type.hpp" + +void yy::parser::error(const std::string& msg) { + std::cout << "An error occured: " << msg << std::endl; +} + +extern std::vector program; + +void typecheck_program( + const std::vector& prog, + type_mgr& mgr, type_env& env) { + type_ptr int_type = type_ptr(new type_base("Int")); + type_ptr binop_type = type_ptr(new type_arr( + int_type, + type_ptr(new type_arr(int_type, int_type)))); + + env.bind("+", binop_type); + env.bind("-", binop_type); + env.bind("*", binop_type); + env.bind("/", binop_type); + + for(auto& def : prog) { + def->typecheck_first(mgr, env); + } + + for(auto& def : prog) { + def->typecheck_second(mgr, env); + } + + for(auto& pair : env.names) { + std::cout << pair.first << ": "; + pair.second->print(mgr, std::cout); + std::cout << std::endl; + } + + for(auto& def : prog) { + def->resolve(mgr); + } +} + +void compile_program(const std::vector& prog) { + for(auto& def : prog) { + def->compile(); + + definition_defn* defn = dynamic_cast(def.get()); + if(!defn) continue; + for(auto& instruction : defn->instructions) { + instruction->print(0, std::cout); + } + std::cout << std::endl; + } +} + +int main() { + yy::parser parser; + type_mgr mgr; + type_env env; + + parser.parse(); + for(auto& definition : program) { + 
definition_defn* def = dynamic_cast(definition.get()); + if(!def) continue; + + std::cout << def->name; + for(auto& param : def->params) std::cout << " " << param; + std::cout << ":" << std::endl; + + def->body->print(1, std::cout); + } + try { + typecheck_program(program, mgr, env); + compile_program(program); + } catch(unification_error& err) { + std::cout << "failed to unify types: " << std::endl; + std::cout << " (1) \033[34m"; + err.left->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + std::cout << " (2) \033[32m"; + err.right->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + } catch(type_error& err) { + std::cout << "failed to type check program: " << err.description << std::endl; + } +} diff --git a/code/compiler/07/parser.y b/code/compiler/07/parser.y new file mode 100644 index 0000000..3874aca --- /dev/null +++ b/code/compiler/07/parser.y @@ -0,0 +1,140 @@ +%{ +#include +#include +#include "ast.hpp" +#include "parser.hpp" + +std::vector program; +extern yy::parser::symbol_type yylex(); + +%} + +%token PLUS +%token TIMES +%token MINUS +%token DIVIDE +%token INT +%token DEFN +%token DATA +%token CASE +%token OF +%token OCURLY +%token CCURLY +%token OPAREN +%token CPAREN +%token COMMA +%token ARROW +%token EQUAL +%token LID +%token UID + +%language "c++" +%define api.value.type variant +%define api.token.constructor + +%type > lowercaseParams uppercaseParams +%type > program definitions +%type > branches +%type > constructors +%type aAdd aMul case app appBase +%type definition defn data +%type branch +%type pattern +%type constructor + +%start program + +%% + +program + : definitions { program = std::move($1); } + ; + +definitions + : definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); } + | definition { $$ = std::vector(); $$.push_back(std::move($1)); } + ; + +definition + : defn { $$ = std::move($1); } + | data { $$ = std::move($1); } + ; + +defn + : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY + { 
$$ = definition_ptr( + new definition_defn(std::move($2), std::move($3), std::move($6))); } + ; + +lowercaseParams + : %empty { $$ = std::vector(); } + | lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); } + ; + +uppercaseParams + : %empty { $$ = std::vector(); } + | uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); } + ; + +aAdd + : aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); } + | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); } + | aMul { $$ = std::move($1); } + ; + +aMul + : aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); } + | aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); } + | app { $$ = std::move($1); } + ; + +app + : app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); } + | appBase { $$ = std::move($1); } + ; + +appBase + : INT { $$ = ast_ptr(new ast_int($1)); } + | LID { $$ = ast_ptr(new ast_lid(std::move($1))); } + | UID { $$ = ast_ptr(new ast_uid(std::move($1))); } + | OPAREN aAdd CPAREN { $$ = std::move($2); } + | case { $$ = std::move($1); } + ; + +case + : CASE aAdd OF OCURLY branches CCURLY + { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); } + ; + +branches + : branches branch { $$ = std::move($1); $$.push_back(std::move($2)); } + | branch { $$ = std::vector(); $$.push_back(std::move($1));} + ; + +branch + : pattern ARROW OCURLY aAdd CCURLY + { $$ = branch_ptr(new branch(std::move($1), std::move($4))); } + ; + +pattern + : LID { $$ = pattern_ptr(new pattern_var(std::move($1))); } + | UID lowercaseParams + { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); } + ; + +data + : DATA UID EQUAL OCURLY constructors CCURLY + { $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); } + ; + +constructors + : constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); } + | constructor + 
{ $$ = std::vector(); $$.push_back(std::move($1)); } + ; + +constructor + : UID uppercaseParams + { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); } + ; + diff --git a/code/compiler/07/runtime.c b/code/compiler/07/runtime.c new file mode 100644 index 0000000..e125338 --- /dev/null +++ b/code/compiler/07/runtime.c @@ -0,0 +1,50 @@ +#include +#include +#include + +struct stack; + +enum node_tag { + NODE_APP, + NODE_NUM, + NODE_GLOBAL, + NODE_IND, + NODE_PACK +}; + +struct node_base { + enum node_tag tag; +}; + +struct node_app { + struct node_base base; + struct node_base* left; + struct node_base* right; +}; + +struct node_num { + struct node_base base; + int32_t value; +}; + +struct node_global { + struct node_base base; + void (*function)(struct stack*); +}; + +struct node_ind { + struct node_base base; + struct node_base* next; +}; + +struct node_data { + struct node_base base; + int8_t tag; + struct node_base** array; +}; + +struct node_base* alloc_node() { + node_base* new_node = malloc(sizeof(struct node_app)); + assert(new_node != NULL); + return new_node; +} diff --git a/code/compiler/07/scanner.l b/code/compiler/07/scanner.l new file mode 100644 index 0000000..683deeb --- /dev/null +++ b/code/compiler/07/scanner.l @@ -0,0 +1,34 @@ +%option noyywrap + +%{ +#include +#include "ast.hpp" +#include "parser.hpp" + +#define YY_DECL yy::parser::symbol_type yylex() + +%} + +%% + +[ \n]+ {} +\+ { return yy::parser::make_PLUS(); } +\* { return yy::parser::make_TIMES(); } +- { return yy::parser::make_MINUS(); } +\/ { return yy::parser::make_DIVIDE(); } +[0-9]+ { return yy::parser::make_INT(atoi(yytext)); } +defn { return yy::parser::make_DEFN(); } +data { return yy::parser::make_DATA(); } +case { return yy::parser::make_CASE(); } +of { return yy::parser::make_OF(); } +\{ { return yy::parser::make_OCURLY(); } +\} { return yy::parser::make_CCURLY(); } +\( { return yy::parser::make_OPAREN(); } +\) { return yy::parser::make_CPAREN(); } +, { return 
yy::parser::make_COMMA(); } +-> { return yy::parser::make_ARROW(); } += { return yy::parser::make_EQUAL(); } +[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); } +[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); } + +%% diff --git a/code/compiler/07/type.cpp b/code/compiler/07/type.cpp new file mode 100644 index 0000000..f5868d5 --- /dev/null +++ b/code/compiler/07/type.cpp @@ -0,0 +1,99 @@ +#include "type.hpp" +#include +#include +#include "error.hpp" + +void type_var::print(const type_mgr& mgr, std::ostream& to) const { + auto it = mgr.types.find(name); + if(it != mgr.types.end()) { + it->second->print(mgr, to); + } else { + to << name; + } +} + +void type_base::print(const type_mgr& mgr, std::ostream& to) const { + to << name; +} + +void type_arr::print(const type_mgr& mgr, std::ostream& to) const { + left->print(mgr, to); + to << " -> ("; + right->print(mgr, to); + to << ")"; +} + +std::string type_mgr::new_type_name() { + int temp = last_id++; + std::string str = ""; + + while(temp != -1) { + str += (char) ('a' + (temp % 26)); + temp = temp / 26 - 1; + } + + std::reverse(str.begin(), str.end()); + return str; +} + +type_ptr type_mgr::new_type() { + return type_ptr(new type_var(new_type_name())); +} + +type_ptr type_mgr::new_arrow_type() { + return type_ptr(new type_arr(new_type(), new_type())); +} + +type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const { + type_var* cast; + + var = nullptr; + while((cast = dynamic_cast(t.get()))) { + auto it = types.find(cast->name); + + if(it == types.end()) { + var = cast; + break; + } + t = it->second; + } + + return t; +} + +void type_mgr::unify(type_ptr l, type_ptr r) { + type_var* lvar; + type_var* rvar; + type_arr* larr; + type_arr* rarr; + type_base* lid; + type_base* rid; + + l = resolve(l, lvar); + r = resolve(r, rvar); + + if(lvar) { + bind(lvar->name, r); + return; + } else if(rvar) { + bind(rvar->name, l); + return; + } else if((larr = dynamic_cast(l.get())) && + (rarr = 
dynamic_cast(r.get()))) { + unify(larr->left, rarr->left); + unify(larr->right, rarr->right); + return; + } else if((lid = dynamic_cast(l.get())) && + (rid = dynamic_cast(r.get()))) { + if(lid->name == rid->name) return; + } + + throw unification_error(l, r); +} + +void type_mgr::bind(const std::string& s, type_ptr t) { + type_var* other = dynamic_cast(t.get()); + + if(other && other->name == s) return; + types[s] = t; +} diff --git a/code/compiler/07/type.hpp b/code/compiler/07/type.hpp new file mode 100644 index 0000000..09e525f --- /dev/null +++ b/code/compiler/07/type.hpp @@ -0,0 +1,65 @@ +#pragma once +#include +#include + +struct type_mgr; + +struct type { + virtual ~type() = default; + + virtual void print(const type_mgr& mgr, std::ostream& to) const = 0; +}; + +using type_ptr = std::shared_ptr; + +struct type_var : public type { + std::string name; + + type_var(std::string n) + : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_base : public type { + std::string name; + + type_base(std::string n) + : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_data : public type_base { + struct constructor { + int tag; + }; + + std::map constructors; + + type_data(std::string n) + : type_base(std::move(n)) {} +}; + +struct type_arr : public type { + type_ptr left; + type_ptr right; + + type_arr(type_ptr l, type_ptr r) + : left(std::move(l)), right(std::move(r)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; +}; + +struct type_mgr { + int last_id = 0; + std::map types; + + std::string new_type_name(); + type_ptr new_type(); + type_ptr new_arrow_type(); + + void unify(type_ptr l, type_ptr r); + type_ptr resolve(type_ptr t, type_var*& var) const; + void bind(const std::string& s, type_ptr t); +}; diff --git a/code/compiler/07/type_env.cpp b/code/compiler/07/type_env.cpp new file mode 100644 index 0000000..c11a759 --- /dev/null +++ 
b/code/compiler/07/type_env.cpp @@ -0,0 +1,16 @@ +#include "type_env.hpp" + +type_ptr type_env::lookup(const std::string& name) const { + auto it = names.find(name); + if(it != names.end()) return it->second; + if(parent) return parent->lookup(name); + return nullptr; +} + +void type_env::bind(const std::string& name, type_ptr t) { + names[name] = t; +} + +type_env type_env::scope() const { + return type_env(this); +} diff --git a/code/compiler/07/type_env.hpp b/code/compiler/07/type_env.hpp new file mode 100644 index 0000000..6470bdd --- /dev/null +++ b/code/compiler/07/type_env.hpp @@ -0,0 +1,16 @@ +#pragma once +#include +#include "type.hpp" + +struct type_env { + std::map names; + type_env const* parent = nullptr; + + type_env(type_env const* p) + : parent(p) {} + type_env() : type_env(nullptr) {} + + type_ptr lookup(const std::string& name) const; + void bind(const std::string& name, type_ptr t); + type_env scope() const; +}; diff --git a/content/blog/07_compiler_runtime.md b/content/blog/07_compiler_runtime.md index 2f9b6f7..20fd70c 100644 --- a/content/blog/07_compiler_runtime.md +++ b/content/blog/07_compiler_runtime.md @@ -20,9 +20,9 @@ our own stack, and whenever a graph-building function will want to modify the stack, it will have to call library routines for our stack implementation: ```C -void stack_push(struct stack_s* s, struct node_s* n); -struct node_s* stack_slide(struct stack_s* s, size_t c); -/* other stack operation */ +void stack_push(struct stack* s, struct node_s* n); +struct node_s* stack_slide(struct stack* s, size_t c); +/* other stack operations */ ``` Furthermore, we observe that Unwind does a lot of the heavy lifting in our @@ -41,4 +41,28 @@ while(1) { ``` In this implementation, Unwind is in charge. We won't need to insert -the Unwind operations at the end of our generated functions. 
+the Unwind operations at the end of our generated functions, and you +may have noticed we've already been following this strategy in our +implementation of G-machine compilation. + +We can start working on an implementation of the runtime right now, +beginning with the nodes: + +{{< codelines "C++" "compiler/07/runtime.c" 5 46 >}} + +We have a variety of different nodes that can be on the stack, but without +the magic of C++'s vtables and RTTI, we have to take care of the bookkeeping +ourselves. We add an enum, `node_tag`, which we will use to indicate what +type of node we're looking at. We also add a "base class" `node_base`, which +contains the fields that all nodes must contain (only `tag` at the moment). +We then make a `node_base` the first member of each node struct. Because C +places the first member at the very start of a struct, a pointer to a node struct can be interpreted as a pointer +to `node_base`, which is our lowest common denominator. To go back, we +check the `tag` of `node_base`, and cast the pointer appropriately. This way, +we mimic inheritance, in a very basic manner. + +We also add an `alloc_node`, which allocates a region of memory big enough +to hold any node. We do this because we sometimes mutate nodes (replacing +expressions with the results of their evaluation), changing their type. +We then want to be able to change a node without reallocating memory. +Since the biggest node we have is `node_app`, that's the size we allocate.