Compare commits

...

19 Commits

Author SHA1 Message Date
Danila Fedorin 21851e3a9c Add more content to part 12. 1 year ago
Danila Fedorin 600d5b91ea Remove unneeded parent class. 1 year ago
Danila Fedorin 09b90c3bbc Add line numbers to codelines shortode. 1 year ago
Danila Fedorin f6ca13d6dc Add more implementation content to part 12. 1 year ago
Danila Fedorin 9c4d7c514f Add more content to post 12 draft. 1 year ago
Danila Fedorin ad1946e9fb Add first draft of lambdas. 1 year ago
Danila Fedorin 68910458e8 Properly handle null types in pattern typechecking. 1 year ago
Danila Fedorin 240e87eca4 Use mangled names in variable environments. 1 year ago
Danila Fedorin 6b5f7e25b7 Maybe finish the let/in code? 2 years ago
Danila Fedorin e7229e644f Start working on translation. 2 years ago
Danila Fedorin 08c8aca144 Start working on a lifted version of a definition. 2 years ago
Danila Fedorin 7f8dae74ac Adjust type output. 2 years ago
Danila Fedorin 08503116ff Mark some definitions as global, so as not to capture them. 2 years ago
Danila Fedorin a1d679a59d No longer destroy the list of free variables. 2 years ago
Danila Fedorin 4586bd0188 Check for free variables in the environment before generalizing. 2 years ago
Danila Fedorin a97b50f497 Add parsing of let/in. 2 years ago
Danila Fedorin c84ff11d0d Add typechecking to let/in expressions. 2 years ago
Danila Fedorin e966e74487 Extract ordering functionality into definition group. 2 years ago
Danila Fedorin 3865abfb4d Add a struct to contain groups of mutually recursive definitions. 2 years ago
  1. 1
      code/compiler/12/CMakeLists.txt
  2. 266
      code/compiler/12/ast.cpp
  3. 81
      code/compiler/12/ast.hpp
  4. 114
      code/compiler/12/definition.cpp
  5. 31
      code/compiler/12/definition.hpp
  6. 2
      code/compiler/12/env.hpp
  7. 83
      code/compiler/12/global_scope.cpp
  8. 55
      code/compiler/12/global_scope.hpp
  9. 1
      code/compiler/12/graph.hpp
  10. 96
      code/compiler/12/main.cpp
  11. 33
      code/compiler/12/parser.y
  12. 3
      code/compiler/12/scanner.l
  13. 32
      code/compiler/12/type.cpp
  14. 4
      code/compiler/12/type.hpp
  15. 60
      code/compiler/12/type_env.cpp
  16. 28
      code/compiler/12/type_env.hpp
  17. 533
      content/blog/12_compiler_let_in_lambda/index.md
  18. 2
      themes/vanilla/layouts/shortcodes/codelines.html

1
code/compiler/12/CMakeLists.txt

@ -32,6 +32,7 @@ add_executable(compiler
binop.cpp binop.hpp
instruction.cpp instruction.hpp
graph.cpp graph.hpp
global_scope.cpp global_scope.hpp
${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS}
main.cpp

266
code/compiler/12/ast.cpp

@ -3,6 +3,7 @@
#include "binop.hpp"
#include "error.hpp"
#include "type_env.hpp"
#include "env.hpp"
static void print_indent(int n, std::ostream& to) {
while(n--) to << " ";
@ -13,14 +14,19 @@ void ast_int::print(int indent, std::ostream& to) const {
to << "INT: " << value << std::endl;
}
void ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
void ast_int::find_free(std::set<std::string>& into) {
}
type_ptr ast_int::typecheck(type_mgr& mgr) {
type_ptr ast_int::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
return type_ptr(new type_app(env->lookup_type("Int")));
}
void ast_int::translate(global_scope& scope) {
}
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushint(value)));
}
@ -30,20 +36,25 @@ void ast_lid::print(int indent, std::ostream& to) const {
to << "LID: " << id << std::endl;
}
void ast_lid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
if(env->lookup(id) == nullptr) into.insert(id);
void ast_lid::find_free(std::set<std::string>& into) {
into.insert(id);
}
type_ptr ast_lid::typecheck(type_mgr& mgr) {
type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
return env->lookup(id)->instantiate(mgr);
}
void ast_lid::translate(global_scope& scope) {
}
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
auto mangled_name = this->env->get_mangled_name(id);
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
(env->has_variable(mangled_name) && !this->env->is_global(id)) ?
(instruction*) new instruction_push(env->get_offset(mangled_name)) :
(instruction*) new instruction_pushglobal(mangled_name)));
}
void ast_uid::print(int indent, std::ostream& to) const {
@ -51,16 +62,22 @@ void ast_uid::print(int indent, std::ostream& to) const {
to << "UID: " << id << std::endl;
}
void ast_uid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
void ast_uid::find_free(std::set<std::string>& into) {
}
type_ptr ast_uid::typecheck(type_mgr& mgr) {
type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
return env->lookup(id)->instantiate(mgr);
}
void ast_uid::translate(global_scope& scope) {
}
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
into.push_back(instruction_ptr(
new instruction_pushglobal(this->env->get_mangled_name(id))));
}
void ast_binop::print(int indent, std::ostream& to) const {
@ -70,15 +87,15 @@ void ast_binop::print(int indent, std::ostream& to) const {
right->print(indent + 1, to);
}
void ast_binop::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
left->find_free(mgr, env, into);
right->find_free(mgr, env, into);
void ast_binop::find_free(std::set<std::string>& into) {
left->find_free(into);
right->find_free(into);
}
type_ptr ast_binop::typecheck(type_mgr& mgr) {
type_ptr ltype = left->typecheck(mgr);
type_ptr rtype = right->typecheck(mgr);
type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
@ -90,6 +107,11 @@ type_ptr ast_binop::typecheck(type_mgr& mgr) {
return return_type;
}
void ast_binop::translate(global_scope& scope) {
left->translate(scope);
right->translate(scope);
}
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
@ -106,15 +128,15 @@ void ast_app::print(int indent, std::ostream& to) const {
right->print(indent + 1, to);
}
void ast_app::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
left->find_free(mgr, env, into);
right->find_free(mgr, env, into);
void ast_app::find_free(std::set<std::string>& into) {
left->find_free(into);
right->find_free(into);
}
type_ptr ast_app::typecheck(type_mgr& mgr) {
type_ptr ltype = left->typecheck(mgr);
type_ptr rtype = right->typecheck(mgr);
type_ptr ast_app::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr return_type = mgr.new_type();
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
@ -122,6 +144,11 @@ type_ptr ast_app::typecheck(type_mgr& mgr) {
return return_type;
}
void ast_app::translate(global_scope& scope) {
left->translate(scope);
right->translate(scope);
}
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
@ -139,24 +166,30 @@ void ast_case::print(int indent, std::ostream& to) const {
}
}
void ast_case::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
of->find_free(mgr, env, into);
void ast_case::find_free(std::set<std::string>& into) {
of->find_free(into);
for(auto& branch : branches) {
type_env_ptr new_env = type_scope(env);
branch->pat->insert_bindings(mgr, new_env);
branch->expr->find_free(mgr, new_env, into);
std::set<std::string> free_in_branch;
std::set<std::string> pattern_variables;
branch->pat->find_variables(pattern_variables);
branch->expr->find_free(free_in_branch);
for(auto& free : free_in_branch) {
if(pattern_variables.find(free) == pattern_variables.end())
into.insert(free);
}
}
}
type_ptr ast_case::typecheck(type_mgr& mgr) {
type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_var* var;
type_ptr case_type = mgr.resolve(of->typecheck(mgr), var);
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
type_ptr branch_type = mgr.new_type();
for(auto& branch : branches) {
branch->pat->typecheck(case_type, mgr, branch->expr->env);
type_ptr curr_branch_type = branch->expr->typecheck(mgr);
type_env_ptr new_env = type_scope(env);
branch->pat->typecheck(case_type, mgr, new_env);
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
mgr.unify(branch_type, curr_branch_type);
}
@ -170,6 +203,13 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
return branch_type;
}
void ast_case::translate(global_scope& scope) {
of->translate(scope);
for(auto& branch : branches) {
branch->expr->translate(scope);
}
}
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
@ -199,7 +239,7 @@ void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) c
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
env_ptr new_env = env;
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
new_env = env_ptr(new env_var(branch->expr->env->get_mangled_name(*it), new_env));
}
branch_instructions.push_back(instruction_ptr(new instruction_split(
@ -226,16 +266,145 @@ void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) c
}
}
void ast_let::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "LET: " << std::endl;
in->print(indent + 1, to);
}
void ast_let::find_free(std::set<std::string>& into) {
definitions.find_free(into);
std::set<std::string> all_free;
in->find_free(all_free);
for(auto& free_var : all_free) {
if(definitions.defs_defn.find(free_var) == definitions.defs_defn.end())
into.insert(free_var);
}
}
type_ptr ast_let::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
definitions.typecheck(mgr, env);
return in->typecheck(mgr, definitions.env);
}
void ast_let::translate(global_scope& scope) {
for(auto& def : definitions.defs_data) {
def.second->into_globals(scope);
}
for(auto& def : definitions.defs_defn) {
size_t original_params = def.second->params.size();
std::string original_name = def.second->name;
auto& global_definition = def.second->into_global(scope);
size_t captured = global_definition.params.size() - original_params;
type_env_ptr mangled_env = type_scope(env);
mangled_env->bind(def.first, env->lookup(def.first), visibility::global);
mangled_env->set_mangled_name(def.first, global_definition.name);
ast_ptr global_app(new ast_lid(global_definition.name));
global_app->env = mangled_env;
for(auto& param : global_definition.params) {
if(!(captured--)) break;
ast_ptr new_arg(new ast_lid(param));
new_arg->env = env;
global_app = ast_ptr(new ast_app(std::move(global_app), std::move(new_arg)));
global_app->env = env;
}
translated_definitions.push_back({ def.first, std::move(global_app) });
}
in->translate(scope);
}
void ast_let::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_alloc(translated_definitions.size())));
env_ptr new_env = env;
for(auto& def : translated_definitions) {
new_env = env_ptr(new env_var(definitions.env->get_mangled_name(def.first), std::move(new_env)));
}
int offset = translated_definitions.size() - 1;
for(auto& def : translated_definitions) {
def.second->compile(new_env, into);
into.push_back(instruction_ptr(new instruction_update(offset--)));
}
in->compile(new_env, into);
into.push_back(instruction_ptr(new instruction_slide(translated_definitions.size())));
}
void ast_lambda::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "LAMBDA";
for(auto& param : params) {
to << " " << param;
}
to << std::endl;
body->print(indent+1, to);
}
void ast_lambda::find_free(std::set<std::string>& into) {
body->find_free(free_variables);
for(auto& param : params) {
free_variables.erase(param);
}
into.insert(free_variables.begin(), free_variables.end());
}
type_ptr ast_lambda::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
var_env = type_scope(env);
type_ptr return_type = mgr.new_type();
type_ptr full_type = return_type;
for(auto it = params.rbegin(); it != params.rend(); it++) {
type_ptr param_type = mgr.new_type();
var_env->bind(*it, param_type);
full_type = type_ptr(new type_arr(std::move(param_type), full_type));
}
mgr.unify(return_type, body->typecheck(mgr, var_env));
return full_type;
}
void ast_lambda::translate(global_scope& scope) {
std::vector<std::string> function_params;
for(auto& free_variable : free_variables) {
if(env->is_global(free_variable)) continue;
function_params.push_back(free_variable);
}
size_t captured_count = function_params.size();
function_params.insert(function_params.end(), params.begin(), params.end());
auto& new_function = scope.add_function("lambda", std::move(function_params), std::move(body));
type_env_ptr mangled_env = type_scope(env);
mangled_env->bind("lambda", type_scheme_ptr(nullptr), visibility::global);
mangled_env->set_mangled_name("lambda", new_function.name);
ast_ptr new_application = ast_ptr(new ast_lid("lambda"));
new_application->env = mangled_env;
for(auto& param : new_function.params) {
if(!(captured_count--)) break;
ast_ptr new_arg = ast_ptr(new ast_lid(param));
new_arg->env = env;
new_application = ast_ptr(new ast_app(std::move(new_application), std::move(new_arg)));
new_application->env = env;
}
translated = std::move(new_application);
}
void ast_lambda::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
translated->compile(env, into);
}
void pattern_var::print(std::ostream& to) const {
to << var;
}
void pattern_var::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
env->bind(var, mgr.new_type());
void pattern_var::find_variables(std::set<std::string>& into) const {
into.insert(var);
}
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
mgr.unify(env->lookup(var)->instantiate(mgr), t);
env->bind(var, t);
}
void pattern_constr::print(std::ostream& to) const {
@ -245,23 +414,22 @@ void pattern_constr::print(std::ostream& to) const {
}
}
void pattern_constr::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
for(auto& param : params) {
env->bind(param, mgr.new_type());
}
void pattern_constr::find_variables(std::set<std::string>& into) const {
into.insert(params.begin(), params.end());
}
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
type_ptr constructor_type = env->lookup(constr)->instantiate(mgr);
if(!constructor_type) {
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
if(!constructor_type_scheme) {
throw type_error(std::string("pattern using unknown constructor ") + constr);
}
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
for(auto& param : params) {
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
if(!arr) throw type_error("too many parameters in constructor pattern");
mgr.unify(env->lookup(param)->instantiate(mgr), arr->left);
env->bind(param, arr->left);
constructor_type = arr->right;
}

81
code/compiler/12/ast.hpp

@ -7,6 +7,8 @@
#include "binop.hpp"
#include "instruction.hpp"
#include "env.hpp"
#include "definition.hpp"
#include "global_scope.hpp"
struct ast {
type_env_ptr env;
@ -14,9 +16,9 @@ struct ast {
virtual ~ast() = default;
virtual void print(int indent, std::ostream& to) const = 0;
virtual void find_free(type_mgr& mgr,
type_env_ptr& env, std::set<std::string>& into) = 0;
virtual type_ptr typecheck(type_mgr& mgr) = 0;
virtual void find_free(std::set<std::string>& into) = 0;
virtual type_ptr typecheck(type_mgr& mgr, type_env_ptr& env) = 0;
virtual void translate(global_scope& scope) = 0;
virtual void compile(const env_ptr& env,
std::vector<instruction_ptr>& into) const = 0;
};
@ -27,7 +29,7 @@ struct pattern {
virtual ~pattern() = default;
virtual void print(std::ostream& to) const = 0;
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const = 0;
virtual void find_variables(std::set<std::string>& into) const = 0;
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
};
@ -50,8 +52,9 @@ struct ast_int : public ast {
: value(v) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -62,8 +65,9 @@ struct ast_lid : public ast {
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -74,8 +78,9 @@ struct ast_uid : public ast {
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -88,8 +93,9 @@ struct ast_binop : public ast {
: op(o), left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -101,8 +107,9 @@ struct ast_app : public ast {
: left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -115,8 +122,46 @@ struct ast_case : public ast {
: of(std::move(o)), branches(std::move(b)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_let : public ast {
using basic_definition = std::pair<std::string, ast_ptr>;
definition_group definitions;
ast_ptr in;
std::vector<basic_definition> translated_definitions;
ast_let(definition_group g, ast_ptr i)
: definitions(std::move(g)), in(std::move(i)) {}
void print(int indent, std::ostream& to) const;
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_lambda : public ast {
std::vector<std::string> params;
ast_ptr body;
type_env_ptr var_env;
std::set<std::string> free_variables;
ast_ptr translated;
ast_lambda(std::vector<std::string> ps, ast_ptr b)
: params(std::move(ps)), body(std::move(b)) {}
void print(int indent, std::ostream& to) const;
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -127,7 +172,7 @@ struct pattern_var : public pattern {
: var(std::move(v)) {}
void print(std::ostream &to) const;
void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
void find_variables(std::set<std::string>& into) const;
void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
};
@ -139,6 +184,6 @@ struct pattern_constr : public pattern {
: constr(std::move(c)), params(std::move(p)) {}
void print(std::ostream &to) const;
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
void find_variables(std::set<std::string>& into) const;
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
};

114
code/compiler/12/definition.cpp

@ -5,13 +5,20 @@
#include "llvm_context.hpp"
#include "type.hpp"
#include "type_env.hpp"
#include "graph.hpp"
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Type.h>
void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
void definition_defn::find_free() {
body->find_free(free_variables);
for(auto& param : params) {
free_variables.erase(param);
}
}
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v) {
this->env = env;
var_env = type_scope(env);
return_type = mgr.new_type();
full_type = return_type;
@ -21,39 +28,24 @@ void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) {
full_type = type_ptr(new type_arr(param_type, full_type));
var_env->bind(*it, param_type);
}
body->find_free(mgr, var_env, free_variables);
}
void definition_defn::insert_types(type_mgr& mgr) {
env->bind(name, full_type);
env->bind(name, full_type, v);
}
void definition_defn::typecheck(type_mgr& mgr) {
type_ptr body_type = body->typecheck(mgr);
type_ptr body_type = body->typecheck(mgr, var_env);
mgr.unify(return_type, body_type);
}
void definition_defn::compile() {
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
for(auto it = params.rbegin(); it != params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
body->compile(new_env, instructions);
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
void definition_defn::declare_llvm(llvm_context& ctx) {
generated_function = ctx.create_custom_function(name, params.size());
}
void definition_defn::generate_llvm(llvm_context& ctx) {
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
for(auto& instruction : instructions) {
instruction->gen_llvm(ctx, generated_function);
global_function& definition_defn::into_global(global_scope& scope) {
std::vector<std::string> all_params;
for(auto& free : free_variables) {
if(env->is_global(free)) continue;
all_params.push_back(free);
}
ctx.builder.CreateRetVoid();
all_params.insert(all_params.end(), params.begin(), params.end());
body->translate(scope);
return scope.add_function(name, std::move(all_params), std::move(body));
}
void definition_data::insert_types(type_env_ptr& env) {
@ -91,19 +83,63 @@ void definition_data::insert_constructors() const {
}
}
void definition_data::generate_llvm(llvm_context& ctx) {
void definition_data::into_globals(global_scope& scope) {
for(auto& constructor : constructors) {
auto new_function =
ctx.create_custom_function(constructor->name, constructor->types.size());
std::vector<instruction_ptr> instructions;
instructions.push_back(instruction_ptr(
new instruction_pack(constructor->tag, constructor->types.size())
));
instructions.push_back(instruction_ptr(new instruction_update(0)));
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
for (auto& instruction : instructions) {
instruction->gen_llvm(ctx, new_function);
global_constructor& c = scope.add_constructor(
constructor->name, constructor->tag, constructor->types.size());
env->set_mangled_name(constructor->name, c.name);
}
}
void definition_group::find_free(std::set<std::string>& into) {
for(auto& def_pair : defs_defn) {
def_pair.second->find_free();
for(auto& free_var : def_pair.second->free_variables) {
if(defs_defn.find(free_var) == defs_defn.end()) {
into.insert(free_var);
} else {
def_pair.second->nearby_variables.insert(free_var);
}
}
}
}
void definition_group::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = type_scope(env);
for(auto& def_data : defs_data) {
def_data.second->insert_types(this->env);
}
for(auto& def_data : defs_data) {
def_data.second->insert_constructors();
}
function_graph dependency_graph;
for(auto& def_defn : defs_defn) {
def_defn.second->find_free();
dependency_graph.add_function(def_defn.second->name);
for(auto& dependency : def_defn.second->nearby_variables) {
if(defs_defn.find(dependency) == defs_defn.end())
throw 0;
dependency_graph.add_edge(def_defn.second->name, dependency);
}
}
std::vector<group_ptr> groups = dependency_graph.compute_order();
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
auto& group = *it;
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->insert_types(mgr, this->env, vis);
}
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->typecheck(mgr);
}
for(auto& def_defnn_name : group->members) {
this->env->generalize(def_defnn_name, mgr);
}
ctx.builder.CreateRetVoid();
}
}

31
code/compiler/12/definition.hpp

@ -1,11 +1,13 @@
#pragma once
#include <memory>
#include <vector>
#include <map>
#include <set>
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "parsed_type.hpp"
#include "type_env.hpp"
#include "global_scope.hpp"
struct ast;
using ast_ptr = std::unique_ptr<ast>;
@ -29,24 +31,20 @@ struct definition_defn {
type_env_ptr env;
type_env_ptr var_env;
std::set<std::string> free_variables;
std::set<std::string> nearby_variables;
type_ptr full_type;
type_ptr return_type;
std::vector<instruction_ptr> instructions;
llvm::Function* generated_function;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
}
void find_free(type_mgr& mgr, type_env_ptr& env);
void insert_types(type_mgr& mgr);
void find_free();
void insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
void typecheck(type_mgr& mgr);
void compile();
void declare_llvm(llvm_context& ctx);
void generate_llvm(llvm_context& ctx);
global_function& into_global(global_scope& scope);
};
using definition_defn_ptr = std::unique_ptr<definition_defn>;
@ -66,7 +64,20 @@ struct definition_data {
void insert_types(type_env_ptr& env);
void insert_constructors() const;
void generate_llvm(llvm_context& ctx);
void into_globals(global_scope& scope);
};
using definition_data_ptr = std::unique_ptr<definition_data>;
struct definition_group {
std::map<std::string, definition_data_ptr> defs_data;
std::map<std::string, definition_defn_ptr> defs_defn;
visibility vis;
type_env_ptr env;
definition_group(visibility v = visibility::local) : vis(v) {}
void find_free(std::set<std::string>& into);
void typecheck(type_mgr& mgr, type_env_ptr& env);
};

2
code/compiler/12/env.hpp

@ -15,7 +15,7 @@ struct env_var : public env {
std::string name;
env_ptr parent;
env_var(std::string& n, env_ptr p)
env_var(std::string n, env_ptr p)
: name(std::move(n)), parent(std::move(p)) {}
int get_offset(const std::string& name) const;

83
code/compiler/12/global_scope.cpp

@ -0,0 +1,83 @@
#include "global_scope.hpp"
#include "ast.hpp"
void global_function::compile() {
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
for(auto it = params.rbegin(); it != params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
body->compile(new_env, instructions);
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
void global_function::declare_llvm(llvm_context& ctx) {
generated_function = ctx.create_custom_function(name, params.size());
}
void global_function::generate_llvm(llvm_context& ctx) {
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
for(auto& instruction : instructions) {
instruction->gen_llvm(ctx, generated_function);
}
ctx.builder.CreateRetVoid();
}
void global_constructor::generate_llvm(llvm_context& ctx) {
auto new_function =
ctx.create_custom_function(name, arity);
std::vector<instruction_ptr> instructions;
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
instructions.push_back(instruction_ptr(new instruction_update(0)));
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
for (auto& instruction : instructions) {
instruction->gen_llvm(ctx, new_function);
}
ctx.builder.CreateRetVoid();
}
global_function& global_scope::add_function(std::string n, std::vector<std::string> ps, ast_ptr b) {
global_function* new_function = new global_function(mangle_name(n), std::move(ps), std::move(b));
functions.push_back(global_function_ptr(new_function));
return *new_function;
}
global_constructor& global_scope::add_constructor(std::string n, int8_t t, size_t a) {
global_constructor* new_constructor = new global_constructor(mangle_name(n), t, a);
constructors.push_back(global_constructor_ptr(new_constructor));
return *new_constructor;
}
void global_scope::compile() {
for(auto& function : functions) {
function->compile();
}
}
void global_scope::generate_llvm(llvm_context& ctx) {
for(auto& constructor : constructors) {
constructor->generate_llvm(ctx);
}
for(auto& function : functions) {
function->declare_llvm(ctx);
}
for(auto& function : functions) {
function->generate_llvm(ctx);
}
}
std::string global_scope::mangle_name(const std::string& n) {
auto occurence_it = occurence_count.find(n);
int occurence = 0;
if(occurence_it != occurence_count.end()) {
occurence = occurence_it->second + 1;
}
occurence_count[n] = occurence;
std::string final_name = n;
if (occurence != 0) {
final_name += "_";
final_name += std::to_string(occurence);
}
return final_name;
}

55
code/compiler/12/global_scope.hpp

@ -0,0 +1,55 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <llvm/IR/Function.h>
#include "instruction.hpp"
struct ast;
using ast_ptr = std::unique_ptr<ast>;
struct global_function {
std::string name;
std::vector<std::string> params;
ast_ptr body;
std::vector<instruction_ptr> instructions;
llvm::Function* generated_function;
global_function(std::string n, std::vector<std::string> ps, ast_ptr b)
: name(std::move(n)), params(std::move(ps)), body(std::move(b)) {}
void compile();
void declare_llvm(llvm_context& ctx);
void generate_llvm(llvm_context& ctx);
};
using global_function_ptr = std::unique_ptr<global_function>;
struct global_constructor {
std::string name;
int8_t tag;
size_t arity;
global_constructor(std::string n, int8_t t, size_t a)
: name(std::move(n)), tag(t), arity(a) {}
void generate_llvm(llvm_context& ctx);
};
using global_constructor_ptr = std::unique_ptr<global_constructor>;
struct global_scope {
std::map<std::string, int> occurence_count;
std::vector<global_function_ptr> functions;
std::vector<global_constructor_ptr> constructors;
global_function& add_function(std::string n, std::vector<std::string> ps, ast_ptr b);
global_constructor& add_constructor(std::string n, int8_t t, size_t a);
void compile();
void generate_llvm(llvm_context& ctx);
private:
std::string mangle_name(const std::string& n);
};

1
code/compiler/12/graph.hpp

@ -7,7 +7,6 @@
#include <map>
#include <memory>
#include <vector>
#include <iostream>
using function = std::string;

96
code/compiler/12/main.cpp

@ -21,12 +21,10 @@ void yy::parser::error(const std::string& msg) {
std::cout << "An error occured: " << msg << std::endl;
}
extern std::map<std::string, definition_data_ptr> defs_data;
extern std::map<std::string, definition_defn_ptr> defs_defn;
extern definition_group global_defs;
void typecheck_program(
const std::map<std::string, definition_data_ptr>& defs_data,
const std::map<std::string, definition_defn_ptr>& defs_defn,
definition_group& defs,
type_mgr& mgr, type_env_ptr& env) {
type_ptr int_type = type_ptr(new type_base("Int"));
env->bind_type("Int", int_type);
@ -35,63 +33,32 @@ void typecheck_program(
type_ptr binop_type = type_ptr(new type_arr(
int_type_app,
type_ptr(new type_arr(int_type_app, int_type_app))));
env->bind("+", binop_type);
env->bind("-", binop_type);
env->bind("*", binop_type);
env->bind("/", binop_type);
env->bind("+", binop_type, visibility::global);
env->bind("-", binop_type, visibility::global);
env->bind("*", binop_type, visibility::global);
env->bind("/", binop_type, visibility::global);
for(auto& def_data : defs_data) {
def_data.second->insert_types(env);
}
for(auto& def_data : defs_data) {
def_data.second->insert_constructors();
}
function_graph dependency_graph;
for(auto& def_defn : defs_defn) {
def_defn.second->find_free(mgr, env);
dependency_graph.add_function(def_defn.second->name);
for(auto& dependency : def_defn.second->free_variables) {
if(defs_defn.find(dependency) == defs_defn.end())
throw 0;
dependency_graph.add_edge(def_defn.second->name, dependency);
}
}
std::vector<group_ptr> groups = dependency_graph.compute_order();
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
auto& group = *it;
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->insert_types(mgr);
}
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->typecheck(mgr);
}
for(auto& def_defnn_name : group->members) {
env->generalize(def_defnn_name, mgr);
}
}
std::set<std::string> free;
defs.find_free(free);
defs.typecheck(mgr, env);
for(auto& pair : env->names) {
for(auto& pair : defs.env->names) {
std::cout << pair.first << ": ";
pair.second->print(mgr, std::cout);
pair.second.type->print(mgr, std::cout);
std::cout << std::endl;
}
}
void compile_program(const std::map<std::string, definition_defn_ptr>& defs_defn) {
for(auto& def_defn : defs_defn) {
def_defn.second->compile();
for(auto& instruction : def_defn.second->instructions) {
instruction->print(0, std::cout);
}
std::cout << std::endl;
global_scope translate_program(definition_group& group) {
global_scope scope;
for(auto& data : group.defs_data) {
data.second->into_globals(scope);
}
for(auto& defn : group.defs_defn) {
auto& function = defn.second->into_global(scope);
function.body->env->parent->set_mangled_name(defn.first, function.name);
}
return scope;
}
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
@ -151,24 +118,14 @@ void output_llvm(llvm_context& ctx, const std::string& filename) {
}
}
void gen_llvm(
const std::map<std::string, definition_data_ptr>& defs_data,
const std::map<std::string, definition_defn_ptr>& defs_defn) {
void gen_llvm(global_scope& scope) {
llvm_context ctx;
gen_llvm_internal_op(ctx, PLUS);
gen_llvm_internal_op(ctx, MINUS);
gen_llvm_internal_op(ctx, TIMES);
gen_llvm_internal_op(ctx, DIVIDE);
for(auto& def_data : defs_data) {
def_data.second->generate_llvm(ctx);
}
for(auto& def_defn : defs_defn) {
def_defn.second->declare_llvm(ctx);
}
for(auto& def_defn : defs_defn) {
def_defn.second->generate_llvm(ctx);
}
scope.generate_llvm(ctx);
ctx.module.print(llvm::outs(), nullptr);
output_llvm(ctx, "program.o");
@ -180,7 +137,7 @@ int main() {
type_env_ptr env(new type_env);
parser.parse();
for(auto& def_defn : defs_defn) {
for(auto& def_defn : global_defs.defs_defn) {
std::cout << def_defn.second->name;
for(auto& param : def_defn.second->params) std::cout << " " << param;
std::cout << ":" << std::endl;
@ -188,9 +145,10 @@ int main() {
def_defn.second->body->print(1, std::cout);
}
try {
typecheck_program(defs_data, defs_defn, mgr, env);
compile_program(defs_defn);
gen_llvm(defs_data, defs_defn);
typecheck_program(global_defs, mgr, env);
global_scope scope = translate_program(global_defs);
scope.compile();
gen_llvm(scope);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";

33
code/compiler/12/parser.y

@ -7,13 +7,13 @@
#include "parser.hpp"
#include "parsed_type.hpp"
std::map<std::string, definition_data_ptr> defs_data;
std::map<std::string, definition_defn_ptr> defs_defn;
definition_group global_defs;
extern yy::parser::symbol_type yylex();
%}
%token BACKSLASH
%token PLUS
%token TIMES
%token MINUS
@ -23,6 +23,8 @@ extern yy::parser::symbol_type yylex();
%token DATA
%token CASE
%token OF
%token LET
%token IN
%token OCURLY
%token CCURLY
%token OPAREN
@ -41,8 +43,9 @@ extern yy::parser::symbol_type yylex();
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <std::vector<parsed_type_ptr>> typeList
%type <definition_group> definitions
%type <parsed_type_ptr> type nonArrowType typeListElement
%type <ast_ptr> aAdd aMul case app appBase
%type <ast_ptr> aAdd aMul case let lambda app appBase
%type <definition_data_ptr> data
%type <definition_defn_ptr> defn
%type <branch_ptr> branch
@ -54,17 +57,13 @@ extern yy::parser::symbol_type yylex();
%%
program
: definitions { }
: definitions { global_defs = std::move($1); global_defs.vis = visibility::global; }
;
definitions
: definitions definition { }
| definition { }
;
definition
: defn { auto name = $1->name; defs_defn[name] = std::move($1); }
| data { auto name = $1->name; defs_data[name] = std::move($1); }
: definitions defn { $$ = std::move($1); auto name = $2->name; $$.defs_defn[name] = std::move($2); }
| definitions data { $$ = std::move($1); auto name = $2->name; $$.defs_data[name] = std::move($2); }
| %empty { $$ = definition_group(); }
;
defn
@ -101,6 +100,18 @@ appBase
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
| OPAREN aAdd CPAREN { $$ = std::move($2); }
| case { $$ = std::move($1); }
| let { $$ = std::move($1); }
| lambda { $$ = std::move($1); }
;
let
: LET OCURLY definitions CCURLY IN OCURLY aAdd CCURLY
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7))); }
;
lambda
: BACKSLASH lowercaseParams ARROW OCURLY aAdd CCURLY
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5))); }
;
case

3
code/compiler/12/scanner.l

@ -13,6 +13,7 @@
%%
[ \n]+ {}
\\ { return yy::parser::make_BACKSLASH(); }
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
@ -22,6 +23,8 @@ defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
let { return yy::parser::make_LET(); }
in { return yy::parser::make_IN(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }

32
code/compiler/12/type.cpp

@ -5,6 +5,8 @@
#include <vector>
#include "error.hpp"
bool type::is_arrow(const type_mgr& mgr) const { return false; }
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
if(forall.size() != 0) {
to << "forall ";
@ -34,20 +36,35 @@ void type_var::print(const type_mgr& mgr, std::ostream& to) const {
}
}
bool type_var::is_arrow(const type_mgr& mgr) const {
auto it = mgr.types.find(name);
if(it != mgr.types.end()) {
return it->second->is_arrow(mgr);
} else {
return false;
}
}
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
to << name;
}
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
bool print_parenths = left->is_arrow(mgr);
if(print_parenths) to << "(";
left->print(mgr, to);
to << " -> (";
if(print_parenths) to << ")";
to << " -> ";
right->print(mgr, to);
to << ")";
}
bool type_arr::is_arrow(const type_mgr& mgr) const {
return true;
}
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
constructor->print(mgr, to);
to << "* ";
to << "*";
for(auto& arg : arguments) {
to << " ";
arg->print(mgr, to);
@ -185,3 +202,12 @@ void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
for(auto& arg : app->arguments) find_free(arg, into);
}
}
void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const {
std::set<std::string> monotype_free;
find_free(t->monotype, monotype_free);
for(auto& not_free : t->forall) {
monotype_free.erase(not_free);
}
into.insert(monotype_free.begin(), monotype_free.end());
}

4
code/compiler/12/type.hpp

@ -11,6 +11,7 @@ struct type {
virtual ~type() = default;
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
virtual bool is_arrow(const type_mgr& mgr) const;
};
using type_ptr = std::shared_ptr<type>;
@ -34,6 +35,7 @@ struct type_var : public type {
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
bool is_arrow(const type_mgr& mgr) const;
};
struct type_base : public type {
@ -65,6 +67,7 @@ struct type_arr : public type {
: left(std::move(l)), right(std::move(r)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
bool is_arrow(const type_mgr& mgr) const;
};
struct type_app : public type {
@ -92,4 +95,5 @@ struct type_mgr {
type_ptr resolve(type_ptr t, type_var*& var) const;
void bind(const std::string& s, type_ptr t);
void find_free(const type_ptr& t, std::set<std::string>& into) const;
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;