Compare commits
19 Commits
1905601aaa
...
21851e3a9c
Author | SHA1 | Date | |
---|---|---|---|
21851e3a9c | |||
600d5b91ea | |||
09b90c3bbc | |||
f6ca13d6dc | |||
9c4d7c514f | |||
ad1946e9fb | |||
68910458e8 | |||
240e87eca4 | |||
6b5f7e25b7 | |||
e7229e644f | |||
08c8aca144 | |||
7f8dae74ac | |||
08503116ff | |||
a1d679a59d | |||
4586bd0188 | |||
a97b50f497 | |||
c84ff11d0d | |||
e966e74487 | |||
3865abfb4d |
|
@ -32,6 +32,7 @@ add_executable(compiler
|
||||||
binop.cpp binop.hpp
|
binop.cpp binop.hpp
|
||||||
instruction.cpp instruction.hpp
|
instruction.cpp instruction.hpp
|
||||||
graph.cpp graph.hpp
|
graph.cpp graph.hpp
|
||||||
|
global_scope.cpp global_scope.hpp
|
||||||
${BISON_parser_OUTPUTS}
|
${BISON_parser_OUTPUTS}
|
||||||
${FLEX_scanner_OUTPUTS}
|
${FLEX_scanner_OUTPUTS}
|
||||||
main.cpp
|
main.cpp
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
static void print_indent(int n, std::ostream& to) {
|
static void print_indent(int n, std::ostream& to) {
|
||||||
while(n--) to << " ";
|
while(n--) to << " ";
|
||||||
|
@ -13,14 +14,19 @@ void ast_int::print(int indent, std::ostream& to) const {
|
||||||
to << "INT: " << value << std::endl;
|
to << "INT: " << value << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
void ast_int::find_free(std::set<std::string>& into) {
|
||||||
this->env = env;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_int::typecheck(type_mgr& mgr) {
|
type_ptr ast_int::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
return type_ptr(new type_app(env->lookup_type("Int")));
|
return type_ptr(new type_app(env->lookup_type("Int")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ast_int::translate(global_scope& scope) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||||
}
|
}
|
||||||
|
@ -30,20 +36,25 @@ void ast_lid::print(int indent, std::ostream& to) const {
|
||||||
to << "LID: " << id << std::endl;
|
to << "LID: " << id << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_lid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
void ast_lid::find_free(std::set<std::string>& into) {
|
||||||
this->env = env;
|
into.insert(id);
|
||||||
if(env->lookup(id) == nullptr) into.insert(id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_lid::typecheck(type_mgr& mgr) {
|
type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
return env->lookup(id)->instantiate(mgr);
|
return env->lookup(id)->instantiate(mgr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ast_lid::translate(global_scope& scope) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
auto mangled_name = this->env->get_mangled_name(id);
|
||||||
into.push_back(instruction_ptr(
|
into.push_back(instruction_ptr(
|
||||||
env->has_variable(id) ?
|
(env->has_variable(mangled_name) && !this->env->is_global(id)) ?
|
||||||
(instruction*) new instruction_push(env->get_offset(id)) :
|
(instruction*) new instruction_push(env->get_offset(mangled_name)) :
|
||||||
(instruction*) new instruction_pushglobal(id)));
|
(instruction*) new instruction_pushglobal(mangled_name)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_uid::print(int indent, std::ostream& to) const {
|
void ast_uid::print(int indent, std::ostream& to) const {
|
||||||
|
@ -51,16 +62,22 @@ void ast_uid::print(int indent, std::ostream& to) const {
|
||||||
to << "UID: " << id << std::endl;
|
to << "UID: " << id << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_uid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
void ast_uid::find_free(std::set<std::string>& into) {
|
||||||
this->env = env;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_uid::typecheck(type_mgr& mgr) {
|
type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
return env->lookup(id)->instantiate(mgr);
|
return env->lookup(id)->instantiate(mgr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ast_uid::translate(global_scope& scope) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
into.push_back(instruction_ptr(
|
||||||
|
new instruction_pushglobal(this->env->get_mangled_name(id))));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_binop::print(int indent, std::ostream& to) const {
|
void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
|
@ -70,15 +87,15 @@ void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
right->print(indent + 1, to);
|
right->print(indent + 1, to);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_binop::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
void ast_binop::find_free(std::set<std::string>& into) {
|
||||||
this->env = env;
|
left->find_free(into);
|
||||||
left->find_free(mgr, env, into);
|
right->find_free(into);
|
||||||
right->find_free(mgr, env, into);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_binop::typecheck(type_mgr& mgr) {
|
type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
type_ptr ltype = left->typecheck(mgr);
|
this->env = env;
|
||||||
type_ptr rtype = right->typecheck(mgr);
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
||||||
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||||
|
|
||||||
|
@ -90,6 +107,11 @@ type_ptr ast_binop::typecheck(type_mgr& mgr) {
|
||||||
return return_type;
|
return return_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ast_binop::translate(global_scope& scope) {
|
||||||
|
left->translate(scope);
|
||||||
|
right->translate(scope);
|
||||||
|
}
|
||||||
|
|
||||||
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
right->compile(env, into);
|
right->compile(env, into);
|
||||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
@ -106,15 +128,15 @@ void ast_app::print(int indent, std::ostream& to) const {
|
||||||
right->print(indent + 1, to);
|
right->print(indent + 1, to);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_app::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
void ast_app::find_free(std::set<std::string>& into) {
|
||||||
this->env = env;
|
left->find_free(into);
|
||||||
left->find_free(mgr, env, into);
|
right->find_free(into);
|
||||||
right->find_free(mgr, env, into);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_app::typecheck(type_mgr& mgr) {
|
type_ptr ast_app::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
type_ptr ltype = left->typecheck(mgr);
|
this->env = env;
|
||||||
type_ptr rtype = right->typecheck(mgr);
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
|
|
||||||
type_ptr return_type = mgr.new_type();
|
type_ptr return_type = mgr.new_type();
|
||||||
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||||
|
@ -122,6 +144,11 @@ type_ptr ast_app::typecheck(type_mgr& mgr) {
|
||||||
return return_type;
|
return return_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ast_app::translate(global_scope& scope) {
|
||||||
|
left->translate(scope);
|
||||||
|
right->translate(scope);
|
||||||
|
}
|
||||||
|
|
||||||
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
right->compile(env, into);
|
right->compile(env, into);
|
||||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
@ -139,24 +166,30 @@ void ast_case::print(int indent, std::ostream& to) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_case::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
void ast_case::find_free(std::set<std::string>& into) {
|
||||||
this->env = env;
|
of->find_free(into);
|
||||||
of->find_free(mgr, env, into);
|
|
||||||
for(auto& branch : branches) {
|
for(auto& branch : branches) {
|
||||||
type_env_ptr new_env = type_scope(env);
|
std::set<std::string> free_in_branch;
|
||||||
branch->pat->insert_bindings(mgr, new_env);
|
std::set<std::string> pattern_variables;
|
||||||
branch->expr->find_free(mgr, new_env, into);
|
branch->pat->find_variables(pattern_variables);
|
||||||
|
branch->expr->find_free(free_in_branch);
|
||||||
|
for(auto& free : free_in_branch) {
|
||||||
|
if(pattern_variables.find(free) == pattern_variables.end())
|
||||||
|
into.insert(free);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_case::typecheck(type_mgr& mgr) {
|
type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
type_var* var;
|
type_var* var;
|
||||||
type_ptr case_type = mgr.resolve(of->typecheck(mgr), var);
|
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||||
type_ptr branch_type = mgr.new_type();
|
type_ptr branch_type = mgr.new_type();
|
||||||
|
|
||||||
for(auto& branch : branches) {
|
for(auto& branch : branches) {
|
||||||
branch->pat->typecheck(case_type, mgr, branch->expr->env);
|
type_env_ptr new_env = type_scope(env);
|
||||||
type_ptr curr_branch_type = branch->expr->typecheck(mgr);
|
branch->pat->typecheck(case_type, mgr, new_env);
|
||||||
|
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
|
||||||
mgr.unify(branch_type, curr_branch_type);
|
mgr.unify(branch_type, curr_branch_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,6 +203,13 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
|
||||||
return branch_type;
|
return branch_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ast_case::translate(global_scope& scope) {
|
||||||
|
of->translate(scope);
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
branch->expr->translate(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
||||||
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
|
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
|
||||||
|
@ -199,7 +239,7 @@ void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) c
|
||||||
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
env_ptr new_env = env;
|
env_ptr new_env = env;
|
||||||
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
new_env = env_ptr(new env_var(*it, new_env));
|
new_env = env_ptr(new env_var(branch->expr->env->get_mangled_name(*it), new_env));
|
||||||
}
|
}
|
||||||
|
|
||||||
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
||||||
|
@ -226,16 +266,145 @@ void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) c
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ast_let::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "LET: " << std::endl;
|
||||||
|
in->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_let::find_free(std::set<std::string>& into) {
|
||||||
|
definitions.find_free(into);
|
||||||
|
std::set<std::string> all_free;
|
||||||
|
in->find_free(all_free);
|
||||||
|
for(auto& free_var : all_free) {
|
||||||
|
if(definitions.defs_defn.find(free_var) == definitions.defs_defn.end())
|
||||||
|
into.insert(free_var);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_let::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
definitions.typecheck(mgr, env);
|
||||||
|
return in->typecheck(mgr, definitions.env);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_let::translate(global_scope& scope) {
|
||||||
|
for(auto& def : definitions.defs_data) {
|
||||||
|
def.second->into_globals(scope);
|
||||||
|
}
|
||||||
|
for(auto& def : definitions.defs_defn) {
|
||||||
|
size_t original_params = def.second->params.size();
|
||||||
|
std::string original_name = def.second->name;
|
||||||
|
auto& global_definition = def.second->into_global(scope);
|
||||||
|
size_t captured = global_definition.params.size() - original_params;
|
||||||
|
|
||||||
|
type_env_ptr mangled_env = type_scope(env);
|
||||||
|
mangled_env->bind(def.first, env->lookup(def.first), visibility::global);
|
||||||
|
mangled_env->set_mangled_name(def.first, global_definition.name);
|
||||||
|
|
||||||
|
ast_ptr global_app(new ast_lid(global_definition.name));
|
||||||
|
global_app->env = mangled_env;
|
||||||
|
for(auto& param : global_definition.params) {
|
||||||
|
if(!(captured--)) break;
|
||||||
|
ast_ptr new_arg(new ast_lid(param));
|
||||||
|
new_arg->env = env;
|
||||||
|
global_app = ast_ptr(new ast_app(std::move(global_app), std::move(new_arg)));
|
||||||
|
global_app->env = env;
|
||||||
|
}
|
||||||
|
translated_definitions.push_back({ def.first, std::move(global_app) });
|
||||||
|
}
|
||||||
|
in->translate(scope);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_let::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_alloc(translated_definitions.size())));
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto& def : translated_definitions) {
|
||||||
|
new_env = env_ptr(new env_var(definitions.env->get_mangled_name(def.first), std::move(new_env)));
|
||||||
|
}
|
||||||
|
int offset = translated_definitions.size() - 1;
|
||||||
|
for(auto& def : translated_definitions) {
|
||||||
|
def.second->compile(new_env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_update(offset--)));
|
||||||
|
}
|
||||||
|
in->compile(new_env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_slide(translated_definitions.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "LAMBDA";
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
body->print(indent+1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::find_free(std::set<std::string>& into) {
|
||||||
|
body->find_free(free_variables);
|
||||||
|
for(auto& param : params) {
|
||||||
|
free_variables.erase(param);
|
||||||
|
}
|
||||||
|
into.insert(free_variables.begin(), free_variables.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_lambda::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
var_env = type_scope(env);
|
||||||
|
type_ptr return_type = mgr.new_type();
|
||||||
|
type_ptr full_type = return_type;
|
||||||
|
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
type_ptr param_type = mgr.new_type();
|
||||||
|
var_env->bind(*it, param_type);
|
||||||
|
full_type = type_ptr(new type_arr(std::move(param_type), full_type));
|
||||||
|
}
|
||||||
|
|
||||||
|
mgr.unify(return_type, body->typecheck(mgr, var_env));
|
||||||
|
return full_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::translate(global_scope& scope) {
|
||||||
|
std::vector<std::string> function_params;
|
||||||
|
for(auto& free_variable : free_variables) {
|
||||||
|
if(env->is_global(free_variable)) continue;
|
||||||
|
function_params.push_back(free_variable);
|
||||||
|
}
|
||||||
|
size_t captured_count = function_params.size();
|
||||||
|
function_params.insert(function_params.end(), params.begin(), params.end());
|
||||||
|
|
||||||
|
auto& new_function = scope.add_function("lambda", std::move(function_params), std::move(body));
|
||||||
|
type_env_ptr mangled_env = type_scope(env);
|
||||||
|
mangled_env->bind("lambda", type_scheme_ptr(nullptr), visibility::global);
|
||||||
|
mangled_env->set_mangled_name("lambda", new_function.name);
|
||||||
|
ast_ptr new_application = ast_ptr(new ast_lid("lambda"));
|
||||||
|
new_application->env = mangled_env;
|
||||||
|
|
||||||
|
for(auto& param : new_function.params) {
|
||||||
|
if(!(captured_count--)) break;
|
||||||
|
ast_ptr new_arg = ast_ptr(new ast_lid(param));
|
||||||
|
new_arg->env = env;
|
||||||
|
new_application = ast_ptr(new ast_app(std::move(new_application), std::move(new_arg)));
|
||||||
|
new_application->env = env;
|
||||||
|
}
|
||||||
|
translated = std::move(new_application);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
translated->compile(env, into);
|
||||||
|
}
|
||||||
|
|
||||||
void pattern_var::print(std::ostream& to) const {
|
void pattern_var::print(std::ostream& to) const {
|
||||||
to << var;
|
to << var;
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_var::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
|
void pattern_var::find_variables(std::set<std::string>& into) const {
|
||||||
env->bind(var, mgr.new_type());
|
into.insert(var);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||||
mgr.unify(env->lookup(var)->instantiate(mgr), t);
|
env->bind(var, t);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_constr::print(std::ostream& to) const {
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
|
@ -245,23 +414,22 @@ void pattern_constr::print(std::ostream& to) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_constr::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
|
void pattern_constr::find_variables(std::set<std::string>& into) const {
|
||||||
for(auto& param : params) {
|
into.insert(params.begin(), params.end());
|
||||||
env->bind(param, mgr.new_type());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||||
type_ptr constructor_type = env->lookup(constr)->instantiate(mgr);
|
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
|
||||||
if(!constructor_type) {
|
if(!constructor_type_scheme) {
|
||||||
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
||||||
}
|
}
|
||||||
|
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
|
||||||
|
|
||||||
for(auto& param : params) {
|
for(auto& param : params) {
|
||||||
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||||
if(!arr) throw type_error("too many parameters in constructor pattern");
|
if(!arr) throw type_error("too many parameters in constructor pattern");
|
||||||
|
|
||||||
mgr.unify(env->lookup(param)->instantiate(mgr), arr->left);
|
env->bind(param, arr->left);
|
||||||
constructor_type = arr->right;
|
constructor_type = arr->right;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
#include "instruction.hpp"
|
#include "instruction.hpp"
|
||||||
#include "env.hpp"
|
#include "env.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "global_scope.hpp"
|
||||||
|
|
||||||
struct ast {
|
struct ast {
|
||||||
type_env_ptr env;
|
type_env_ptr env;
|
||||||
|
@ -14,9 +16,9 @@ struct ast {
|
||||||
virtual ~ast() = default;
|
virtual ~ast() = default;
|
||||||
|
|
||||||
virtual void print(int indent, std::ostream& to) const = 0;
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
virtual void find_free(type_mgr& mgr,
|
virtual void find_free(std::set<std::string>& into) = 0;
|
||||||
type_env_ptr& env, std::set<std::string>& into) = 0;
|
virtual type_ptr typecheck(type_mgr& mgr, type_env_ptr& env) = 0;
|
||||||
virtual type_ptr typecheck(type_mgr& mgr) = 0;
|
virtual void translate(global_scope& scope) = 0;
|
||||||
virtual void compile(const env_ptr& env,
|
virtual void compile(const env_ptr& env,
|
||||||
std::vector<instruction_ptr>& into) const = 0;
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
};
|
};
|
||||||
|
@ -27,7 +29,7 @@ struct pattern {
|
||||||
virtual ~pattern() = default;
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
virtual void print(std::ostream& to) const = 0;
|
virtual void print(std::ostream& to) const = 0;
|
||||||
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const = 0;
|
virtual void find_variables(std::set<std::string>& into) const = 0;
|
||||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
|
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -50,8 +52,9 @@ struct ast_int : public ast {
|
||||||
: value(v) {}
|
: value(v) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
void find_free(std::set<std::string>& into);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -62,8 +65,9 @@ struct ast_lid : public ast {
|
||||||
: id(std::move(i)) {}
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
void find_free(std::set<std::string>& into);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -74,8 +78,9 @@ struct ast_uid : public ast {
|
||||||
: id(std::move(i)) {}
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
void find_free(std::set<std::string>& into);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -88,8 +93,9 @@ struct ast_binop : public ast {
|
||||||
: op(o), left(std::move(l)), right(std::move(r)) {}
|
: op(o), left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
void find_free(std::set<std::string>& into);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -101,8 +107,9 @@ struct ast_app : public ast {
|
||||||
: left(std::move(l)), right(std::move(r)) {}
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
void find_free(std::set<std::string>& into);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -115,8 +122,46 @@ struct ast_case : public ast {
|
||||||
: of(std::move(o)), branches(std::move(b)) {}
|
: of(std::move(o)), branches(std::move(b)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
void find_free(std::set<std::string>& into);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_let : public ast {
|
||||||
|
using basic_definition = std::pair<std::string, ast_ptr>;
|
||||||
|
|
||||||
|
definition_group definitions;
|
||||||
|
ast_ptr in;
|
||||||
|
|
||||||
|
std::vector<basic_definition> translated_definitions;
|
||||||
|
|
||||||
|
ast_let(definition_group g, ast_ptr i)
|
||||||
|
: definitions(std::move(g)), in(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void find_free(std::set<std::string>& into);
|
||||||
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_lambda : public ast {
|
||||||
|
std::vector<std::string> params;
|
||||||
|
ast_ptr body;
|
||||||
|
|
||||||
|
type_env_ptr var_env;
|
||||||
|
|
||||||
|
std::set<std::string> free_variables;
|
||||||
|
ast_ptr translated;
|
||||||
|
|
||||||
|
ast_lambda(std::vector<std::string> ps, ast_ptr b)
|
||||||
|
: params(std::move(ps)), body(std::move(b)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void find_free(std::set<std::string>& into);
|
||||||
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -127,7 +172,7 @@ struct pattern_var : public pattern {
|
||||||
: var(std::move(v)) {}
|
: var(std::move(v)) {}
|
||||||
|
|
||||||
void print(std::ostream &to) const;
|
void print(std::ostream &to) const;
|
||||||
void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
|
void find_variables(std::set<std::string>& into) const;
|
||||||
void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -139,6 +184,6 @@ struct pattern_constr : public pattern {
|
||||||
: constr(std::move(c)), params(std::move(p)) {}
|
: constr(std::move(c)), params(std::move(p)) {}
|
||||||
|
|
||||||
void print(std::ostream &to) const;
|
void print(std::ostream &to) const;
|
||||||
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
|
void find_variables(std::set<std::string>& into) const;
|
||||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
||||||
};
|
};
|
||||||
|
|
|
@ -5,13 +5,20 @@
|
||||||
#include "llvm_context.hpp"
|
#include "llvm_context.hpp"
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
|
#include "graph.hpp"
|
||||||
#include <llvm/IR/DerivedTypes.h>
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
#include <llvm/IR/Function.h>
|
#include <llvm/IR/Function.h>
|
||||||
#include <llvm/IR/Type.h>
|
#include <llvm/IR/Type.h>
|
||||||
|
|
||||||
void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) {
|
void definition_defn::find_free() {
|
||||||
this->env = env;
|
body->find_free(free_variables);
|
||||||
|
for(auto& param : params) {
|
||||||
|
free_variables.erase(param);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v) {
|
||||||
|
this->env = env;
|
||||||
var_env = type_scope(env);
|
var_env = type_scope(env);
|
||||||
return_type = mgr.new_type();
|
return_type = mgr.new_type();
|
||||||
full_type = return_type;
|
full_type = return_type;
|
||||||
|
@ -21,39 +28,24 @@ void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) {
|
||||||
full_type = type_ptr(new type_arr(param_type, full_type));
|
full_type = type_ptr(new type_arr(param_type, full_type));
|
||||||
var_env->bind(*it, param_type);
|
var_env->bind(*it, param_type);
|
||||||
}
|
}
|
||||||
|
env->bind(name, full_type, v);
|
||||||
body->find_free(mgr, var_env, free_variables);
|
|
||||||
}
|
|
||||||
|
|
||||||
void definition_defn::insert_types(type_mgr& mgr) {
|
|
||||||
env->bind(name, full_type);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_defn::typecheck(type_mgr& mgr) {
|
void definition_defn::typecheck(type_mgr& mgr) {
|
||||||
type_ptr body_type = body->typecheck(mgr);
|
type_ptr body_type = body->typecheck(mgr, var_env);
|
||||||
mgr.unify(return_type, body_type);
|
mgr.unify(return_type, body_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_defn::compile() {
|
|
||||||
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
|
||||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
|
||||||
new_env = env_ptr(new env_var(*it, new_env));
|
|
||||||
}
|
|
||||||
body->compile(new_env, instructions);
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
|
||||||
}
|
|
||||||
|
|
||||||
void definition_defn::declare_llvm(llvm_context& ctx) {
|
global_function& definition_defn::into_global(global_scope& scope) {
|
||||||
generated_function = ctx.create_custom_function(name, params.size());
|
std::vector<std::string> all_params;
|
||||||
}
|
for(auto& free : free_variables) {
|
||||||
|
if(env->is_global(free)) continue;
|
||||||
void definition_defn::generate_llvm(llvm_context& ctx) {
|
all_params.push_back(free);
|
||||||
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
|
||||||
for(auto& instruction : instructions) {
|
|
||||||
instruction->gen_llvm(ctx, generated_function);
|
|
||||||
}
|
}
|
||||||
ctx.builder.CreateRetVoid();
|
all_params.insert(all_params.end(), params.begin(), params.end());
|
||||||
|
body->translate(scope);
|
||||||
|
return scope.add_function(name, std::move(all_params), std::move(body));
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_data::insert_types(type_env_ptr& env) {
|
void definition_data::insert_types(type_env_ptr& env) {
|
||||||
|
@ -91,19 +83,63 @@ void definition_data::insert_constructors() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_data::generate_llvm(llvm_context& ctx) {
|
void definition_data::into_globals(global_scope& scope) {
|
||||||
for(auto& constructor : constructors) {
|
for(auto& constructor : constructors) {
|
||||||
auto new_function =
|
global_constructor& c = scope.add_constructor(
|
||||||
ctx.create_custom_function(constructor->name, constructor->types.size());
|
constructor->name, constructor->tag, constructor->types.size());
|
||||||
std::vector<instruction_ptr> instructions;
|
env->set_mangled_name(constructor->name, c.name);
|
||||||
instructions.push_back(instruction_ptr(
|
}
|
||||||
new instruction_pack(constructor->tag, constructor->types.size())
|
}
|
||||||
));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
void definition_group::find_free(std::set<std::string>& into) {
|
||||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
for(auto& def_pair : defs_defn) {
|
||||||
for (auto& instruction : instructions) {
|
def_pair.second->find_free();
|
||||||
instruction->gen_llvm(ctx, new_function);
|
for(auto& free_var : def_pair.second->free_variables) {
|
||||||
}
|
if(defs_defn.find(free_var) == defs_defn.end()) {
|
||||||
ctx.builder.CreateRetVoid();
|
into.insert(free_var);
|
||||||
|
} else {
|
||||||
|
def_pair.second->nearby_variables.insert(free_var);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_group::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = type_scope(env);
|
||||||
|
|
||||||
|
for(auto& def_data : defs_data) {
|
||||||
|
def_data.second->insert_types(this->env);
|
||||||
|
}
|
||||||
|
for(auto& def_data : defs_data) {
|
||||||
|
def_data.second->insert_constructors();
|
||||||
|
}
|
||||||
|
|
||||||
|
function_graph dependency_graph;
|
||||||
|
|
||||||
|
for(auto& def_defn : defs_defn) {
|
||||||
|
def_defn.second->find_free();
|
||||||
|
dependency_graph.add_function(def_defn.second->name);
|
||||||
|
|
||||||
|
for(auto& dependency : def_defn.second->nearby_variables) {
|
||||||
|
if(defs_defn.find(dependency) == defs_defn.end())
|
||||||
|
throw 0;
|
||||||
|
dependency_graph.add_edge(def_defn.second->name, dependency);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<group_ptr> groups = dependency_graph.compute_order();
|
||||||
|
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
|
||||||
|
auto& group = *it;
|
||||||
|
for(auto& def_defnn_name : group->members) {
|
||||||
|
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||||
|
def_defn->insert_types(mgr, this->env, vis);
|
||||||
|
}
|
||||||
|
for(auto& def_defnn_name : group->members) {
|
||||||
|
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||||
|
def_defn->typecheck(mgr);
|
||||||
|
}
|
||||||
|
for(auto& def_defnn_name : group->members) {
|
||||||
|
this->env->generalize(def_defnn_name, mgr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include "instruction.hpp"
|
#include "instruction.hpp"
|
||||||
#include "llvm_context.hpp"
|
#include "llvm_context.hpp"
|
||||||
#include "parsed_type.hpp"
|
#include "parsed_type.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
|
#include "global_scope.hpp"
|
||||||
|
|
||||||
struct ast;
|
struct ast;
|
||||||
using ast_ptr = std::unique_ptr<ast>;
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
@ -29,24 +31,20 @@ struct definition_defn {
|
||||||
type_env_ptr env;
|
type_env_ptr env;
|
||||||
type_env_ptr var_env;
|
type_env_ptr var_env;
|
||||||
std::set<std::string> free_variables;
|
std::set<std::string> free_variables;
|
||||||
|
std::set<std::string> nearby_variables;
|
||||||
type_ptr full_type;
|
type_ptr full_type;
|
||||||
type_ptr return_type;
|
type_ptr return_type;
|
||||||
|
|
||||||
std::vector<instruction_ptr> instructions;
|
|
||||||
|
|
||||||
llvm::Function* generated_function;
|
|
||||||
|
|
||||||
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
|
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
|
||||||
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
|
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env);
|
void find_free();
|
||||||
void insert_types(type_mgr& mgr);
|
void insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
|
||||||
void typecheck(type_mgr& mgr);
|
void typecheck(type_mgr& mgr);
|
||||||
void compile();
|
|
||||||
void declare_llvm(llvm_context& ctx);
|
global_function& into_global(global_scope& scope);
|
||||||
void generate_llvm(llvm_context& ctx);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
using definition_defn_ptr = std::unique_ptr<definition_defn>;
|
using definition_defn_ptr = std::unique_ptr<definition_defn>;
|
||||||
|
@ -66,7 +64,20 @@ struct definition_data {
|
||||||
|
|
||||||
void insert_types(type_env_ptr& env);
|
void insert_types(type_env_ptr& env);
|
||||||
void insert_constructors() const;
|
void insert_constructors() const;
|
||||||
void generate_llvm(llvm_context& ctx);
|
|
||||||
|
void into_globals(global_scope& scope);
|
||||||
};
|
};
|
||||||
|
|
||||||
using definition_data_ptr = std::unique_ptr<definition_data>;
|
using definition_data_ptr = std::unique_ptr<definition_data>;
|
||||||
|
|
||||||
|
struct definition_group {
|
||||||
|
std::map<std::string, definition_data_ptr> defs_data;
|
||||||
|
std::map<std::string, definition_defn_ptr> defs_defn;
|
||||||
|
visibility vis;
|
||||||
|
type_env_ptr env;
|
||||||
|
|
||||||
|
definition_group(visibility v = visibility::local) : vis(v) {}
|
||||||
|
|
||||||
|
void find_free(std::set<std::string>& into);
|
||||||
|
void typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
};
|
||||||
|
|
|
@ -15,7 +15,7 @@ struct env_var : public env {
|
||||||
std::string name;
|
std::string name;
|
||||||
env_ptr parent;
|
env_ptr parent;
|
||||||
|
|
||||||
env_var(std::string& n, env_ptr p)
|
env_var(std::string n, env_ptr p)
|
||||||
: name(std::move(n)), parent(std::move(p)) {}
|
: name(std::move(n)), parent(std::move(p)) {}
|
||||||
|
|
||||||
int get_offset(const std::string& name) const;
|
int get_offset(const std::string& name) const;
|
||||||
|
|
83
code/compiler/12/global_scope.cpp
Normal file
83
code/compiler/12/global_scope.cpp
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
#include "global_scope.hpp"
|
||||||
|
#include "ast.hpp"
|
||||||
|
|
||||||
|
void global_function::compile() {
|
||||||
|
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
body->compile(new_env, instructions);
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_function::declare_llvm(llvm_context& ctx) {
|
||||||
|
generated_function = ctx.create_custom_function(name, params.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_function::generate_llvm(llvm_context& ctx) {
|
||||||
|
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
||||||
|
for(auto& instruction : instructions) {
|
||||||
|
instruction->gen_llvm(ctx, generated_function);
|
||||||
|
}
|
||||||
|
ctx.builder.CreateRetVoid();
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_constructor::generate_llvm(llvm_context& ctx) {
|
||||||
|
auto new_function =
|
||||||
|
ctx.create_custom_function(name, arity);
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||||
|
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||||
|
for (auto& instruction : instructions) {
|
||||||
|
instruction->gen_llvm(ctx, new_function);
|
||||||
|
}
|
||||||
|
ctx.builder.CreateRetVoid();
|
||||||
|
}
|
||||||
|
|
||||||
|
global_function& global_scope::add_function(std::string n, std::vector<std::string> ps, ast_ptr b) {
|
||||||
|
global_function* new_function = new global_function(mangle_name(n), std::move(ps), std::move(b));
|
||||||
|
functions.push_back(global_function_ptr(new_function));
|
||||||
|
return *new_function;
|
||||||
|
}
|
||||||
|
|
||||||
|
global_constructor& global_scope::add_constructor(std::string n, int8_t t, size_t a) {
|
||||||
|
global_constructor* new_constructor = new global_constructor(mangle_name(n), t, a);
|
||||||
|
constructors.push_back(global_constructor_ptr(new_constructor));
|
||||||
|
return *new_constructor;
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_scope::compile() {
|
||||||
|
for(auto& function : functions) {
|
||||||
|
function->compile();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_scope::generate_llvm(llvm_context& ctx) {
|
||||||
|
for(auto& constructor : constructors) {
|
||||||
|
constructor->generate_llvm(ctx);
|
||||||
|
}
|
||||||
|
for(auto& function : functions) {
|
||||||
|
function->declare_llvm(ctx);
|
||||||
|
}
|
||||||
|
for(auto& function : functions) {
|
||||||
|
function->generate_llvm(ctx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string global_scope::mangle_name(const std::string& n) {
|
||||||
|
auto occurence_it = occurence_count.find(n);
|
||||||
|
int occurence = 0;
|
||||||
|
if(occurence_it != occurence_count.end()) {
|
||||||
|
occurence = occurence_it->second + 1;
|
||||||
|
}
|
||||||
|
occurence_count[n] = occurence;
|
||||||
|
|
||||||
|
std::string final_name = n;
|
||||||
|
if (occurence != 0) {
|
||||||
|
final_name += "_";
|
||||||
|
final_name += std::to_string(occurence);
|
||||||
|
}
|
||||||
|
return final_name;
|
||||||
|
}
|
55
code/compiler/12/global_scope.hpp
Normal file
55
code/compiler/12/global_scope.hpp
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include "instruction.hpp"
|
||||||
|
|
||||||
|
struct ast;
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct global_function {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
ast_ptr body;
|
||||||
|
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
llvm::Function* generated_function;
|
||||||
|
|
||||||
|
global_function(std::string n, std::vector<std::string> ps, ast_ptr b)
|
||||||
|
: name(std::move(n)), params(std::move(ps)), body(std::move(b)) {}
|
||||||
|
|
||||||
|
void compile();
|
||||||
|
void declare_llvm(llvm_context& ctx);
|
||||||
|
void generate_llvm(llvm_context& ctx);
|
||||||
|
};
|
||||||
|
|
||||||
|
using global_function_ptr = std::unique_ptr<global_function>;
|
||||||
|
|
||||||
|
struct global_constructor {
|
||||||
|
std::string name;
|
||||||
|
int8_t tag;
|
||||||
|
size_t arity;
|
||||||
|
|
||||||
|
global_constructor(std::string n, int8_t t, size_t a)
|
||||||
|
: name(std::move(n)), tag(t), arity(a) {}
|
||||||
|
|
||||||
|
void generate_llvm(llvm_context& ctx);
|
||||||
|
};
|
||||||
|
|
||||||
|
using global_constructor_ptr = std::unique_ptr<global_constructor>;
|
||||||
|
|
||||||
|
struct global_scope {
|
||||||
|
std::map<std::string, int> occurence_count;
|
||||||
|
std::vector<global_function_ptr> functions;
|
||||||
|
std::vector<global_constructor_ptr> constructors;
|
||||||
|
|
||||||
|
global_function& add_function(std::string n, std::vector<std::string> ps, ast_ptr b);
|
||||||
|
global_constructor& add_constructor(std::string n, int8_t t, size_t a);
|
||||||
|
|
||||||
|
void compile();
|
||||||
|
void generate_llvm(llvm_context& ctx);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string mangle_name(const std::string& n);
|
||||||
|
};
|
|
@ -7,7 +7,6 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
using function = std::string;
|
using function = std::string;
|
||||||
|
|
||||||
|
|
|
@ -21,12 +21,10 @@ void yy::parser::error(const std::string& msg) {
|
||||||
std::cout << "An error occured: " << msg << std::endl;
|
std::cout << "An error occured: " << msg << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern std::map<std::string, definition_data_ptr> defs_data;
|
extern definition_group global_defs;
|
||||||
extern std::map<std::string, definition_defn_ptr> defs_defn;
|
|
||||||
|
|
||||||
void typecheck_program(
|
void typecheck_program(
|
||||||
const std::map<std::string, definition_data_ptr>& defs_data,
|
definition_group& defs,
|
||||||
const std::map<std::string, definition_defn_ptr>& defs_defn,
|
|
||||||
type_mgr& mgr, type_env_ptr& env) {
|
type_mgr& mgr, type_env_ptr& env) {
|
||||||
type_ptr int_type = type_ptr(new type_base("Int"));
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
env->bind_type("Int", int_type);
|
env->bind_type("Int", int_type);
|
||||||
|
@ -35,63 +33,32 @@ void typecheck_program(
|
||||||
type_ptr binop_type = type_ptr(new type_arr(
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
int_type_app,
|
int_type_app,
|
||||||
type_ptr(new type_arr(int_type_app, int_type_app))));
|
type_ptr(new type_arr(int_type_app, int_type_app))));
|
||||||
env->bind("+", binop_type);
|
env->bind("+", binop_type, visibility::global);
|
||||||
env->bind("-", binop_type);
|
env->bind("-", binop_type, visibility::global);
|
||||||
env->bind("*", binop_type);
|
env->bind("*", binop_type, visibility::global);
|
||||||
env->bind("/", binop_type);
|
env->bind("/", binop_type, visibility::global);
|
||||||
|
|
||||||
for(auto& def_data : defs_data) {
|
std::set<std::string> free;
|
||||||
def_data.second->insert_types(env);
|
defs.find_free(free);
|
||||||
}
|
defs.typecheck(mgr, env);
|
||||||
for(auto& def_data : defs_data) {
|
|
||||||
def_data.second->insert_constructors();
|
|
||||||
}
|
|
||||||
|
|
||||||
function_graph dependency_graph;
|
for(auto& pair : defs.env->names) {
|
||||||
|
|
||||||
for(auto& def_defn : defs_defn) {
|
|
||||||
def_defn.second->find_free(mgr, env);
|
|
||||||
dependency_graph.add_function(def_defn.second->name);
|
|
||||||
|
|
||||||
for(auto& dependency : def_defn.second->free_variables) {
|
|
||||||
if(defs_defn.find(dependency) == defs_defn.end())
|
|
||||||
throw 0;
|
|
||||||
dependency_graph.add_edge(def_defn.second->name, dependency);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<group_ptr> groups = dependency_graph.compute_order();
|
|
||||||
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
|
|
||||||
auto& group = *it;
|
|
||||||
for(auto& def_defnn_name : group->members) {
|
|
||||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
|
||||||
def_defn->insert_types(mgr);
|
|
||||||
}
|
|
||||||
for(auto& def_defnn_name : group->members) {
|
|
||||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
|
||||||
def_defn->typecheck(mgr);
|
|
||||||
}
|
|
||||||
for(auto& def_defnn_name : group->members) {
|
|
||||||
env->generalize(def_defnn_name, mgr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(auto& pair : env->names) {
|
|
||||||
std::cout << pair.first << ": ";
|
std::cout << pair.first << ": ";
|
||||||
pair.second->print(mgr, std::cout);
|
pair.second.type->print(mgr, std::cout);
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void compile_program(const std::map<std::string, definition_defn_ptr>& defs_defn) {
|
global_scope translate_program(definition_group& group) {
|
||||||
for(auto& def_defn : defs_defn) {
|
global_scope scope;
|
||||||
def_defn.second->compile();
|
for(auto& data : group.defs_data) {
|
||||||
|
data.second->into_globals(scope);
|
||||||
for(auto& instruction : def_defn.second->instructions) {
|
|
||||||
instruction->print(0, std::cout);
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
}
|
||||||
|
for(auto& defn : group.defs_defn) {
|
||||||
|
auto& function = defn.second->into_global(scope);
|
||||||
|
function.body->env->parent->set_mangled_name(defn.first, function.name);
|
||||||
|
}
|
||||||
|
return scope;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
|
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
|
||||||
|
@ -151,24 +118,14 @@ void output_llvm(llvm_context& ctx, const std::string& filename) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void gen_llvm(
|
void gen_llvm(global_scope& scope) {
|
||||||
const std::map<std::string, definition_data_ptr>& defs_data,
|
|
||||||
const std::map<std::string, definition_defn_ptr>& defs_defn) {
|
|
||||||
llvm_context ctx;
|
llvm_context ctx;
|
||||||
gen_llvm_internal_op(ctx, PLUS);
|
gen_llvm_internal_op(ctx, PLUS);
|
||||||
gen_llvm_internal_op(ctx, MINUS);
|
gen_llvm_internal_op(ctx, MINUS);
|
||||||
gen_llvm_internal_op(ctx, TIMES);
|
gen_llvm_internal_op(ctx, TIMES);
|
||||||
gen_llvm_internal_op(ctx, DIVIDE);
|
gen_llvm_internal_op(ctx, DIVIDE);
|
||||||
|
|
||||||
for(auto& def_data : defs_data) {
|
scope.generate_llvm(ctx);
|
||||||
def_data.second->generate_llvm(ctx);
|
|
||||||
}
|
|
||||||
for(auto& def_defn : defs_defn) {
|
|
||||||
def_defn.second->declare_llvm(ctx);
|
|
||||||
}
|
|
||||||
for(auto& def_defn : defs_defn) {
|
|
||||||
def_defn.second->generate_llvm(ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.module.print(llvm::outs(), nullptr);
|
ctx.module.print(llvm::outs(), nullptr);
|
||||||
output_llvm(ctx, "program.o");
|
output_llvm(ctx, "program.o");
|
||||||
|
@ -180,7 +137,7 @@ int main() {
|
||||||
type_env_ptr env(new type_env);
|
type_env_ptr env(new type_env);
|
||||||
|
|
||||||
parser.parse();
|
parser.parse();
|
||||||
for(auto& def_defn : defs_defn) {
|
for(auto& def_defn : global_defs.defs_defn) {
|
||||||
std::cout << def_defn.second->name;
|
std::cout << def_defn.second->name;
|
||||||
for(auto& param : def_defn.second->params) std::cout << " " << param;
|
for(auto& param : def_defn.second->params) std::cout << " " << param;
|
||||||
std::cout << ":" << std::endl;
|
std::cout << ":" << std::endl;
|
||||||
|
@ -188,9 +145,10 @@ int main() {
|
||||||
def_defn.second->body->print(1, std::cout);
|
def_defn.second->body->print(1, std::cout);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
typecheck_program(defs_data, defs_defn, mgr, env);
|
typecheck_program(global_defs, mgr, env);
|
||||||
compile_program(defs_defn);
|
global_scope scope = translate_program(global_defs);
|
||||||
gen_llvm(defs_data, defs_defn);
|
scope.compile();
|
||||||
|
gen_llvm(scope);
|
||||||
} catch(unification_error& err) {
|
} catch(unification_error& err) {
|
||||||
std::cout << "failed to unify types: " << std::endl;
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
std::cout << " (1) \033[34m";
|
std::cout << " (1) \033[34m";
|
||||||
|
|
|
@ -7,13 +7,13 @@
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
#include "parsed_type.hpp"
|
#include "parsed_type.hpp"
|
||||||
|
|
||||||
std::map<std::string, definition_data_ptr> defs_data;
|
definition_group global_defs;
|
||||||
std::map<std::string, definition_defn_ptr> defs_defn;
|
|
||||||
|
|
||||||
extern yy::parser::symbol_type yylex();
|
extern yy::parser::symbol_type yylex();
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
%token BACKSLASH
|
||||||
%token PLUS
|
%token PLUS
|
||||||
%token TIMES
|
%token TIMES
|
||||||
%token MINUS
|
%token MINUS
|
||||||
|
@ -23,6 +23,8 @@ extern yy::parser::symbol_type yylex();
|
||||||
%token DATA
|
%token DATA
|
||||||
%token CASE
|
%token CASE
|
||||||
%token OF
|
%token OF
|
||||||
|
%token LET
|
||||||
|
%token IN
|
||||||
%token OCURLY
|
%token OCURLY
|
||||||
%token CCURLY
|
%token CCURLY
|
||||||
%token OPAREN
|
%token OPAREN
|
||||||
|
@ -41,8 +43,9 @@ extern yy::parser::symbol_type yylex();
|
||||||
%type <std::vector<branch_ptr>> branches
|
%type <std::vector<branch_ptr>> branches
|
||||||
%type <std::vector<constructor_ptr>> constructors
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
%type <std::vector<parsed_type_ptr>> typeList
|
%type <std::vector<parsed_type_ptr>> typeList
|
||||||
|
%type <definition_group> definitions
|
||||||
%type <parsed_type_ptr> type nonArrowType typeListElement
|
%type <parsed_type_ptr> type nonArrowType typeListElement
|
||||||
%type <ast_ptr> aAdd aMul case app appBase
|
%type <ast_ptr> aAdd aMul case let lambda app appBase
|
||||||
%type <definition_data_ptr> data
|
%type <definition_data_ptr> data
|
||||||
%type <definition_defn_ptr> defn
|
%type <definition_defn_ptr> defn
|
||||||
%type <branch_ptr> branch
|
%type <branch_ptr> branch
|
||||||
|
@ -54,17 +57,13 @@ extern yy::parser::symbol_type yylex();
|
||||||
%%
|
%%
|
||||||
|
|
||||||
program
|
program
|
||||||
: definitions { }
|
: definitions { global_defs = std::move($1); global_defs.vis = visibility::global; }
|
||||||
;
|
;
|
||||||
|
|
||||||
definitions
|
definitions
|
||||||
: definitions definition { }
|
: definitions defn { $$ = std::move($1); auto name = $2->name; $$.defs_defn[name] = std::move($2); }
|
||||||
| definition { }
|
| definitions data { $$ = std::move($1); auto name = $2->name; $$.defs_data[name] = std::move($2); }
|
||||||
;
|
| %empty { $$ = definition_group(); }
|
||||||
|
|
||||||
definition
|
|
||||||
: defn { auto name = $1->name; defs_defn[name] = std::move($1); }
|
|
||||||
| data { auto name = $1->name; defs_data[name] = std::move($1); }
|
|
||||||
;
|
;
|
||||||
|
|
||||||
defn
|
defn
|
||||||
|
@ -101,6 +100,18 @@ appBase
|
||||||
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
|
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
|
||||||
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||||
| case { $$ = std::move($1); }
|
| case { $$ = std::move($1); }
|
||||||
|
| let { $$ = std::move($1); }
|
||||||
|
| lambda { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
let
|
||||||
|
: LET OCURLY definitions CCURLY IN OCURLY aAdd CCURLY
|
||||||
|
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
lambda
|
||||||
|
: BACKSLASH lowercaseParams ARROW OCURLY aAdd CCURLY
|
||||||
|
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5))); }
|
||||||
;
|
;
|
||||||
|
|
||||||
case
|
case
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
%%
|
%%
|
||||||
|
|
||||||
[ \n]+ {}
|
[ \n]+ {}
|
||||||
|
\\ { return yy::parser::make_BACKSLASH(); }
|
||||||
\+ { return yy::parser::make_PLUS(); }
|
\+ { return yy::parser::make_PLUS(); }
|
||||||
\* { return yy::parser::make_TIMES(); }
|
\* { return yy::parser::make_TIMES(); }
|
||||||
- { return yy::parser::make_MINUS(); }
|
- { return yy::parser::make_MINUS(); }
|
||||||
|
@ -22,6 +23,8 @@ defn { return yy::parser::make_DEFN(); }
|
||||||
data { return yy::parser::make_DATA(); }
|
data { return yy::parser::make_DATA(); }
|
||||||
case { return yy::parser::make_CASE(); }
|
case { return yy::parser::make_CASE(); }
|
||||||
of { return yy::parser::make_OF(); }
|
of { return yy::parser::make_OF(); }
|
||||||
|
let { return yy::parser::make_LET(); }
|
||||||
|
in { return yy::parser::make_IN(); }
|
||||||
\{ { return yy::parser::make_OCURLY(); }
|
\{ { return yy::parser::make_OCURLY(); }
|
||||||
\} { return yy::parser::make_CCURLY(); }
|
\} { return yy::parser::make_CCURLY(); }
|
||||||
\( { return yy::parser::make_OPAREN(); }
|
\( { return yy::parser::make_OPAREN(); }
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
|
||||||
|
bool type::is_arrow(const type_mgr& mgr) const { return false; }
|
||||||
|
|
||||||
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
if(forall.size() != 0) {
|
if(forall.size() != 0) {
|
||||||
to << "forall ";
|
to << "forall ";
|
||||||
|
@ -34,20 +36,35 @@ void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool type_var::is_arrow(const type_mgr& mgr) const {
|
||||||
|
auto it = mgr.types.find(name);
|
||||||
|
if(it != mgr.types.end()) {
|
||||||
|
return it->second->is_arrow(mgr);
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
to << name;
|
to << name;
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
bool print_parenths = left->is_arrow(mgr);
|
||||||
|
if(print_parenths) to << "(";
|
||||||
left->print(mgr, to);
|
left->print(mgr, to);
|
||||||
to << " -> (";
|
if(print_parenths) to << ")";
|
||||||
|
to << " -> ";
|
||||||
right->print(mgr, to);
|
right->print(mgr, to);
|
||||||
to << ")";
|
}
|
||||||
|
|
||||||
|
bool type_arr::is_arrow(const type_mgr& mgr) const {
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
constructor->print(mgr, to);
|
constructor->print(mgr, to);
|
||||||
to << "* ";
|
to << "*";
|
||||||
for(auto& arg : arguments) {
|
for(auto& arg : arguments) {
|
||||||
to << " ";
|
to << " ";
|
||||||
arg->print(mgr, to);
|
arg->print(mgr, to);
|
||||||
|
@ -185,3 +202,12 @@ void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
|
||||||
for(auto& arg : app->arguments) find_free(arg, into);
|
for(auto& arg : app->arguments) find_free(arg, into);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const {
|
||||||
|
std::set<std::string> monotype_free;
|
||||||
|
find_free(t->monotype, monotype_free);
|
||||||
|
for(auto& not_free : t->forall) {
|
||||||
|
monotype_free.erase(not_free);
|
||||||
|
}
|
||||||
|
into.insert(monotype_free.begin(), monotype_free.end());
|
||||||
|
}
|
||||||
|
|
|
@ -11,6 +11,7 @@ struct type {
|
||||||
virtual ~type() = default;
|
virtual ~type() = default;
|
||||||
|
|
||||||
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||||
|
virtual bool is_arrow(const type_mgr& mgr) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
using type_ptr = std::shared_ptr<type>;
|
using type_ptr = std::shared_ptr<type>;
|
||||||
|
@ -34,6 +35,7 @@ struct type_var : public type {
|
||||||
: name(std::move(n)) {}
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
bool is_arrow(const type_mgr& mgr) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct type_base : public type {
|
struct type_base : public type {
|
||||||
|
@ -65,6 +67,7 @@ struct type_arr : public type {
|
||||||
: left(std::move(l)), right(std::move(r)) {}
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
bool is_arrow(const type_mgr& mgr) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct type_app : public type {
|
struct type_app : public type {
|
||||||
|
@ -92,4 +95,5 @@ struct type_mgr {
|
||||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
void bind(const std::string& s, type_ptr t);
|
void bind(const std::string& s, type_ptr t);
|
||||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||||
|
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,13 +1,49 @@
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
|
|
||||||
|
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
|
||||||
|
if(parent != nullptr) parent->find_free(mgr, into);
|
||||||
|
for(auto& binding : names) {
|
||||||
|
mgr.find_free(binding.second.type, into);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void type_env::find_free_except(const type_mgr& mgr, const std::string& avoid,
|
||||||
|
std::set<std::string>& into) const {
|
||||||
|
if(parent != nullptr) parent->find_free(mgr, into);
|
||||||
|
for(auto& binding : names) {
|
||||||
|
if(binding.first == avoid) continue;
|
||||||
|
mgr.find_free(binding.second.type, into);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type_scheme_ptr type_env::lookup(const std::string& name) const {
|
type_scheme_ptr type_env::lookup(const std::string& name) const {
|
||||||
auto it = names.find(name);
|
auto it = names.find(name);
|
||||||
if(it != names.end()) return it->second;
|
if(it != names.end()) return it->second.type;
|
||||||
if(parent) return parent->lookup(name);
|
if(parent) return parent->lookup(name);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool type_env::is_global(const std::string& name) const {
|
||||||
|
auto it = names.find(name);
|
||||||
|
if(it != names.end()) return it->second.vis == visibility::global;
|
||||||
|
if(parent) return parent->is_global(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
|
||||||
|
auto it = names.find(name);
|
||||||
|
if(it != names.end()) it->second.mangled_name = mangled;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string& type_env::get_mangled_name(const std::string& name) const {
|
||||||
|
auto it = names.find(name);
|
||||||
|
if(it != names.end())
|
||||||
|
return (it->second.mangled_name != "") ? it->second.mangled_name : name;
|
||||||
|
if(parent) return parent->get_mangled_name(name);
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
type_ptr type_env::lookup_type(const std::string& name) const {
|
type_ptr type_env::lookup_type(const std::string& name) const {
|
||||||
auto it = type_names.find(name);
|
auto it = type_names.find(name);
|
||||||
if(it != type_names.end()) return it->second;
|
if(it != type_names.end()) return it->second;
|
||||||
|
@ -15,12 +51,13 @@ type_ptr type_env::lookup_type(const std::string& name) const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_env::bind(const std::string& name, type_ptr t) {
|
void type_env::bind(const std::string& name, type_ptr t, visibility v) {
|
||||||
names[name] = type_scheme_ptr(new type_scheme(t));
|
type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
|
||||||
|
names[name] = variable_data(std::move(new_scheme), v, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_env::bind(const std::string& name, type_scheme_ptr t) {
|
void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
|
||||||
names[name] = t;
|
names[name] = variable_data(std::move(t), v, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_env::bind_type(const std::string& type_name, type_ptr t) {
|
void type_env::bind_type(const std::string& type_name, type_ptr t) {
|
||||||
|
@ -31,12 +68,15 @@ void type_env::bind_type(const std::string& type_name, type_ptr t) {
|
||||||
void type_env::generalize(const std::string& name, type_mgr& mgr) {
|
void type_env::generalize(const std::string& name, type_mgr& mgr) {
|
||||||
auto names_it = names.find(name);
|
auto names_it = names.find(name);
|
||||||
if(names_it == names.end()) throw 0;
|
if(names_it == names.end()) throw 0;
|
||||||
if(names_it->second->forall.size() > 0) throw 0;
|
if(names_it->second.type->forall.size() > 0) throw 0;
|
||||||
|
|
||||||
std::set<std::string> free_variables;
|
std::set<std::string> free_in_type;
|
||||||
mgr.find_free(names_it->second->monotype, free_variables);
|
std::set<std::string> free_in_env;
|
||||||
for(auto& free : free_variables) {
|
mgr.find_free(names_it->second.type->monotype, free_in_type);
|
||||||
names_it->second->forall.push_back(free);
|
find_free_except(mgr, name, free_in_env);
|
||||||
|
for(auto& free : free_in_type) {
|
||||||
|
if(free_in_env.find(free) != free_in_env.end()) continue;
|
||||||
|
names_it->second.type->forall.push_back(free);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,23 +1,45 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <set>
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
|
|
||||||
struct type_env;
|
struct type_env;
|
||||||
using type_env_ptr = std::shared_ptr<type_env>;
|
using type_env_ptr = std::shared_ptr<type_env>;
|
||||||
|
|
||||||
|
enum class visibility { global,local };
|
||||||
|
|
||||||
struct type_env {
|
struct type_env {
|
||||||
|
struct variable_data {
|
||||||
|
type_scheme_ptr type;
|
||||||
|
visibility vis;
|
||||||
|
std::string mangled_name;
|
||||||
|
|
||||||
|
variable_data()
|
||||||
|
: variable_data(nullptr, visibility::local, "") {}
|
||||||
|
variable_data(type_scheme_ptr t, visibility v, std::string n)
|
||||||
|
: type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
|
||||||
|
};
|
||||||
|
|
||||||
type_env_ptr parent;
|
type_env_ptr parent;
|
||||||
std::map<std::string, type_scheme_ptr> names;
|
std::map<std::string, variable_data> names;
|
||||||
std::map<std::string, type_ptr> type_names;
|
std::map<std::string, type_ptr> type_names;
|
||||||
|
|
||||||
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
||||||
type_env() : type_env(nullptr) {}
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
|
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
||||||
|
void find_free_except(const type_mgr& mgr, const std::string& avoid,
|
||||||
|
std::set<std::string>& into) const;
|
||||||
type_scheme_ptr lookup(const std::string& name) const;
|
type_scheme_ptr lookup(const std::string& name) const;
|
||||||
|
bool is_global(const std::string& name) const;
|
||||||
|
void set_mangled_name(const std::string& name, const std::string& mangled);
|
||||||
|
const std::string& get_mangled_name(const std::string& name) const;
|
||||||
type_ptr lookup_type(const std::string& name) const;
|
type_ptr lookup_type(const std::string& name) const;
|
||||||
void bind(const std::string& name, type_ptr t);
|
void bind(const std::string& name, type_ptr t,
|
||||||
void bind(const std::string& name, type_scheme_ptr t);
|
visibility v = visibility::local);
|
||||||
|
void bind(const std::string& name, type_scheme_ptr t,
|
||||||
|
visibility v = visibility::local);
|
||||||
void bind_type(const std::string& type_name, type_ptr t);
|
void bind_type(const std::string& type_name, type_ptr t);
|
||||||
void generalize(const std::string& name, type_mgr& mgr);
|
void generalize(const std::string& name, type_mgr& mgr);
|
||||||
};
|
};
|
||||||
|
|
|
@ -106,4 +106,537 @@ Wait a moment, didn't we just talk about nested polymorphic definitions, and how
|
||||||
This is true, but why should we perform transformations on a malformed program? Typechecking before pulling functions to the global scope will help us save the work, and breaking down one dependency-searching problem (which is \(O(n^3)\) thanks to Warshall's) into smaller, independent problems may even lead to better performance. Furthermore, typechecking before program transformations will help us come up with more helpful error messages.
|
This is true, but why should we perform transformations on a malformed program? Typechecking before pulling functions to the global scope will help us save the work, and breaking down one dependency-searching problem (which is \(O(n^3)\) thanks to Warshall's) into smaller, independent problems may even lead to better performance. Furthermore, typechecking before program transformations will help us come up with more helpful error messages.
|
||||||
{{< /sidenote >}} and can be transformed into a sequence of instructions just like any other global function. It has been pulled from its `where` (which, by the way, is pretty much equivalent to a `let/in`) to the top level.
|
{{< /sidenote >}} and can be transformed into a sequence of instructions just like any other global function. It has been pulled from its `where` (which, by the way, is pretty much equivalent to a `let/in`) to the top level.
|
||||||
|
|
||||||
|
Now, see how `addSingle` became `(addSingle n)`? If we chose to rewrite the
|
||||||
|
program this way, we'd have to find-and-replace every instance of `addSingle`
|
||||||
|
in the function body, which would be tedious and require us to keep
|
||||||
|
track of shadowed variables and the like. Also, what if we used a local
|
||||||
|
definition twice in the original piece of code? How about something like this:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = square * square
|
||||||
|
where
|
||||||
|
square = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
Applying the strategy we saw above, we get:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = (square x) * (square x)
|
||||||
|
square x = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
This is valid, except that in our evaluation model, the two instances
|
||||||
|
of `(square x)` will be built independently of one another, and thus,
|
||||||
|
will not be shared. This, in turn, will mean that `square` will be called
|
||||||
|
twice, which is not what we would expect from looking at the original program.
|
||||||
|
This isn't good. Instead, why don't we keep the `where`, but modify it
|
||||||
|
as follows:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = square * square
|
||||||
|
where square = square' x
|
||||||
|
square' x = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
This time, assuming we can properly implement `where`, the call to
|
||||||
|
`square' x` should only occur once. Though I've been using `where`,
|
||||||
|
which leads to less clutter in Haskell code, the exact same approach applies
|
||||||
|
to `let/in`, and that's what we'll be using in our language.
|
||||||
|
|
||||||
This technique of replacing captured variables with arguments, and pulling closures into the global scope to aid compilation, is called [Lambda Lifting](https://en.wikipedia.org/wiki/Lambda_lifting). Its name is no coincidence - lambda functions need to undergo the same kind of transformation as our nested definitions (unlike nested definitions, though, lambda functions need to be named). This is why they are included in this post together with `let/in`!
|
This technique of replacing captured variables with arguments, and pulling closures into the global scope to aid compilation, is called [Lambda Lifting](https://en.wikipedia.org/wiki/Lambda_lifting). Its name is no coincidence - lambda functions need to undergo the same kind of transformation as our nested definitions (unlike nested definitions, though, lambda functions need to be named). This is why they are included in this post together with `let/in`!
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
Now that we understand what we have to do, it's time to jump straight into
|
||||||
|
doing it. First, we need to refactor our current code to allow for the changes
|
||||||
|
we're going to make; then, we can implement `let/in` expressions; finally,
|
||||||
|
we'll tackle lambda functions.
|
||||||
|
|
||||||
|
#### Infrastructure Changes
|
||||||
|
When finding captured variables, the notion of _free variables_ once again
|
||||||
|
becomes important. Recall that a free variable in an expression is a variable
|
||||||
|
that is defined outside of that expression. Consider, for example, the
|
||||||
|
expression:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
let x = 5 in x + y
|
||||||
|
```
|
||||||
|
|
||||||
|
In this expression, `x` is _not_ a free variable, since it's defined
|
||||||
|
in the `let/in` expression. On the other hand, `y` _is_ a free variable,
|
||||||
|
since it's not defined locally.
|
||||||
|
|
||||||
|
The algorithm that we used for computing free variables was rather biased.
|
||||||
|
Previously, we only cared about the difference between a local variable
|
||||||
|
(defined somewhere in a function's body, or referring to one of the function's
|
||||||
|
parameters) and a global variable (referring to a function name). This shows in
|
||||||
|
our code for `find_free`. Consider, for example, this segment:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/11/ast.cpp" 33 36 >}}
|
||||||
|
|
||||||
|
We created bindings in our type environment whenever we saw a new variable
|
||||||
|
being introduced, which led us to only count variables that we did not bind
|
||||||
|
_anywhere_ as 'free'. This approach is no longer sufficient. Consider,
|
||||||
|
for example, the following Haskell code:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
someFunction x =
|
||||||
|
let
|
||||||
|
y = x + 5
|
||||||
|
in
|
||||||
|
x*y
|
||||||
|
```
|
||||||
|
|
||||||
|
We can see that the variable `x` is introduced on line 1.
|
||||||
|
Thus, our current algorithm will happily store `x` in an environment,
|
||||||
|
and not count it as free. But clearly, the definition of `y` on line 3
|
||||||
|
captures `x`! If we were to lift `y` into global scope, we would need
|
||||||
|
to pass `x` to it as an argument. To fix this, we have to separate the creation
|
||||||
|
and assignment of type environments from free variable detection. Why
|
||||||
|
don't we start with `ast` and its descendants? Our signatures become:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void ast::find_free(std::set<std::string>& into);
|
||||||
|
type_ptr ast::typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
```
|
||||||
|
|
||||||
|
For the most part, the code remains unchanged. We avoid
|
||||||
|
using `env` (and `this->env`), and default to marking
|
||||||
|
any variable as a free variable:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 39 41 >}}
|
||||||
|
|
||||||
|
Since we no longer use the environment, we resort to an
|
||||||
|
alternative method of removing bound variables. Here's
|
||||||
|
`ast_case::find_free`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 169 181 >}}
|
||||||
|
|
||||||
|
For each branch, we find the free variables. However, we
|
||||||
|
want to avoid marking variables that were introduced through
|
||||||
|
pattern matching as free (they are not). Thus, we use `pattern::find_variables`
|
||||||
|
to see which of the variables were bound by that pattern,
|
||||||
|
and remove them from the list of free variables. We
|
||||||
|
can then safely add the list of free variables in the pattern to the overall
|
||||||
|
list of free variables. Other `ast` descendants experience largely
|
||||||
|
cosmetic changes (such as the removal of the `env` parameter).
|
||||||
|
|
||||||
|
Of course, we must implement `find_variables` for each of our `pattern`
|
||||||
|
subclasses. Here's what I got for `pattern_var`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 402 404 >}}
|
||||||
|
|
||||||
|
And here's an equally terse implementation for `pattern_constr`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 417 419 >}}
|
||||||
|
|
||||||
|
We also want to update `definition_defn` with this change. Our signatures
|
||||||
|
become:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void definition_defn::find_free();
|
||||||
|
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
|
||||||
|
```
|
||||||
|
|
||||||
|
We'll get to the `visibility` parameter later. The implementations
|
||||||
|
are fairly simple. Just like `ast_case`, we want to erase each function's
|
||||||
|
parameters from its list of free variables:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 13 18 >}}
|
||||||
|
|
||||||
|
Since `find_free` no longer creates any type bindings or environments,
|
||||||
|
this functionality is shouldered by `insert_types`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 20 32 >}}
|
||||||
|
|
||||||
|
Now that free variables are properly computed, we are able to move on
|
||||||
|
to bigger and better things.
|
||||||
|
|
||||||
|
#### Nested Definitions
|
||||||
|
At present, our code for typechecking the whole program is located in
|
||||||
|
`main.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/11/main.cpp" 43 61 >}}
|
||||||
|
|
||||||
|
This piece of code goes on. We now want this to be more general. Soon, `let/in`
|
||||||
|
expressions will bring with them definitions that are inside other definitions,
|
||||||
|
which will not be reachable at the top level. The fundamental topological
|
||||||
|
sorting algorithm, though, will remain the same. We can abstract a series
|
||||||
|
of definitions that need to be ordered and then typechecked into a new struct,
|
||||||
|
`definition_group`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.hpp" 73 83 >}}
|
||||||
|
|
||||||
|
This will be exactly like a list of `defn`/`data` definitions we have at the
|
||||||
|
top level, except now, it can also occur in other places, like `let/in`
|
||||||
|
expressions. Once again, ignore for the moment the `visibility` field.
|
||||||
|
|
||||||
|
The way we defined function ordering requires some extra work from
|
||||||
|
`definition_group`. Recall that conceptually, functions can only depend
|
||||||
|
on other functions defined in the same `let/in` expression, or, more generally,
|
||||||
|
in the same `definition_group`. This means that we now classify free variables
|
||||||
|
in definitions into two categories: free variables that refer to "nearby"
|
||||||
|
definitions (i.e. definitions in the same group) and free variables that refer
|
||||||
|
to "far away" definitions. The "nearby" variables will be used to do
|
||||||
|
topological ordering, while the "far away" variables can be passed along
|
||||||
|
further up, perhaps into an enclosing `let/in` expression (for which "nearby"
|
||||||
|
variables aren't actually free, since they are bound in the `let`). We
|
||||||
|
implement this partitioning of variables in `definition_group::find_free`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 94 105 >}}
|
||||||
|
|
||||||
|
Notice that we have added a new `nearby_variables` field to `definition_defn`.
|
||||||
|
This is used on line 101, and will be once again used in `definition_group::typecheck`. Speaking of `typecheck`, let's look at its definition:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 107 145 >}}
|
||||||
|
|
||||||
|
This function is a little long, but conceptually, each `for` loop
|
||||||
|
contains a step of the process:
|
||||||
|
|
||||||
|
* The first loop declares all data types, so that constructors can
|
||||||
|
be verified to properly reference them.
|
||||||
|
* The second loop creates all the data type constructors.
|
||||||
|
* The third loop adds edges to our dependency graph.
|
||||||
|
* The fourth loop performs typechecking on the now-ordered groups of mutually
|
||||||
|
recursive functions.
|
||||||
|
* The first inner loop inserts the types of all the functions into the environment.
|
||||||
|
* The second inner loop actually performs typechecking.
|
||||||
|
* The third inner loop makes as many things polymorphic as possible.
|
||||||
|
|
||||||
|
We can now adjust our `parser.y` to use a `definition_group` instead of
|
||||||
|
two global vectors. First, we declare a global `definition_group`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/parser.y" 10 10 >}}
|
||||||
|
|
||||||
|
Then, we adjust `definitions` to create `definition_group`s:
|
||||||
|
|
||||||
|
{{< codelines "text" "compiler/12/parser.y" 59 68 >}}
|
||||||
|
|
||||||
|
We can now adjust `main.cpp` to use the global `definition_group`. Among
|
||||||
|
other changes (such as removing `extern` references to `vector`s, and updating
|
||||||
|
function signatures) we also update the `typecheck_program` function:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/main.cpp" 41 49 >}}
|
||||||
|
|
||||||
|
Now, our code is ready for typechecking nested definitions, but not for
|
||||||
|
compiling them. The main thing that we still have to address is the addition
|
||||||
|
of new definitions to the global scope. Let's take a look at that next.
|
||||||
|
|
||||||
|
#### Global Definitions
|
||||||
|
We want every function (and even non-function definitions that capture surrounding
|
||||||
|
variables), regardless of whether or not it was declared in the global scope,
|
||||||
|
to be processed and converted to LLVM code. The LLVM code conversion takes
|
||||||
|
several steps. First, the function's AST is translated into G-machine
|
||||||
|
instructions, which we covered in [part 5]({{< relref "05_compiler_execution.md" >}}),
|
||||||
|
by a process we covered in [part 6]({{< relref "06_compiler_compilation.md" >}}).
|
||||||
|
Then, an LLVM function is created for every function, and registered globally.
|
||||||
|
Finally, the G-machine instructions are converted into LLVM IR, which is
|
||||||
|
inserted into the previously created functions. These things
|
||||||
|
can't be done in a single pass: at the very least, we can't start translating
|
||||||
|
G-machine instructions into LLVM IR until functions are globally declared,
|
||||||
|
because we would otherwise have no means of referencing other functions. It
|
||||||
|
makes sense to me, then, to pull out all the 'global' definitions into
|
||||||
|
a single top-level list (perhaps somewhere in `main.cpp`).
|
||||||
|
|
||||||
|
Let's start implementing this with a new `global_scope` struct. This
|
||||||
|
struct will contain all of the global function and constructor definitions:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.hpp" 42 55 >}}
|
||||||
|
|
||||||
|
This struct will allow us to keep track of all the global definitions,
|
||||||
|
emitting them as we go, and then coming back to them as necessary.
|
||||||
|
There are also signs of another piece of functionality: `occurence_count`
|
||||||
|
and `mangle_name`. These two will be used to handle duplicate names.
|
||||||
|
|
||||||
|
We cannot have two global functions named the same thing, but we can
|
||||||
|
easily imagine a situation in which two separate `let/in` expressions define
|
||||||
|
a variable like `x`, which then needs to be lifted to the global scope. We
|
||||||
|
resolve such conflicts by slightly changing - "mangling" - the name of
|
||||||
|
one of the resulting global definitions. We allow the first global definition
|
||||||
|
to be named the same as it was originally (in our example, this would be `x`).
|
||||||
|
However, if we detect that a global definition `x` already exists (we
|
||||||
|
track this using `occurence_count`), we rename it to `x_1`. Subsequent
|
||||||
|
global definitions will end up being named `x_2`, `x_3`, and so on.
|
||||||
|
|
||||||
|
Alright, let's take a look at `global_function` and `global_constructor`.
|
||||||
|
Here's the former:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.hpp" 11 27 >}}
|
||||||
|
|
||||||
|
There's nothing really surprising here: all of the fields
|
||||||
|
are reminiscent of `definition_defn`, though some type-related variables
|
||||||
|
are missing. We also include the three compilation-related methods,
|
||||||
|
`compile`, `declare_llvm`, and `generate_llvm`, which were previously in `definition_defn`. Let's look at `global_constructor` now:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.hpp" 29 40 >}}
|
||||||
|
|
||||||
|
This maps pretty closely to a single `definition_data::constructor`.
|
||||||
|
There's a difference here that is not clear at a glance, though. Whereas
|
||||||
|
the `name` in a `definition_defn` or `definition_data` refers to the
|
||||||
|
name as given by the user in the code, the `name` of a `global_function`
|
||||||
|
or `global_constructor` has gone through mangling, and thus, should be
|
||||||
|
unique.
|
||||||
|
|
||||||
|
Let's now look at the implementation of these structs' methods. The methods
|
||||||
|
`add_function` and `add_constructor` are pretty straightforward. Here's
|
||||||
|
the former:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 39 43 >}}
|
||||||
|
|
||||||
|
And here's the latter:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 45 49 >}}
|
||||||
|
|
||||||
|
In both of these functions, we return a reference to the new global
|
||||||
|
definition we created. This helps us access the mangled `name` field,
|
||||||
|
and, in the case of `global_function`, inspect the `ast_ptr` that represents
|
||||||
|
its body.
|
||||||
|
|
||||||
|
Next, we have `global_scope::compile` and `global_scope::generate_llvm`,
|
||||||
|
which encapsulate these operations on all global definitions. Their
|
||||||
|
implementations are very straightforward, and are similar to the
|
||||||
|
`gen_llvm` function we used to have in our `main.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 51 67 >}}
|
||||||
|
|
||||||
|
Finally, we have `mangle`, which takes care of potentially duplicate
|
||||||
|
variable names:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 69 83 >}}
|
||||||
|
|
||||||
|
Let's move on to the global definition structs.
|
||||||
|
The `compile`, `declare_llvm`, and `generate_llvm` methods for
|
||||||
|
`global_function` are pretty much the same as those that we used to have
|
||||||
|
in `definition_defn`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 4 24 >}}
|
||||||
|
|
||||||
|
The same is true for `global_constructor` and its method `generate_llvm`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 26 37 >}}
|
||||||
|
|
||||||
|
Recall that in this case, we need not have two methods for declaring
|
||||||
|
and generating LLVM, since constructors don't reference other constructors,
|
||||||
|
and are always generated before any function definitions.
|
||||||
|
|
||||||
|
#### Visibility
|
||||||
|
Should we really be turning _all_ free variables in a function definition
|
||||||
|
into arguments? Consider the following piece of Haskell code:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
add x y = x + y
|
||||||
|
mul x y = x * y
|
||||||
|
something = mul (add 1 3) 3
|
||||||
|
```
|
||||||
|
|
||||||
|
In the definition of `something`, `mul` and `add` occur free.
|
||||||
|
A very naive lifting algorithm might be tempted to rewrite such a program
|
||||||
|
as follows:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
add x y = x + y
|
||||||
|
mul x y = x * y
|
||||||
|
something' add mul = mul (add 1 3) 3
|
||||||
|
something = something' add mul
|
||||||
|
```
|
||||||
|
|
||||||
|
But that's absurd! Not only are `add` and `mul` available globally,
|
||||||
|
but such a rewrite generates another definition with free variables,
|
||||||
|
which means we didn't really improve our program in any way. From this
|
||||||
|
example, we can see that we don't want to be turning references to global
|
||||||
|
variables into function parameters. But how can we tell if a variable
|
||||||
|
we're trying to operate on is global or not? I propose a flag in our
|
||||||
|
`type_env`, which we'll augment to be used as a symbol table. To do
|
||||||
|
this, we update the implementation of `type_env` to map variables to
|
||||||
|
values of a struct `variable_data`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.hpp" 13 22 >}}
|
||||||
|
|
||||||
|
The `visibility` enum is defined as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.hpp" 10 10 >}}
|
||||||
|
|
||||||
|
As you can see from the above snippet, we also added a `mangled_name` field
|
||||||
|
to the new `variable_data` struct. We will be using this field shortly. We
|
||||||
|
also add a few methods to our `type_env`, and end up with the following:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.hpp" 31 44 >}}
|
||||||
|
|
||||||
|
We will come back to `find_free` and `find_free_except`, as well as
|
||||||
|
`set_mangled_name` and `get_mangled_name`. For now, we just adjust `bind` to
|
||||||
|
take a visibility parameter that defaults to `local`, and implement
|
||||||
|
`is_global`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 27 32 >}}
|
||||||
|
|
||||||
|
Remember the `visibility::global` in `parser.y`? This is where that comes in.
|
||||||
|
Specifically, we recall that `definition_defn::insert_types` is responsible
|
||||||
|
for placing function types into the environment, making them accessible
|
||||||
|
during typechecking later. At this time, we already need to know whether
|
||||||
|
or not the definitions are global or local (so that we can create the binding).
|
||||||
|
Thus, we add `visibility` as a parameter to `insert_types`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.hpp" 44 44 >}}
|
||||||
|
|
||||||
|
Since we are now moving from manually wrangling definitions towards using
|
||||||
|
`definition_group`, we make it so that the group itself provides this
|
||||||
|
argument. To do this, we add the `visibility` field from before to it,
|
||||||
|
and set it in the parser. One more thing: since constructors never
|
||||||
|
capture variables, we can always move them straight to the global
|
||||||
|
scope, and thus, we'll always mark them with `visibility::global`.
|
||||||
|
|
||||||
|
#### Managing Mangled Names
|
||||||
|
Just mangling names is not enough. Consider the following program:
|
||||||
|
|
||||||
|
```text {linenos=table}
|
||||||
|
defn packOne x = {
|
||||||
|
let {
|
||||||
|
data Packed a = { Pack a }
|
||||||
|
} in {
|
||||||
|
Pack x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defn packTwo x = {
|
||||||
|
let {
|
||||||
|
data Packed a = { Pack a }
|
||||||
|
} in {
|
||||||
|
Pack x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
{{< sidenote "right" "lifting-types-note" "Lifting the data type declarations" >}}
|
||||||
|
We are actually not <em>quite</em> doing something like the following snippet.
|
||||||
|
The reason for this is that we don't mangle the names for types. I pointed
|
||||||
|
out this potential issue in a sidenote in the previous post. Since the size
|
||||||
|
of this post is already ballooning, I will not deal with this issue here.
|
||||||
|
Even at the end of this post, our compiler will not be able to distinguish
|
||||||
|
between the two <code>Packed</code> types. We will hopefully get to it later.
|
||||||
|
{{< /sidenote >}} and their constructors into the global
|
||||||
|
scope gives us something like:
|
||||||
|
|
||||||
|
``` {linenos=table}
|
||||||
|
data Packed a = { Pack a }
|
||||||
|
data Packed_1 a = { Pack_1 a }
|
||||||
|
defn packOne x = { Pack x }
|
||||||
|
defn packTwo x = { Pack_1 x }
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice that we had to rename one of the calls to `Pack` to be a call to
|
||||||
|
`Pack_1`. To actually change our AST to reference `Pack_1`, we'd have
|
||||||
|
to traverse the whole tree, and make sure to keep track of definitions
|
||||||
|
that could shadow `Pack` further down. This is cumbersome. Instead, we
|
||||||
|
can mark a variable as referring to a mangled version of itself, and
|
||||||
|
access this information when needed. To do this, we add the `mangled_name`
|
||||||
|
field to the `variable_data` struct as we've seen above, and implement
|
||||||
|
the `set_mangled_name` and `get_mangled_name` methods. The former:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 34 37 >}}
|
||||||
|
|
||||||
|
And the latter:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 39 45 >}}
|
||||||
|
|
||||||
|
We don't allow `set_mangled_name` to affect variables that are declared
|
||||||
|
above the receiving `type_env`, and use the empty string as a 'none' value.
|
||||||
|
Now, when lifting data type constructors, we'll be able to use
|
||||||
|
`set_mangled_name` to make sure constructor calls are made correctly. We
|
||||||
|
will also be able to use this in other cases, like the translation
|
||||||
|
of local function definitions.
|
||||||
|
|
||||||
|
#### New AST Nodes
|
||||||
|
Finally, it's time for us to add new AST nodes to our language.
|
||||||
|
Specifically, these nodes are `ast_let` (for `let/in` expressions)
|
||||||
|
and `ast_lambda` for lambda functions. We declare them as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.hpp" 131 166 >}}
|
||||||
|
|
||||||
|
In `ast_let`, the `definitions` field corresponds to the original definitions
|
||||||
|
given by the user in the program, and the `in` field corresponds to the
|
||||||
|
expression which uses these definitions. In the process of lifting, though,
|
||||||
|
we eventually transfer each of the definitions to the global scope, replacing
|
||||||
|
their right hand sides with partial applications. After this transformation,
|
||||||
|
all the data type definitions are effectively gone, and all the function
|
||||||
|
definitions are converted into the simple form `x = f a1 ... an`. We hold
|
||||||
|
these post-transformation equations in the `translated_definitions` field,
|
||||||
|
and it's them that we compile in this node's `compile` method.
|
||||||
|
|
||||||
|
In `ast_lambda`, we allow multiple parameters (like Haskell's `\x y -> x + y`).
|
||||||
|
We store these parameters in the `params` field, and we store the lambda's
|
||||||
|
expression in the `body` field. Just like `definition_defn`,
|
||||||
|
the `ast_lambda` node maintains a separate environment in which its children
|
||||||
|
have been bound, and a list of variables that occur freely in its body. The
|
||||||
|
former is used for typechecking, while the latter is used for lifting.
|
||||||
|
Finally, the `translated` field holds the lambda function's form
|
||||||
|
after its body has been transformed into a global function. Similarly to
|
||||||
|
`ast_let`, this node will be in the form `f a1 ... an`.
|
||||||
|
|
||||||
|
The
|
||||||
|
observant reader will have noticed that we have a new method: `translate`.
|
||||||
|
This is a new method for all `ast` descendants, and will implement the
|
||||||
|
steps of moving definitions to the global scope and transforming the
|
||||||
|
program. Before we get to it, though, let's quickly see the parsing
|
||||||
|
rules for `ast_let` and `ast_lambda`:
|
||||||
|
|
||||||
|
{{< codelines "text" "compiler/12/parser.y" 107 115 >}}
|
||||||
|
|
||||||
|
This is pretty similar to the rest of the grammar, so I will give this no
|
||||||
|
further explanation.
|
||||||
|
|
||||||
|
{{< todo >}}
|
||||||
|
Explain typechecking for lambda functions and let/in expressions.
|
||||||
|
{{< /todo >}}
|
||||||
|
|
||||||
|
{{< todo >}}
|
||||||
|
Explain free variable detection for lambda functions and let/in expressions.
|
||||||
|
{{< /todo >}}
|
||||||
|
|
||||||
|
#### Translation
|
||||||
|
While collecting all of the definitions into a global list, we can
|
||||||
|
also do some program transformations. Let's return to our earlier example:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = square * square
|
||||||
|
where
|
||||||
|
square = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
We said it should be translated into something like:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = square * square
|
||||||
|
where square = square' x
|
||||||
|
square' x = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
In our language, the original program above would be:
|
||||||
|
|
||||||
|
```text {linenos=table}
|
||||||
|
defn fourthPower x = {
|
||||||
|
let {
|
||||||
|
defn square = { x * x }
|
||||||
|
} in {
|
||||||
|
square * square
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
And the translated version would be:
|
||||||
|
|
||||||
|
```text {linenos=table}
|
||||||
|
defn fourthPower x = {
|
||||||
|
let {
|
||||||
|
defn square = { square' x }
|
||||||
|
} in {
|
||||||
|
square * square
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defn square' x = { x * x }
|
||||||
|
```
|
||||||
|
|
||||||
|
Setting aside for the moment the naming of `square'` and `square`, we observe
|
||||||
|
that we want to perform the following operations:
|
||||||
|
|
||||||
|
1. Move the body of the original definition of `square` into its own
|
||||||
|
global definition, adding all the captured variables as arguments.
|
||||||
|
2. Replace the right hand side of the `let/in` expression with an application
|
||||||
|
of the global definition to the variables it requires.
|
||||||
|
|
|
@ -6,4 +6,4 @@
|
||||||
{{ .Scratch.Set "u" $t }}
|
{{ .Scratch.Set "u" $t }}
|
||||||
{{ end }}
|
{{ end }}
|
||||||
{{ $v := first (add (sub (int (.Get 3)) (int (.Get 2))) 1) (.Scratch.Get "u") }}
|
{{ $v := first (add (sub (int (.Get 3)) (int (.Get 2))) 1) (.Scratch.Get "u") }}
|
||||||
{{ highlight (delimit $v "\n") (.Get 0) "" }}
|
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d" (.Get 2)) }}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user