Compare commits
No commits in common. "577e0ad930b0c6e58e1aa2e6a02e814d3bc2f1c1" and "5d53678e83804e8c0e0a449a6f1ace452dc4a7e7" have entirely different histories.
577e0ad930
...
5d53678e83
@ -29,7 +29,6 @@ add_executable(compiler
|
|||||||
error.cpp error.hpp
|
error.cpp error.hpp
|
||||||
binop.cpp binop.hpp
|
binop.cpp binop.hpp
|
||||||
instruction.cpp instruction.hpp
|
instruction.cpp instruction.hpp
|
||||||
graph.cpp graph.hpp
|
|
||||||
${BISON_parser_OUTPUTS}
|
${BISON_parser_OUTPUTS}
|
||||||
${FLEX_scanner_OUTPUTS}
|
${FLEX_scanner_OUTPUTS}
|
||||||
main.cpp
|
main.cpp
|
||||||
|
@ -1,10 +1,7 @@
|
|||||||
#include "ast.hpp"
|
#include "ast.hpp"
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
#include <iostream>
|
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
#include "type.hpp"
|
|
||||||
#include "type_env.hpp"
|
|
||||||
|
|
||||||
static void print_indent(int n, std::ostream& to) {
|
static void print_indent(int n, std::ostream& to) {
|
||||||
while(n--) to << " ";
|
while(n--) to << " ";
|
||||||
@ -15,11 +12,7 @@ void ast_int::print(int indent, std::ostream& to) const {
|
|||||||
to << "INT: " << value << std::endl;
|
to << "INT: " << value << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) {
|
||||||
this->env = env;
|
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr ast_int::typecheck(type_mgr& mgr) {
|
|
||||||
return type_ptr(new type_base("Int"));
|
return type_ptr(new type_base("Int"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,13 +25,8 @@ void ast_lid::print(int indent, std::ostream& to) const {
|
|||||||
to << "LID: " << id << std::endl;
|
to << "LID: " << id << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_lid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) {
|
||||||
this->env = env;
|
return env.lookup(id);
|
||||||
if(env->lookup(id) == nullptr) into.insert(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr ast_lid::typecheck(type_mgr& mgr) {
|
|
||||||
return env->lookup(id)->instantiate(mgr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
@ -53,12 +41,8 @@ void ast_uid::print(int indent, std::ostream& to) const {
|
|||||||
to << "UID: " << id << std::endl;
|
to << "UID: " << id << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_uid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) {
|
||||||
this->env = env;
|
return env.lookup(id);
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr ast_uid::typecheck(type_mgr& mgr) {
|
|
||||||
return env->lookup(id)->instantiate(mgr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
@ -72,16 +56,10 @@ void ast_binop::print(int indent, std::ostream& to) const {
|
|||||||
right->print(indent + 1, to);
|
right->print(indent + 1, to);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_binop::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) {
|
||||||
this->env = env;
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
left->find_free(mgr, env, into);
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
right->find_free(mgr, env, into);
|
type_ptr ftype = env.lookup(op_name(op));
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr ast_binop::typecheck(type_mgr& mgr) {
|
|
||||||
type_ptr ltype = left->typecheck(mgr);
|
|
||||||
type_ptr rtype = right->typecheck(mgr);
|
|
||||||
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
|
||||||
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||||
|
|
||||||
type_ptr return_type = mgr.new_type();
|
type_ptr return_type = mgr.new_type();
|
||||||
@ -108,15 +86,9 @@ void ast_app::print(int indent, std::ostream& to) const {
|
|||||||
right->print(indent + 1, to);
|
right->print(indent + 1, to);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_app::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) {
|
||||||
this->env = env;
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
left->find_free(mgr, env, into);
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
right->find_free(mgr, env, into);
|
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr ast_app::typecheck(type_mgr& mgr) {
|
|
||||||
type_ptr ltype = left->typecheck(mgr);
|
|
||||||
type_ptr rtype = right->typecheck(mgr);
|
|
||||||
|
|
||||||
type_ptr return_type = mgr.new_type();
|
type_ptr return_type = mgr.new_type();
|
||||||
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||||
@ -141,33 +113,20 @@ void ast_case::print(int indent, std::ostream& to) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_case::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
|
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) {
|
||||||
this->env = env;
|
|
||||||
of->find_free(mgr, env, into);
|
|
||||||
for(auto& branch : branches) {
|
|
||||||
type_env_ptr new_env = type_scope(env);
|
|
||||||
branch->pat->insert_bindings(mgr, new_env);
|
|
||||||
branch->expr->find_free(mgr, new_env, into);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr ast_case::typecheck(type_mgr& mgr) {
|
|
||||||
type_var* var;
|
type_var* var;
|
||||||
type_ptr case_type = mgr.resolve(of->typecheck(mgr), var);
|
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||||
type_ptr branch_type = mgr.new_type();
|
type_ptr branch_type = mgr.new_type();
|
||||||
|
|
||||||
for(auto& branch : branches) {
|
for(auto& branch : branches) {
|
||||||
branch->pat->typecheck(case_type, mgr, branch->expr->env);
|
type_env new_env = env.scope();
|
||||||
type_ptr curr_branch_type = branch->expr->typecheck(mgr);
|
branch->pat->match(case_type, mgr, new_env);
|
||||||
|
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
|
||||||
mgr.unify(branch_type, curr_branch_type);
|
mgr.unify(branch_type, curr_branch_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
input_type = mgr.resolve(case_type, var);
|
input_type = mgr.resolve(case_type, var);
|
||||||
if(!dynamic_cast<type_data*>(input_type.get())) {
|
if(!dynamic_cast<type_data*>(input_type.get())) {
|
||||||
std::cout << dynamic_cast<type_data*>(input_type.get()) << std::endl;
|
|
||||||
std::cout << dynamic_cast<type_base*>(input_type.get()) << std::endl;
|
|
||||||
std::cout << var << std::endl;
|
|
||||||
input_type->print(mgr, std::cout); std::cout << std::endl;
|
|
||||||
throw type_error("attempting case analysis of non-data type");
|
throw type_error("attempting case analysis of non-data type");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -233,12 +192,8 @@ void pattern_var::print(std::ostream& to) const {
|
|||||||
to << var;
|
to << var;
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_var::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
|
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
|
||||||
env->bind(var, mgr.new_type());
|
env.bind(var, t);
|
||||||
}
|
|
||||||
|
|
||||||
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
|
||||||
mgr.unify(env->lookup(var)->instantiate(mgr), t);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_constr::print(std::ostream& to) const {
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
@ -248,23 +203,17 @@ void pattern_constr::print(std::ostream& to) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_constr::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
|
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
|
||||||
for(auto& param : params) {
|
type_ptr constructor_type = env.lookup(constr);
|
||||||
env->bind(param, mgr.new_type());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
|
||||||
type_ptr constructor_type = env->lookup(constr)->instantiate(mgr);
|
|
||||||
if(!constructor_type) {
|
if(!constructor_type) {
|
||||||
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(auto& param : params) {
|
for(int i = 0; i < params.size(); i++) {
|
||||||
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||||
if(!arr) throw type_error("too many parameters in constructor pattern");
|
if(!arr) throw type_error("too many parameters in constructor pattern");
|
||||||
|
|
||||||
mgr.unify(env->lookup(param)->instantiate(mgr), arr->left);
|
env.bind(params[i], arr->left);
|
||||||
constructor_type = arr->right;
|
constructor_type = arr->right;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <set>
|
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
@ -9,14 +8,10 @@
|
|||||||
#include "env.hpp"
|
#include "env.hpp"
|
||||||
|
|
||||||
struct ast {
|
struct ast {
|
||||||
type_env_ptr env;
|
|
||||||
|
|
||||||
virtual ~ast() = default;
|
virtual ~ast() = default;
|
||||||
|
|
||||||
virtual void print(int indent, std::ostream& to) const = 0;
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
virtual void find_free(type_mgr& mgr,
|
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) = 0;
|
||||||
type_env_ptr& env, std::set<std::string>& into) = 0;
|
|
||||||
virtual type_ptr typecheck(type_mgr& mgr) = 0;
|
|
||||||
virtual void compile(const env_ptr& env,
|
virtual void compile(const env_ptr& env,
|
||||||
std::vector<instruction_ptr>& into) const = 0;
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
};
|
};
|
||||||
@ -27,8 +22,7 @@ struct pattern {
|
|||||||
virtual ~pattern() = default;
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
virtual void print(std::ostream& to) const = 0;
|
virtual void print(std::ostream& to) const = 0;
|
||||||
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const = 0;
|
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
|
||||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
using pattern_ptr = std::unique_ptr<pattern>;
|
using pattern_ptr = std::unique_ptr<pattern>;
|
||||||
@ -50,8 +44,7 @@ struct ast_int : public ast {
|
|||||||
: value(v) {}
|
: value(v) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -62,8 +55,7 @@ struct ast_lid : public ast {
|
|||||||
: id(std::move(i)) {}
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -74,8 +66,7 @@ struct ast_uid : public ast {
|
|||||||
: id(std::move(i)) {}
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -88,8 +79,7 @@ struct ast_binop : public ast {
|
|||||||
: op(o), left(std::move(l)), right(std::move(r)) {}
|
: op(o), left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -101,8 +91,7 @@ struct ast_app : public ast {
|
|||||||
: left(std::move(l)), right(std::move(r)) {}
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -115,8 +104,7 @@ struct ast_case : public ast {
|
|||||||
: of(std::move(o)), branches(std::move(b)) {}
|
: of(std::move(o)), branches(std::move(b)) {}
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
void print(int indent, std::ostream& to) const;
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
type_ptr typecheck(type_mgr& mgr);
|
|
||||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -127,8 +115,7 @@ struct pattern_var : public pattern {
|
|||||||
: var(std::move(v)) {}
|
: var(std::move(v)) {}
|
||||||
|
|
||||||
void print(std::ostream &to) const;
|
void print(std::ostream &to) const;
|
||||||
void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
|
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
|
||||||
void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pattern_constr : public pattern {
|
struct pattern_constr : public pattern {
|
||||||
@ -139,6 +126,5 @@ struct pattern_constr : public pattern {
|
|||||||
: constr(std::move(c)), params(std::move(p)) {}
|
: constr(std::move(c)), params(std::move(p)) {}
|
||||||
|
|
||||||
void print(std::ostream &to) const;
|
void print(std::ostream &to) const;
|
||||||
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
|
void match(type_ptr t, type_mgr&, type_env& env) const;
|
||||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
|
||||||
};
|
};
|
||||||
|
@ -3,34 +3,35 @@
|
|||||||
#include "ast.hpp"
|
#include "ast.hpp"
|
||||||
#include "instruction.hpp"
|
#include "instruction.hpp"
|
||||||
#include "llvm_context.hpp"
|
#include "llvm_context.hpp"
|
||||||
#include "type.hpp"
|
|
||||||
#include "type_env.hpp"
|
|
||||||
#include <llvm/IR/DerivedTypes.h>
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
#include <llvm/IR/Function.h>
|
#include <llvm/IR/Function.h>
|
||||||
#include <llvm/IR/Type.h>
|
#include <llvm/IR/Type.h>
|
||||||
|
|
||||||
void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) {
|
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
|
||||||
this->env = env;
|
|
||||||
|
|
||||||
var_env = type_scope(env);
|
|
||||||
return_type = mgr.new_type();
|
return_type = mgr.new_type();
|
||||||
full_type = return_type;
|
type_ptr full_type = return_type;
|
||||||
|
|
||||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
type_ptr param_type = mgr.new_type();
|
type_ptr param_type = mgr.new_type();
|
||||||
full_type = type_ptr(new type_arr(param_type, full_type));
|
full_type = type_ptr(new type_arr(param_type, full_type));
|
||||||
var_env->bind(*it, param_type);
|
param_types.push_back(param_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
body->find_free(mgr, var_env, free_variables);
|
env.bind(name, full_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_defn::insert_types(type_mgr& mgr) {
|
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
|
||||||
env->bind(name, full_type);
|
type_env new_env = env.scope();
|
||||||
|
auto param_it = params.begin();
|
||||||
|
auto type_it = param_types.rbegin();
|
||||||
|
|
||||||
|
while(param_it != params.end() && type_it != param_types.rend()) {
|
||||||
|
new_env.bind(*param_it, *type_it);
|
||||||
|
param_it++;
|
||||||
|
type_it++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_defn::typecheck(type_mgr& mgr) {
|
type_ptr body_type = body->typecheck(mgr, new_env);
|
||||||
type_ptr body_type = body->typecheck(mgr);
|
|
||||||
mgr.unify(return_type, body_type);
|
mgr.unify(return_type, body_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -43,12 +44,11 @@ void definition_defn::compile() {
|
|||||||
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
}
|
}
|
||||||
|
void definition_defn::gen_llvm_first(llvm_context& ctx) {
|
||||||
void definition_defn::declare_llvm(llvm_context& ctx) {
|
|
||||||
generated_function = ctx.create_custom_function(name, params.size());
|
generated_function = ctx.create_custom_function(name, params.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_defn::generate_llvm(llvm_context& ctx) {
|
void definition_defn::gen_llvm_second(llvm_context& ctx) {
|
||||||
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
||||||
for(auto& instruction : instructions) {
|
for(auto& instruction : instructions) {
|
||||||
instruction->gen_llvm(ctx, generated_function);
|
instruction->gen_llvm(ctx, generated_function);
|
||||||
@ -56,14 +56,9 @@ void definition_defn::generate_llvm(llvm_context& ctx) {
|
|||||||
ctx.builder.CreateRetVoid();
|
ctx.builder.CreateRetVoid();
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_data::insert_types(type_mgr& mgr, type_env_ptr& env) {
|
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
|
||||||
this->env = env;
|
type_data* this_type = new type_data(name);
|
||||||
env->bind_type(name, type_ptr(new type_data(name)));
|
type_ptr return_type = type_ptr(this_type);
|
||||||
}
|
|
||||||
|
|
||||||
void definition_data::insert_constructors() const {
|
|
||||||
type_ptr return_type = env->lookup_type(name);
|
|
||||||
type_data* this_type = static_cast<type_data*>(return_type.get());
|
|
||||||
int next_tag = 0;
|
int next_tag = 0;
|
||||||
|
|
||||||
for(auto& constructor : constructors) {
|
for(auto& constructor : constructors) {
|
||||||
@ -72,16 +67,23 @@ void definition_data::insert_constructors() const {
|
|||||||
|
|
||||||
type_ptr full_type = return_type;
|
type_ptr full_type = return_type;
|
||||||
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||||
type_ptr type = env->lookup_type(*it);
|
type_ptr type = type_ptr(new type_base(*it));
|
||||||
if(!type) throw 0;
|
|
||||||
full_type = type_ptr(new type_arr(type, full_type));
|
full_type = type_ptr(new type_arr(type, full_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
env->bind(constructor->name, full_type);
|
env.bind(constructor->name, full_type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_data::generate_llvm(llvm_context& ctx) {
|
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
|
||||||
|
// Nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::compile() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::gen_llvm_first(llvm_context& ctx) {
|
||||||
for(auto& constructor : constructors) {
|
for(auto& constructor : constructors) {
|
||||||
auto new_function =
|
auto new_function =
|
||||||
ctx.create_custom_function(constructor->name, constructor->types.size());
|
ctx.create_custom_function(constructor->name, constructor->types.size());
|
||||||
@ -97,3 +99,7 @@ void definition_data::generate_llvm(llvm_context& ctx) {
|
|||||||
ctx.builder.CreateRetVoid();
|
ctx.builder.CreateRetVoid();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void definition_data::gen_llvm_second(llvm_context& ctx) {
|
||||||
|
// Nothing
|
||||||
|
}
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <set>
|
|
||||||
#include "instruction.hpp"
|
#include "instruction.hpp"
|
||||||
#include "llvm_context.hpp"
|
#include "llvm_context.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
@ -9,6 +8,18 @@
|
|||||||
struct ast;
|
struct ast;
|
||||||
using ast_ptr = std::unique_ptr<ast>;
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct definition {
|
||||||
|
virtual ~definition() = default;
|
||||||
|
|
||||||
|
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||||
|
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void compile() = 0;
|
||||||
|
virtual void gen_llvm_first(llvm_context& ctx) = 0;
|
||||||
|
virtual void gen_llvm_second(llvm_context& ctx) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using definition_ptr = std::unique_ptr<definition>;
|
||||||
|
|
||||||
struct constructor {
|
struct constructor {
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<std::string> types;
|
std::vector<std::string> types;
|
||||||
@ -20,16 +31,13 @@ struct constructor {
|
|||||||
|
|
||||||
using constructor_ptr = std::unique_ptr<constructor>;
|
using constructor_ptr = std::unique_ptr<constructor>;
|
||||||
|
|
||||||
struct definition_defn {
|
struct definition_defn : public definition {
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<std::string> params;
|
std::vector<std::string> params;
|
||||||
ast_ptr body;
|
ast_ptr body;
|
||||||
|
|
||||||
type_env_ptr env;
|
|
||||||
type_env_ptr var_env;
|
|
||||||
std::set<std::string> free_variables;
|
|
||||||
type_ptr full_type;
|
|
||||||
type_ptr return_type;
|
type_ptr return_type;
|
||||||
|
std::vector<type_ptr> param_types;
|
||||||
|
|
||||||
std::vector<instruction_ptr> instructions;
|
std::vector<instruction_ptr> instructions;
|
||||||
|
|
||||||
@ -40,28 +48,23 @@ struct definition_defn {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void find_free(type_mgr& mgr, type_env_ptr& env);
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
void insert_types(type_mgr& mgr);
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
void typecheck(type_mgr& mgr);
|
|
||||||
void compile();
|
void compile();
|
||||||
void declare_llvm(llvm_context& ctx);
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
void generate_llvm(llvm_context& ctx);
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
};
|
};
|
||||||
|
|
||||||
using definition_defn_ptr = std::unique_ptr<definition_defn>;
|
struct definition_data : public definition {
|
||||||
|
|
||||||
struct definition_data {
|
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<constructor_ptr> constructors;
|
std::vector<constructor_ptr> constructors;
|
||||||
|
|
||||||
type_env_ptr env;
|
|
||||||
|
|
||||||
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
||||||
: name(std::move(n)), constructors(std::move(cs)) {}
|
: name(std::move(n)), constructors(std::move(cs)) {}
|
||||||
|
|
||||||
void insert_types(type_mgr& mgr, type_env_ptr& env);
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
void insert_constructors() const;
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
void generate_llvm(llvm_context& ctx);
|
void compile();
|
||||||
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
};
|
};
|
||||||
|
|
||||||
using definition_data_ptr = std::unique_ptr<definition_data>;
|
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
data Bool = { True, False }
|
|
||||||
defn if c t e = {
|
|
||||||
case c of {
|
|
||||||
True -> { t }
|
|
||||||
False -> { e }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
defn main = { if (if True False True) 11 3 }
|
|
@ -46,7 +46,115 @@ class function_graph {
|
|||||||
std::map<group_id, data_ptr>&);
|
std::map<group_id, data_ptr>&);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::set<function>& add_function(const function& f);
|
|
||||||
void add_edge(const function& from, const function& to);
|
void add_edge(const function& from, const function& to);
|
||||||
std::vector<group_ptr> compute_order();
|
std::vector<group_ptr> compute_order();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::set<function_graph::edge> function_graph::compute_transitive_edges() {
|
||||||
|
std::set<edge> transitive_edges;
|
||||||
|
transitive_edges.insert(edges.begin(), edges.end());
|
||||||
|
for(auto& connector : adjacency_lists) {
|
||||||
|
for(auto& from : adjacency_lists) {
|
||||||
|
edge to_connector { from.first, connector.first };
|
||||||
|
for(auto& to : adjacency_lists) {
|
||||||
|
edge full_jump { from.first, to.first };
|
||||||
|
if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;
|
||||||
|
|
||||||
|
edge from_connector { connector.first, to.first };
|
||||||
|
if(transitive_edges.find(to_connector) != transitive_edges.end() &&
|
||||||
|
transitive_edges.find(from_connector) != transitive_edges.end())
|
||||||
|
transitive_edges.insert(std::move(full_jump));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return transitive_edges;
|
||||||
|
}
|
||||||
|
|
||||||
|
void function_graph::create_groups(
|
||||||
|
const std::set<edge>& transitive_edges,
|
||||||
|
std::map<function, group_id>& group_ids,
|
||||||
|
std::map<group_id, data_ptr>& group_data_map) {
|
||||||
|
group_id id_counter = 0;
|
||||||
|
for(auto& vertex : adjacency_lists) {
|
||||||
|
if(group_ids.find(vertex.first) != group_ids.end())
|
||||||
|
continue;
|
||||||
|
data_ptr new_group(new group_data);
|
||||||
|
new_group->functions.insert(vertex.first);
|
||||||
|
group_data_map[id_counter] = new_group;
|
||||||
|
group_ids[vertex.first] = id_counter;
|
||||||
|
for(auto& other_vertex : adjacency_lists) {
|
||||||
|
if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
|
||||||
|
transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
|
||||||
|
group_ids[other_vertex.first] = id_counter;
|
||||||
|
new_group->functions.insert(other_vertex.first);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
id_counter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void function_graph::create_edges(
|
||||||
|
std::map<function, group_id>& group_ids,
|
||||||
|
std::map<group_id, data_ptr>& group_data_map) {
|
||||||
|
std::set<std::pair<group_id, group_id>> group_edges;
|
||||||
|
for(auto& vertex : adjacency_lists) {
|
||||||
|
auto vertex_id = group_ids[vertex.first];
|
||||||
|
auto& vertex_data = group_data_map[vertex_id];
|
||||||
|
for(auto& other_vertex : vertex.second) {
|
||||||
|
auto other_id = group_ids[other_vertex];
|
||||||
|
if(vertex_id == other_id) continue;
|
||||||
|
if(group_edges.find({vertex_id, other_id}) != group_edges.end())
|
||||||
|
continue;
|
||||||
|
group_edges.insert({vertex_id, other_id});
|
||||||
|
vertex_data->adjacency_list.insert(other_id);
|
||||||
|
group_data_map[other_id]->indegree++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<group_ptr> function_graph::generate_order(
|
||||||
|
std::map<function, group_id>& group_ids,
|
||||||
|
std::map<group_id, data_ptr>& group_data_map) {
|
||||||
|
std::queue<group_id> id_queue;
|
||||||
|
std::vector<group_ptr> output;
|
||||||
|
for(auto& group : group_data_map) {
|
||||||
|
if(group.second->indegree == 0) id_queue.push(group.first);
|
||||||
|
}
|
||||||
|
|
||||||
|
while(!id_queue.empty()) {
|
||||||
|
auto new_id = id_queue.front();
|
||||||
|
auto& group_data = group_data_map[new_id];
|
||||||
|
group_ptr output_group(new group);
|
||||||
|
output_group->members = std::move(group_data->functions);
|
||||||
|
id_queue.pop();
|
||||||
|
|
||||||
|
for(auto& adjacent_group : group_data->adjacency_list) {
|
||||||
|
if(--group_data_map[adjacent_group]->indegree == 0)
|
||||||
|
id_queue.push(adjacent_group);
|
||||||
|
}
|
||||||
|
|
||||||
|
output.push_back(std::move(output_group));
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
void function_graph::add_edge(const function& from, const function& to) {
|
||||||
|
auto adjacency_list_it = adjacency_lists.find(from);
|
||||||
|
if(adjacency_list_it != adjacency_lists.end()) {
|
||||||
|
adjacency_list_it->second.insert(to);
|
||||||
|
} else {
|
||||||
|
adjacency_lists[from] = { to };
|
||||||
|
}
|
||||||
|
edges.insert({ from, to });
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<group_ptr> function_graph::compute_order() {
|
||||||
|
std::set<edge> transitive_edges = compute_transitive_edges();
|
||||||
|
std::map<function, group_id> group_ids;
|
||||||
|
std::map<group_id, data_ptr> group_data_map;
|
||||||
|
|
||||||
|
create_groups(transitive_edges, group_ids, group_data_map);
|
||||||
|
create_edges(group_ids, group_data_map);
|
||||||
|
return generate_order(group_ids, group_data_map);
|
||||||
|
}
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
#include "definition.hpp"
|
#include "definition.hpp"
|
||||||
#include "graph.hpp"
|
|
||||||
#include "instruction.hpp"
|
#include "instruction.hpp"
|
||||||
#include "llvm_context.hpp"
|
#include "llvm_context.hpp"
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
@ -21,72 +20,43 @@ void yy::parser::error(const std::string& msg) {
|
|||||||
std::cout << "An error occured: " << msg << std::endl;
|
std::cout << "An error occured: " << msg << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern std::map<std::string, definition_data_ptr> defs_data;
|
extern std::vector<definition_ptr> program;
|
||||||
extern std::map<std::string, definition_defn_ptr> defs_defn;
|
|
||||||
|
|
||||||
void typecheck_program(
|
void typecheck_program(
|
||||||
const std::map<std::string, definition_data_ptr>& defs_data,
|
const std::vector<definition_ptr>& prog,
|
||||||
const std::map<std::string, definition_defn_ptr>& defs_defn,
|
type_mgr& mgr, type_env& env) {
|
||||||
type_mgr& mgr, type_env_ptr& env) {
|
|
||||||
type_ptr int_type = type_ptr(new type_base("Int"));
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
env->bind_type("Int", int_type);
|
|
||||||
|
|
||||||
type_ptr binop_type = type_ptr(new type_arr(
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
int_type,
|
int_type,
|
||||||
type_ptr(new type_arr(int_type, int_type))));
|
type_ptr(new type_arr(int_type, int_type))));
|
||||||
env->bind("+", binop_type);
|
|
||||||
env->bind("-", binop_type);
|
|
||||||
env->bind("*", binop_type);
|
|
||||||
env->bind("/", binop_type);
|
|
||||||
|
|
||||||
for(auto& def_data : defs_data) {
|
env.bind("+", binop_type);
|
||||||
def_data.second->insert_types(mgr, env);
|
env.bind("-", binop_type);
|
||||||
}
|
env.bind("*", binop_type);
|
||||||
for(auto& def_data : defs_data) {
|
env.bind("/", binop_type);
|
||||||
def_data.second->insert_constructors();
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_first(mgr, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
function_graph dependency_graph;
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_second(mgr, env);
|
||||||
for(auto& def_defn : defs_defn) {
|
|
||||||
def_defn.second->find_free(mgr, env);
|
|
||||||
dependency_graph.add_function(def_defn.second->name);
|
|
||||||
|
|
||||||
for(auto& dependency : def_defn.second->free_variables) {
|
|
||||||
if(defs_defn.find(dependency) == defs_defn.end())
|
|
||||||
throw 0;
|
|
||||||
dependency_graph.add_edge(def_defn.second->name, dependency);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<group_ptr> groups = dependency_graph.compute_order();
|
for(auto& pair : env.names) {
|
||||||
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
|
|
||||||
auto& group = *it;
|
|
||||||
for(auto& def_defnn_name : group->members) {
|
|
||||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
|
||||||
def_defn->insert_types(mgr);
|
|
||||||
}
|
|
||||||
for(auto& def_defnn_name : group->members) {
|
|
||||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
|
||||||
def_defn->typecheck(mgr);
|
|
||||||
}
|
|
||||||
for(auto& def_defnn_name : group->members) {
|
|
||||||
env->generalize(def_defnn_name, mgr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(auto& pair : env->names) {
|
|
||||||
std::cout << pair.first << ": ";
|
std::cout << pair.first << ": ";
|
||||||
pair.second->print(mgr, std::cout);
|
pair.second->print(mgr, std::cout);
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void compile_program(const std::map<std::string, definition_defn_ptr>& defs_defn) {
|
void compile_program(const std::vector<definition_ptr>& prog) {
|
||||||
for(auto& def_defn : defs_defn) {
|
for(auto& def : prog) {
|
||||||
def_defn.second->compile();
|
def->compile();
|
||||||
|
|
||||||
for(auto& instruction : def_defn.second->instructions) {
|
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
|
||||||
|
if(!defn) continue;
|
||||||
|
for(auto& instruction : defn->instructions) {
|
||||||
instruction->print(0, std::cout);
|
instruction->print(0, std::cout);
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
@ -150,25 +120,20 @@ void output_llvm(llvm_context& ctx, const std::string& filename) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void gen_llvm(
|
void gen_llvm(const std::vector<definition_ptr>& prog) {
|
||||||
const std::map<std::string, definition_data_ptr>& defs_data,
|
|
||||||
const std::map<std::string, definition_defn_ptr>& defs_defn) {
|
|
||||||
llvm_context ctx;
|
llvm_context ctx;
|
||||||
gen_llvm_internal_op(ctx, PLUS);
|
gen_llvm_internal_op(ctx, PLUS);
|
||||||
gen_llvm_internal_op(ctx, MINUS);
|
gen_llvm_internal_op(ctx, MINUS);
|
||||||
gen_llvm_internal_op(ctx, TIMES);
|
gen_llvm_internal_op(ctx, TIMES);
|
||||||
gen_llvm_internal_op(ctx, DIVIDE);
|
gen_llvm_internal_op(ctx, DIVIDE);
|
||||||
|
|
||||||
for(auto& def_data : defs_data) {
|
for(auto& definition : prog) {
|
||||||
def_data.second->generate_llvm(ctx);
|
definition->gen_llvm_first(ctx);
|
||||||
}
|
|
||||||
for(auto& def_defn : defs_defn) {
|
|
||||||
def_defn.second->declare_llvm(ctx);
|
|
||||||
}
|
|
||||||
for(auto& def_defn : defs_defn) {
|
|
||||||
def_defn.second->generate_llvm(ctx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for(auto& definition : prog) {
|
||||||
|
definition->gen_llvm_second(ctx);
|
||||||
|
}
|
||||||
ctx.module.print(llvm::outs(), nullptr);
|
ctx.module.print(llvm::outs(), nullptr);
|
||||||
output_llvm(ctx, "program.o");
|
output_llvm(ctx, "program.o");
|
||||||
}
|
}
|
||||||
@ -176,20 +141,23 @@ void gen_llvm(
|
|||||||
int main() {
|
int main() {
|
||||||
yy::parser parser;
|
yy::parser parser;
|
||||||
type_mgr mgr;
|
type_mgr mgr;
|
||||||
type_env_ptr env(new type_env);
|
type_env env;
|
||||||
|
|
||||||
parser.parse();
|
parser.parse();
|
||||||
for(auto& def_defn : defs_defn) {
|
for(auto& definition : program) {
|
||||||
std::cout << def_defn.second->name;
|
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||||
for(auto& param : def_defn.second->params) std::cout << " " << param;
|
if(!def) continue;
|
||||||
|
|
||||||
|
std::cout << def->name;
|
||||||
|
for(auto& param : def->params) std::cout << " " << param;
|
||||||
std::cout << ":" << std::endl;
|
std::cout << ":" << std::endl;
|
||||||
|
|
||||||
def_defn.second->body->print(1, std::cout);
|
def->body->print(1, std::cout);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
typecheck_program(defs_data, defs_defn, mgr, env);
|
typecheck_program(program, mgr, env);
|
||||||
compile_program(defs_defn);
|
compile_program(program);
|
||||||
gen_llvm(defs_data, defs_defn);
|
gen_llvm(program);
|
||||||
} catch(unification_error& err) {
|
} catch(unification_error& err) {
|
||||||
std::cout << "failed to unify types: " << std::endl;
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
std::cout << " (1) \033[34m";
|
std::cout << " (1) \033[34m";
|
||||||
|
@ -1,14 +1,11 @@
|
|||||||
%{
|
%{
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <map>
|
|
||||||
#include "ast.hpp"
|
#include "ast.hpp"
|
||||||
#include "definition.hpp"
|
#include "definition.hpp"
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
|
|
||||||
std::map<std::string, definition_data_ptr> defs_data;
|
std::vector<definition_ptr> program;
|
||||||
std::map<std::string, definition_defn_ptr> defs_defn;
|
|
||||||
|
|
||||||
extern yy::parser::symbol_type yylex();
|
extern yy::parser::symbol_type yylex();
|
||||||
|
|
||||||
%}
|
%}
|
||||||
@ -37,11 +34,11 @@ extern yy::parser::symbol_type yylex();
|
|||||||
%define api.token.constructor
|
%define api.token.constructor
|
||||||
|
|
||||||
%type <std::vector<std::string>> lowercaseParams uppercaseParams
|
%type <std::vector<std::string>> lowercaseParams uppercaseParams
|
||||||
|
%type <std::vector<definition_ptr>> program definitions
|
||||||
%type <std::vector<branch_ptr>> branches
|
%type <std::vector<branch_ptr>> branches
|
||||||
%type <std::vector<constructor_ptr>> constructors
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
%type <ast_ptr> aAdd aMul case app appBase
|
%type <ast_ptr> aAdd aMul case app appBase
|
||||||
%type <definition_data_ptr> data
|
%type <definition_ptr> definition defn data
|
||||||
%type <definition_defn_ptr> defn
|
|
||||||
%type <branch_ptr> branch
|
%type <branch_ptr> branch
|
||||||
%type <pattern_ptr> pattern
|
%type <pattern_ptr> pattern
|
||||||
%type <constructor_ptr> constructor
|
%type <constructor_ptr> constructor
|
||||||
@ -51,22 +48,22 @@ extern yy::parser::symbol_type yylex();
|
|||||||
%%
|
%%
|
||||||
|
|
||||||
program
|
program
|
||||||
: definitions { }
|
: definitions { program = std::move($1); }
|
||||||
;
|
;
|
||||||
|
|
||||||
definitions
|
definitions
|
||||||
: definitions definition { }
|
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
| definition { }
|
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
|
||||||
;
|
;
|
||||||
|
|
||||||
definition
|
definition
|
||||||
: defn { auto name = $1->name; defs_defn[name] = std::move($1); }
|
: defn { $$ = std::move($1); }
|
||||||
| data { auto name = $1->name; defs_data[name] = std::move($1); }
|
| data { $$ = std::move($1); }
|
||||||
;
|
;
|
||||||
|
|
||||||
defn
|
defn
|
||||||
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||||
{ $$ = definition_defn_ptr(
|
{ $$ = definition_ptr(
|
||||||
new definition_defn(std::move($2), std::move($3), std::move($6))); }
|
new definition_defn(std::move($2), std::move($3), std::move($6))); }
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -128,7 +125,7 @@ pattern
|
|||||||
|
|
||||||
data
|
data
|
||||||
: DATA UID EQUAL OCURLY constructors CCURLY
|
: DATA UID EQUAL OCURLY constructors CCURLY
|
||||||
{ $$ = definition_data_ptr(new definition_data(std::move($2), std::move($5))); }
|
{ $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
|
||||||
;
|
;
|
||||||
|
|
||||||
constructors
|
constructors
|
||||||
|
@ -1,45 +1,8 @@
|
|||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
#include <ostream>
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
|
||||||
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
|
||||||
if(forall.size() != 0) {
|
|
||||||
to << "forall ";
|
|
||||||
for(auto& var : forall) {
|
|
||||||
to << var << " ";
|
|
||||||
}
|
|
||||||
to << ". ";
|
|
||||||
}
|
|
||||||
monotype->print(mgr, to);
|
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr substitute(const type_mgr& mgr, const std::map<std::string, type_ptr>& subst, const type_ptr& t) {
|
|
||||||
type_var* var;
|
|
||||||
type_ptr resolved = mgr.resolve(t, var);
|
|
||||||
if(var) {
|
|
||||||
auto subst_it = subst.find(var->name);
|
|
||||||
if(subst_it == subst.end()) return resolved;
|
|
||||||
return subst_it->second;
|
|
||||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(t.get())) {
|
|
||||||
auto left_result = substitute(mgr, subst, arr->left);
|
|
||||||
auto right_result = substitute(mgr, subst, arr->right);
|
|
||||||
if(left_result == arr->left && right_result == arr->right) return t;
|
|
||||||
return type_ptr(new type_arr(left_result, right_result));
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
|
||||||
if(forall.size() == 0) return monotype;
|
|
||||||
std::map<std::string, type_ptr> subst;
|
|
||||||
for(auto& var : forall) {
|
|
||||||
subst[var] = mgr.new_type();
|
|
||||||
}
|
|
||||||
return substitute(mgr, subst, monotype);
|
|
||||||
}
|
|
||||||
|
|
||||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
auto it = mgr.types.find(name);
|
auto it = mgr.types.find(name);
|
||||||
if(it != mgr.types.end()) {
|
if(it != mgr.types.end()) {
|
||||||
@ -134,15 +97,3 @@ void type_mgr::bind(const std::string& s, type_ptr t) {
|
|||||||
if(other && other->name == s) return;
|
if(other && other->name == s) return;
|
||||||
types[s] = t;
|
types[s] = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
|
|
||||||
type_var* var;
|
|
||||||
type_ptr resolved = resolve(t, var);
|
|
||||||
|
|
||||||
if(var) {
|
|
||||||
into.insert(var->name);
|
|
||||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) {
|
|
||||||
find_free(arr->left, into);
|
|
||||||
find_free(arr->right, into);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
|
||||||
#include <set>
|
|
||||||
|
|
||||||
struct type_mgr;
|
struct type_mgr;
|
||||||
|
|
||||||
@ -14,18 +12,6 @@ struct type {
|
|||||||
|
|
||||||
using type_ptr = std::shared_ptr<type>;
|
using type_ptr = std::shared_ptr<type>;
|
||||||
|
|
||||||
struct type_scheme {
|
|
||||||
std::vector<std::string> forall;
|
|
||||||
type_ptr monotype;
|
|
||||||
|
|
||||||
type_scheme(type_ptr type) : forall(), monotype(std::move(type)) {}
|
|
||||||
|
|
||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
|
||||||
type_ptr instantiate(type_mgr& mgr) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
using type_scheme_ptr = std::shared_ptr<type_scheme>;
|
|
||||||
|
|
||||||
struct type_var : public type {
|
struct type_var : public type {
|
||||||
std::string name;
|
std::string name;
|
||||||
|
|
||||||
@ -76,5 +62,4 @@ struct type_mgr {
|
|||||||
void unify(type_ptr l, type_ptr r);
|
void unify(type_ptr l, type_ptr r);
|
||||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
void bind(const std::string& s, type_ptr t);
|
void bind(const std::string& s, type_ptr t);
|
||||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
|
||||||
};
|
};
|
||||||
|
@ -1,45 +1,16 @@
|
|||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
#include "type.hpp"
|
|
||||||
|
|
||||||
type_scheme_ptr type_env::lookup(const std::string& name) const {
|
type_ptr type_env::lookup(const std::string& name) const {
|
||||||
auto it = names.find(name);
|
auto it = names.find(name);
|
||||||
if(it != names.end()) return it->second;
|
if(it != names.end()) return it->second;
|
||||||
if(parent) return parent->lookup(name);
|
if(parent) return parent->lookup(name);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr type_env::lookup_type(const std::string& name) const {
|
|
||||||
auto it = type_names.find(name);
|
|
||||||
if(it != type_names.end()) return it->second;
|
|
||||||
if(parent) return parent->lookup_type(name);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
void type_env::bind(const std::string& name, type_ptr t) {
|
void type_env::bind(const std::string& name, type_ptr t) {
|
||||||
names[name] = type_scheme_ptr(new type_scheme(t));
|
|
||||||
}
|
|
||||||
|
|
||||||
void type_env::bind(const std::string& name, type_scheme_ptr t) {
|
|
||||||
names[name] = t;
|
names[name] = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_env::bind_type(const std::string& type_name, type_ptr t) {
|
type_env type_env::scope() const {
|
||||||
if(lookup_type(type_name) != nullptr) throw 0;
|
return type_env(this);
|
||||||
type_names[type_name] = t;
|
|
||||||
}
|
|
||||||
|
|
||||||
void type_env::generalize(const std::string& name, type_mgr& mgr) {
|
|
||||||
auto names_it = names.find(name);
|
|
||||||
if(names_it == names.end()) throw 0;
|
|
||||||
if(names_it->second->forall.size() > 0) throw 0;
|
|
||||||
|
|
||||||
std::set<std::string> free_variables;
|
|
||||||
mgr.find_free(names_it->second->monotype, free_variables);
|
|
||||||
for(auto& free : free_variables) {
|
|
||||||
names_it->second->forall.push_back(free);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type_env_ptr type_scope(type_env_ptr parent) {
|
|
||||||
return type_env_ptr(new type_env(std::move(parent)));
|
|
||||||
}
|
}
|
||||||
|
@ -2,24 +2,15 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
|
|
||||||
struct type_env;
|
|
||||||
using type_env_ptr = std::shared_ptr<type_env>;
|
|
||||||
|
|
||||||
struct type_env {
|
struct type_env {
|
||||||
type_env_ptr parent;
|
std::map<std::string, type_ptr> names;
|
||||||
std::map<std::string, type_scheme_ptr> names;
|
type_env const* parent = nullptr;
|
||||||
std::map<std::string, type_ptr> type_names;
|
|
||||||
|
|
||||||
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
type_env(type_env const* p)
|
||||||
|
: parent(p) {}
|
||||||
type_env() : type_env(nullptr) {}
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
type_scheme_ptr lookup(const std::string& name) const;
|
type_ptr lookup(const std::string& name) const;
|
||||||
type_ptr lookup_type(const std::string& name) const;
|
|
||||||
void bind(const std::string& name, type_ptr t);
|
void bind(const std::string& name, type_ptr t);
|
||||||
void bind(const std::string& name, type_scheme_ptr t);
|
type_env scope() const;
|
||||||
void bind_type(const std::string& type_name, type_ptr t);
|
|
||||||
void generalize(const std::string& name, type_mgr& mgr);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
type_env_ptr type_scope(type_env_ptr parent);
|
|
||||||
|
@ -257,8 +257,7 @@ they are placed in one group. We then construct a dependency graph __of these gr
|
|||||||
3. We compute a topological order of the group graph. This helps us typecheck the dependencies
|
3. We compute a topological order of the group graph. This helps us typecheck the dependencies
|
||||||
of functions before checking the functions themselves. In our specific case, this would ensure
|
of functions before checking the functions themselves. In our specific case, this would ensure
|
||||||
we check `if` first, and only then move on to `testOne` and `testTwo`. The order of typechecking
|
we check `if` first, and only then move on to `testOne` and `testTwo`. The order of typechecking
|
||||||
within a group does not matter, as long as we generalize only after typechecking all functions
|
within a group does not matter.
|
||||||
in a group.
|
|
||||||
4. We typecheck the function groups, and functions within them, following the above topological order.
|
4. We typecheck the function groups, and functions within them, following the above topological order.
|
||||||
|
|
||||||
To find the transitive closure of a graph, we can use [Warshall's Algorithm](https://cs.winona.edu/lin/cs440/ch08-2.pdf).
|
To find the transitive closure of a graph, we can use [Warshall's Algorithm](https://cs.winona.edu/lin/cs440/ch08-2.pdf).
|
||||||
@ -327,7 +326,7 @@ I think that we should create a C++ class that will represent our function
|
|||||||
dependency graph. Let's call it `function_graph`. I propose the following
|
dependency graph. Let's call it `function_graph`. I propose the following
|
||||||
definition:
|
definition:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.hpp" 12 52 >}}
|
{{< codelines "C++" "compiler/10/graph.hpp" 12 51 >}}
|
||||||
|
|
||||||
There's a lot to unpack here. First of all, we create a type alias `function` that
|
There's a lot to unpack here. First of all, we create a type alias `function` that
|
||||||
represents the label of a function in our graph. It is probably most convenient
|
represents the label of a function in our graph. It is probably most convenient
|
||||||
@ -364,22 +363,16 @@ of each group are computed, as well as their adjacency lists.
|
|||||||
* `generate_order` uses the indegrees and adjacency lists produced in the prior step
|
* `generate_order` uses the indegrees and adjacency lists produced in the prior step
|
||||||
to establish a topological order.
|
to establish a topological order.
|
||||||
|
|
||||||
Following these, we have three public function definitions:
|
Finally, the `add_edge` method is used to add a new dependency between two functions,
|
||||||
* `add_function` adds a vertex to the graph. Sometimes, a function does not
|
while the `compute_order` method uses the internal methods described above to convert
|
||||||
reference any other functions, and would not appear in the list of edges.
|
|
||||||
We will call this function to make sure that the function graph is aware
|
|
||||||
of such functions. For convenience, this function returns the adjacency list
|
|
||||||
of the added function.
|
|
||||||
* `add_edge` adds a new dependency between two functions.
|
|
||||||
* `compute_order` uses the internal methods described above to convert
|
|
||||||
the function dependency graph into a properly ordered list of groups.
|
the function dependency graph into a properly ordered list of groups.
|
||||||
|
|
||||||
Let's start by looking at how to implement the internal methods. `compute_transitive_edges`
|
Let's start by looking at how to implement the internal methods. `compute_transitive_edges`
|
||||||
is a very straightforward implementation of Warshall's:
|
is a very straightforward implementation of Warshall's:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.cpp" 3 21 >}}
|
{{< codelines "C++" "compiler/10/graph.hpp" 53 71 >}}
|
||||||
|
|
||||||
Next is `create_groups`. For each function, we iterate over all other functions.
|
Next is `create_groups`. For each function, we iterate over all other functions.
|
||||||
If the other function is mutually dependent with the first function, we add
|
If the other function is mutually dependent with the first function, we add
|
||||||
it to the same group. In the outer loop, we skip over functions that have
|
it to the same group. In the outer loop, we skip over functions that have
|
||||||
already been added to the group. This is because
|
already been added to the group. This is because
|
||||||
@ -399,7 +392,7 @@ is an [equivalence relation](https://en.wikipedia.org/wiki/Equivalence_relation)
|
|||||||
which means that if we already added a function to a group, all its
|
which means that if we already added a function to a group, all its
|
||||||
group members were also already visited and added.
|
group members were also already visited and added.
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.cpp" 23 44 >}}
|
{{< codelines "C++" "compiler/10/graph.hpp" 73 94 >}}
|
||||||
|
|
||||||
Once groups have been created, we use their functions' edges
|
Once groups have been created, we use their functions' edges
|
||||||
to create edges for the groups themselves, using `create_edges`.
|
to create edges for the groups themselves, using `create_edges`.
|
||||||
@ -407,362 +400,26 @@ We avoid creating edges from a group to itself, to avoid
|
|||||||
unnecessary cycles. While constructing the edges, we also
|
unnecessary cycles. While constructing the edges, we also
|
||||||
increment the relevant indegree counter.
|
increment the relevant indegree counter.
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.cpp" 46 63 >}}
|
{{< codelines "C++" "compiler/10/graph.hpp" 96 113 >}}
|
||||||
|
|
||||||
Finally, we apply Kahn's algorithm to create a topological order
|
Finally, we apply Kahn's algorithm to create a topological order
|
||||||
in `generate_order`:
|
in `generate_order`:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.cpp" 65 90 >}}
|
{{< codelines "C++" "compiler/10/graph.hpp" 115 140 >}}
|
||||||
|
|
||||||
These four steps are used in `compute_order`:
|
These four steps are used in `compute_order`:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.cpp" 106 114 >}}
|
{{< codelines "C++" "compiler/10/graph.hpp" 152 160 >}}
|
||||||
|
|
||||||
Let's now look at the remaining two public definitions.
|
Finally, `add_edge` straightforwardly adds an edge
|
||||||
First comes `add_function`, which creates an adjacency list for the
|
to the graph:
|
||||||
function to be inserted (if one does not already exist),
|
|
||||||
and returns a reference to the resulting list:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.cpp" 92 99 >}}
|
{{< codelines "C++" "compiler/10/graph.hpp" 142 150 >}}
|
||||||
|
|
||||||
We use this in `add_edge`, which straightforwardly creates an edge
|
|
||||||
between two functions:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/graph.cpp" 101 104 >}}
|
|
||||||
|
|
||||||
With this, we can now properly order our typechecking.
|
With this, we can now properly order our typechecking.
|
||||||
However, we are just getting started: there are still
|
However, there are a few pieces of the puzzle missing.
|
||||||
numerous changes we need to make to get our compiler
|
First of all, we need to actually insert function
|
||||||
to behave as we desire.
|
dependencies into the graph. Second, we need to think
|
||||||
|
about how our existing language features and implementation
|
||||||
The first change is the least relevant, but will help clean
|
will interact with polymorphism. Third, we have to come up
|
||||||
up our code base in the presence of polymorphism: we will
|
with an implementation of polymorphic data types.
|
||||||
get rid of `resolve`, in both definitions and AST nodes.
|
|
||||||
The reasons for this are twofold. First,
|
|
||||||
{{< sidenote "right" "case-type-note" "only the case expression node actually uses the type it stores." >}}
|
|
||||||
Recall that <code>ast_case</code> needs this information to properly
|
|
||||||
account for the changes to the stack from when data is unpacked.
|
|
||||||
{{< /sidenote >}} This means that
|
|
||||||
all the rest of the infrastructure we've written around
|
|
||||||
preserving types is somewhat pointless. Second, when
|
|
||||||
we call `resolve`, we'd now have to distinguish
|
|
||||||
between type variables captured by "forall" and actual,
|
|
||||||
undefined variables. That's a lot of wasted work!
|
|
||||||
To replace the now-removed `type` field,
|
|
||||||
we make `ast_case` include a new member, `input_type`,
|
|
||||||
which stores the type of the thing between `case` and `of`.
|
|
||||||
Since `ast_case` requires its type to be a data type
|
|
||||||
at the time of typechecking, we no longer need to resolve anything.
|
|
||||||
|
|
||||||
Next, we need to work in a step geared towards finding function calls
|
|
||||||
(to determine dependencies). As we have noted in [part 6]({{< relref "06_compiler_compilation.md" >}}),
|
|
||||||
it's pretty easy to tell apart calls to global functions from "local" ones. If
|
|
||||||
we see that a variable was previously bound (perhaps as a function argument,
|
|
||||||
or by a pattern in a case expression), we know for sure that it is not a global
|
|
||||||
function call. Otherwise, if the variable isn't bound anywhere in the function
|
|
||||||
definition (it's a __free variable__), it must refer to a global function. Then,
|
|
||||||
we can traverse the function body, storing variables that are bound (but only within
|
|
||||||
their scope), and noting references to variables we haven't yet seen. To
|
|
||||||
implement this, we can use a linked list, where each node refers to a particular
|
|
||||||
scope, points to the scope enclosing it, and contains a list of variables...
|
|
||||||
|
|
||||||
Wait a minute, this is identical to `type_env`! There's no reason to reimplement all
|
|
||||||
this. But then, another question arises: do we throw away the `type_env` generated
|
|
||||||
by the dependency-searching step? It seems wasteful, since we will eventually
|
|
||||||
repeat this same work. Rather, we'll re-use the same `type_env` instances
|
|
||||||
in both this new step and `typecheck`. To do this, we will now store a pointer
|
|
||||||
to a `type_env` in every AST node, and set this pointer during our first traversal
|
|
||||||
of the tree. Indeed, this makes our `type_env` more like a
|
|
||||||
[symbol table](https://en.wikipedia.org/wiki/Symbol_table). With this change,
|
|
||||||
our new dependency-finding step will be implemented by the `find_free` function
|
|
||||||
with the following signature:
|
|
||||||
|
|
||||||
```C++
|
|
||||||
void ast::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
|
|
||||||
```
|
|
||||||
|
|
||||||
Let's take a look at how this will be implemented. The simplest case (as usual)
|
|
||||||
is `ast_int`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 16 18 >}}
|
|
||||||
|
|
||||||
In this case, we associate the `type_env` with the node, but don't do anything
|
|
||||||
else: a number is not a variable. A more interesting case is `ast_lid`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 33 36 >}}
|
|
||||||
|
|
||||||
If a lowercase variable has not yet been bound to something, it's free,
|
|
||||||
and we store it. Somewhat counterintuitively, `ast_uid` behaves
|
|
||||||
differently:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 54 56 >}}
|
|
||||||
|
|
||||||
We don't allow uppercase variables to be bound to anything outside of data type
|
|
||||||
declarations, so we don't care about uppercase free variables. Next up is
|
|
||||||
`ast_binop`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 73 77 >}}
|
|
||||||
|
|
||||||
A binary operator can have free variables in the subexpressions on the left and on the right, and
|
|
||||||
the above implementation reflects that. This is identical to the implementation of
|
|
||||||
`ast_app`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 109 113 >}}
|
|
||||||
|
|
||||||
Finally, `ast_case` requires the most complicated function (as usual):
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 142 150 >}}
|
|
||||||
|
|
||||||
The `type_scope` function replaces the `type_env::scope` method,
|
|
||||||
which cannot (without significant effort) operate on smart pointers.
|
|
||||||
Importantly, we are using a new `pattern` method here, `insert_bindings`. This
|
|
||||||
is because we split "introducing variables" and "typechecking variables"
|
|
||||||
into two steps for patterns, as well. The implementation of `insert_bindings`
|
|
||||||
for `pattern_var` is as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 230 232 >}}
|
|
||||||
|
|
||||||
A variable pattern always introduces the variable it is made up of.
|
|
||||||
On the other hand, the implementation for `pattern_constr` is as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 245 249 >}}
|
|
||||||
|
|
||||||
All the variables of the pattern are placed into the environment. For now, we don't worry
|
|
||||||
about arity; this is the job of typechecking.
|
|
||||||
|
|
||||||
These changes are reflected in all instances of our `typecheck` function. First of
|
|
||||||
all, `typecheck` no longer needs to receive a `type_env` parameter, since each
|
|
||||||
tree node has a `type_env_ptr`. Furthermore, `typecheck` should no longer call
|
|
||||||
`bind`, since this was already done by `find_free`. For example,
|
|
||||||
`ast_lid::typecheck` will now use `env::lookup`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 38 40 >}}
|
|
||||||
|
|
||||||
Don't worry about `instantiate` for now; that's coming up. Similarly to
|
|
||||||
`ast_lid`, `ast_case::typecheck` will no longer introduce new bindings,
|
|
||||||
and unify instead:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 152 169 >}}
|
|
||||||
|
|
||||||
The above implementation uses another new `pattern` method, `typecheck`.
|
|
||||||
This method inherits the type checking functionality previously
|
|
||||||
contained in `pattern::match`. Here's the implementation for `pattern_var`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 234 236 >}}
|
|
||||||
|
|
||||||
And here's the implementation for `pattern_constr`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/ast.cpp" 251 266 >}}
|
|
||||||
|
|
||||||
So far, so good. However, for all of this to reach the main typechecking
|
|
||||||
code, not only `ast` subclasses need to be updated, but also
|
|
||||||
the `definition`s. Here things get more complicated, because
|
|
||||||
`definition_data` and `definition_defn` are growing more and more apart.
|
|
||||||
Previously, we had two typechecking steps: `typecheck_first` (which registered
|
|
||||||
function names into the environment) and `typecheck_second` (which performed
|
|
||||||
the actual typechecking). However, not only are these names not informative,
|
|
||||||
but the algorithms for typechecking the two types of definition will soon
|
|
||||||
have different numbers of "major" steps.
|
|
||||||
|
|
||||||
Let's take a look at how we would typecheck data types. I propose the following
|
|
||||||
steps:
|
|
||||||
|
|
||||||
1. Iterate all declared data types, storing them into some kind of "known" list.
|
|
||||||
2. Iterate again, and for each constructor of a type, verify that
|
|
||||||
it refers to "known" types. Add valid constructors to the global environment as functions.
|
|
||||||
|
|
||||||
We don't currently verify that types are "known"; a user could declare a list of `Floobs`,
|
|
||||||
and never say what a `Floob` is. This isn't too big of an issue (good luck constructing
|
|
||||||
a value of a non-existent type), but a mature compiler should prevent this from happening.
|
|
||||||
|
|
||||||
On the other hand, here are the steps for function definitions:
|
|
||||||
|
|
||||||
1. Find the free variables of each function to create the ordered list of groups as described above.
|
|
||||||
2. Within each group, insert a general function type (like \\(a \\rightarrow b \\rightarrow c\\))
|
|
||||||
into the environment for each function.
|
|
||||||
3. Within each group (in the same pass) run typechecking
|
|
||||||
(including polymorphism, using the rules as described above).
|
|
||||||
|
|
||||||
The two types of definitions further diverge when generating LLVM and compiling to G-machine instructions:
|
|
||||||
data types immediately construct and insert their functions, and do not emit G-machine instructions,
|
|
||||||
while functions generate G-machine instructions, declare prototypes, and emit LLVM in three distinct phases.
|
|
||||||
Overall, there are virtually no similarities between the two types of definitions, and any inheritance
|
|
||||||
of common functions starts to appear somewhat forced. To address this, we remove the `definition` class
|
|
||||||
altogether, and sever the relationship between `definition_data` and `definition_defn`. The
|
|
||||||
two now look as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/definition.hpp" 23 67 >}}
|
|
||||||
|
|
||||||
In `definition_defn`, the functions are arranged as follows:
|
|
||||||
|
|
||||||
* `find_free` locates the free variables in the definition, populating
|
|
||||||
the `free_variables` field and thereby finding edges for the function graph.
|
|
||||||
* `insert_types` stores the type of the function into the global environment
|
|
||||||
(a pointer to which is now stored as a field).
|
|
||||||
* `typecheck` runs the standard typechecking steps.
|
|
||||||
* `compile` generates G-machine instructions.
|
|
||||||
* `declare_llvm` inserts LLVM function prototypes into the `llvm_context`.
|
|
||||||
* `generate_llvm` converts G-machine instructions into LLVM IR.
|
|
||||||
|
|
||||||
In `definition_data`, the steps are significantly simpler:
|
|
||||||
|
|
||||||
* `insert_types` registers the type being declared as a "known" type.
|
|
||||||
* `insert_constructors` inserts constructors (which are verified to
|
|
||||||
refer to "known" types) into the global environment.
|
|
||||||
* `generate_llvm` creates the LLVM functions (and their IR).
|
|
||||||
|
|
||||||
While the last three methods of `definition_defn` remain unchanged save
|
|
||||||
for the name, the implementations of the first three see some updates.
|
|
||||||
First is `find_free`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/definition.cpp" 12 26 >}}
|
|
||||||
|
|
||||||
First, to make sure we don't pollute the global scope
|
|
||||||
with function parameters, `find_free` creates a new environment
|
|
||||||
`var_env`. Then, it stores into this new environment the function parameters,
|
|
||||||
ensuring that the parameters of a function aren't marked "free".
|
|
||||||
Concurrently, `find_free` constructs the "general" function
|
|
||||||
type (used by `insert_types`). Once all the arguments have been bound, `definition_defn::find_free`
|
|
||||||
makes a call to `ast::find_free`, which does the work of actually
|
|
||||||
finding free variables.
|
|
||||||
|
|
||||||
Since the function type is created by `find_free`, `insert_types` has very little to do:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/definition.cpp" 28 30 >}}
|
|
||||||
|
|
||||||
Finally, `typecheck`, which no longer has to bind the function
|
|
||||||
arguments to new types, is also fairly simple:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/definition.cpp" 32 35 >}}
|
|
||||||
|
|
||||||
Let's move on to data types. In order to implement `definition_data::insert_types`,
|
|
||||||
we need to store somewhere a list of all the valid type names. We do this
|
|
||||||
by adding a new `type_names` field to `type_env`, and implementing the
|
|
||||||
corresponding methods `lookup_type`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/type_env.cpp" 11 16 >}}
|
|
||||||
|
|
||||||
And `bind_type`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/type_env.cpp" 26 29 >}}
|
|
||||||
|
|
||||||
Note in the above snippets that we disallow redeclaring type names;
|
|
||||||
declaring two data types (or other types) with the same name in
|
|
||||||
our language will not be valid. In `insert_types`, we create a new
|
|
||||||
data type and store it in the environment:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/definition.cpp" 59 62 >}}
|
|
||||||
|
|
||||||
We then update `insert_constructors` to query the environment
|
|
||||||
when creating constructor types, rather than blindly using `new type_base(...)`
|
|
||||||
like before:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/definition.cpp" 64 82 >}}
|
|
||||||
|
|
||||||
The separation of data and function definitions must be reconciled with code
|
|
||||||
going back as far as the parser. While previously, we populated a single, global
|
|
||||||
vector of definitions called `program`, we can no longer do that. Instead, we'll
|
|
||||||
split our program into two maps, one for data types and one for functions. We
|
|
||||||
use maps for convenience: the groups generated by our function graph refer
|
|
||||||
to functions by name, and it would be nice to quickly look up the data
|
|
||||||
the names refer to. Rather than returning such maps, we change our semantic
|
|
||||||
actions to simply insert new data into one of two global maps. Below
|
|
||||||
is a snippet that includes all the changes:
|
|
||||||
|
|
||||||
{{< codelines "plaintext" "compiler/10/parser.y" 39 65 >}}
|
|
||||||
|
|
||||||
Note that `program` and `definitions` no longer have a type, and that `data` and `defn`
|
|
||||||
have been changed to return `definition_data_ptr` and `definition_defn_ptr`, respectively.
|
|
||||||
This necessitates changes to our main file. First of all, we declare the two new maps
|
|
||||||
we hope to receive from Bison:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/main.cpp" 24 25 >}}
|
|
||||||
|
|
||||||
We then change all affected functions, which in many cases amounts to splitting the `program` parameter
|
|
||||||
into `defs_data` and `defs_defn` parameters. We also make other, largely mechanical changes: code iterating
|
|
||||||
over definitions now requires the use of `second` to refer to the value stored in the map, and LLVM
|
|
||||||
generation now needs to separately process the two different types of definitions. The biggest change
|
|
||||||
occurs in `typecheck_program`, which not only undergoes all the aforementioned modifications, but
|
|
||||||
is also updated to use topological ordering:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/main.cpp" 27 84 >}}
|
|
||||||
|
|
||||||
The above code uses the yet-unexplained `generalize` method. What's going on?
|
|
||||||
|
|
||||||
Observe that the __Var__ rule of the Hindley-Milner type system says that a variable \\(x\\)
|
|
||||||
can have a __polytype__ in the environment \\(\\Gamma\\). Our `type_ptr` can only represent monotypes,
|
|
||||||
so we must change what `type_env` associates with names to a new struct for representing polytypes,
|
|
||||||
which we will call `type_scheme`. The `type_scheme` struct, just like the formal definition of
|
|
||||||
a polytype, contains zero or more "forall"-quantified type variables, followed by a monotype which
|
|
||||||
may use these variables:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/type.hpp" 17 27 >}}
|
|
||||||
|
|
||||||
The `type_scheme::instantiate` method is effectively an implementation of the special
|
|
||||||
case of the __Inst__ rule, in which a polytype is specialized to a monotype. Since
|
|
||||||
the __App__ and __Case__ rules only use monotypes, we'll be using this special case a lot.
|
|
||||||
We implement this method as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/type.cpp" 34 41 >}}
|
|
||||||
|
|
||||||
In the above code, if the type scheme represents a monotype (i.e., it has no quantified variables),
|
|
||||||
we simply return that monotype. Otherwise, we must perform a substitution, replacing "forall"-quantified
|
|
||||||
variables with fresh type parameters to be determined (we will never determine a single type for any of
|
|
||||||
the quantified variables, since they are specifically meant to represent any type).
|
|
||||||
We build a substitution map, which assigns to each quantified type variable a corresponding
|
|
||||||
"fresh" type, and then create a new type with the substitution applied using `substitute`,
|
|
||||||
which is implemented as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/type.cpp" 18 32 >}}
|
|
||||||
|
|
||||||
In principle, the function is fairly simple: if the current type is equivalent to a
|
|
||||||
quantified type, we return the corresponding "fresh" type. If, on the other hand,
|
|
||||||
the type represents a function, we perform a substitution in the function's input
|
|
||||||
and output types. This method avoids creating new types where possible; a new type
|
|
||||||
is only created if a function's input or output type is changed by a substitution
|
|
||||||
(in which case, the function itself is changed by the substitution). In all
|
|
||||||
other cases, substitution won't do anything, so we just return the original type.
|
|
||||||
|
|
||||||
Now it is a bit more clear why we saw `instantiate` in a code snippet some time ago;
|
|
||||||
to compute a monotype for a variable reference, we must take into account the
|
|
||||||
possibility that the variable has a polymorphic type, which needs to be specialized
|
|
||||||
(potentially differently in every occurrence of the variable).
|
|
||||||
|
|
||||||
When talking about our new typechecking algorithm, we mentioned using __Gen__ to sprinkle
|
|
||||||
polymorphism wherever possible. Whenever possible, __Gen__ will add free variables
|
|
||||||
in a type to the "forall" quantifier at the front, making that type polymorphic.
|
|
||||||
We implement this using a new `generalize` added to the `type_env`, which (as per
|
|
||||||
convention) generalizes the type of a given variable as much as possible:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/type_env.cpp" 31 41 >}}
|
|
||||||
|
|
||||||
For now, we do not allow a type to be generalized twice, and we naturally disallow generalizing
|
|
||||||
types of nonexistent variables. If neither of those things occurs, we find all the free
|
|
||||||
variables in the variable's current type using a new method called `type_mgr::find_free`,
|
|
||||||
and put them into the "forall" quantifier. `type_mgr::find_free` is implemented as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/10/type.cpp" 138 148 >}}
|
|
||||||
|
|
||||||
The above code is fairly straightforward; if a type is a variable that is not yet bound to anything,
|
|
||||||
it is free; if the type is a function, we search for free variables in its input and output types;
|
|
||||||
otherwise, the type has no free variables.
|
|
||||||
|
|
||||||
Finally, we have made the necessary changes. Let's test it out with the example from the beginning:
|
|
||||||
|
|
||||||
{{< rawblock "compiler/10/examples/if.txt" >}}
|
|
||||||
|
|
||||||
Running it, we get the output:
|
|
||||||
|
|
||||||
```
|
|
||||||
3
|
|
||||||
```
|
|
||||||
|
|
||||||
Hooray!
|
|
||||||
|
|
||||||
While this is a major success, we are not yet done. Although our functions can now
|
|
||||||
have polymorphic types, the same cannot be said for our data types! We want to
|
|
||||||
have lists of integers __and__ lists of booleans, without having to duplicate any code!
|
|
||||||
While this also falls into the category of polymorphism, this post has already gotten very long,
|
|
||||||
and we will return to it in the near future. Once we're done with that, I still intend
|
|
||||||
to go over `let/in` expressions, __lambda functions__, and __Input/Output__ together with
|
|
||||||
__strings__. See you in these future posts!
|
|
||||||
|
Loading…
Reference in New Issue
Block a user