Compare commits
	
		
			5 Commits
		
	
	
		
			5d53678e83
			...
			577e0ad930
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 577e0ad930 | |||
| 2a12f7f31e | |||
| ae3e661d7a | |||
| 0efa05142f | |||
| 6714e18e7c | 
| @ -29,6 +29,7 @@ add_executable(compiler | ||||
|     error.cpp error.hpp | ||||
|     binop.cpp binop.hpp | ||||
|     instruction.cpp instruction.hpp | ||||
|     graph.cpp graph.hpp | ||||
|     ${BISON_parser_OUTPUTS} | ||||
|     ${FLEX_scanner_OUTPUTS} | ||||
|     main.cpp | ||||
|  | ||||
| @ -1,7 +1,10 @@ | ||||
| #include "ast.hpp" | ||||
| #include <ostream> | ||||
| #include <iostream> | ||||
| #include "binop.hpp" | ||||
| #include "error.hpp" | ||||
| #include "type.hpp" | ||||
| #include "type_env.hpp" | ||||
| 
 | ||||
| static void print_indent(int n, std::ostream& to) { | ||||
|     while(n--) to << "  "; | ||||
| @ -12,7 +15,11 @@ void ast_int::print(int indent, std::ostream& to) const { | ||||
|     to << "INT: " << value << std::endl; | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) { | ||||
// Records the environment this integer literal is evaluated in.
// Only stores `env` on the node; nothing is added to `into`, and `mgr` is not used.
| void ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) { | ||||
|     this->env = env; | ||||
| } | ||||
| 
 | ||||
// Returns a freshly allocated base type named "Int" for the literal.
// `mgr` is not used by this overload.
| type_ptr ast_int::typecheck(type_mgr& mgr) { | ||||
|     return type_ptr(new type_base("Int")); | ||||
| } | ||||
| 
 | ||||
| @ -25,8 +32,13 @@ void ast_lid::print(int indent, std::ostream& to) const { | ||||
|     to << "LID: " << id << std::endl; | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) { | ||||
|     return env.lookup(id); | ||||
// Stores `env` on the node and reports the identifier as free when needed:
// if `env->lookup(id)` yields nullptr, `id` is inserted into `into`.
// `mgr` is not used here.
| void ast_lid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) { | ||||
|     this->env = env; | ||||
|     if(env->lookup(id) == nullptr) into.insert(id); | ||||
| } | ||||
| 
 | ||||
// Looks up `id` in the environment stored on this node and returns the result
// of calling instantiate(mgr) on what was found.
// NOTE(review): the lookup result is dereferenced without a null check, while
// ast_lid::find_free compares the same lookup against nullptr. Confirm that `id`
// is always bound by the time this runs.
| type_ptr ast_lid::typecheck(type_mgr& mgr) { | ||||
|     return env->lookup(id)->instantiate(mgr); | ||||
| } | ||||
| 
 | ||||
| void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const { | ||||
| @ -41,8 +53,12 @@ void ast_uid::print(int indent, std::ostream& to) const { | ||||
|     to << "UID: " << id << std::endl; | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) { | ||||
|     return env.lookup(id); | ||||
// Records the environment for this uppercase identifier.
// Only stores `env` on the node; nothing is added to `into`, and `mgr` is not used.
| void ast_uid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) { | ||||
|     this->env = env; | ||||
| } | ||||
| 
 | ||||
// Looks up `id` in the environment stored on this node and returns the result
// of calling instantiate(mgr) on what was found.
// NOTE(review): the lookup result is dereferenced without a null check.
// Confirm that lookup cannot return nullptr for `id` at this point.
| type_ptr ast_uid::typecheck(type_mgr& mgr) { | ||||
|     return env->lookup(id)->instantiate(mgr); | ||||
| } | ||||
| 
 | ||||
| void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const { | ||||
| @ -56,10 +72,16 @@ void ast_binop::print(int indent, std::ostream& to) const { | ||||
|     right->print(indent + 1, to); | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) { | ||||
|     type_ptr ltype = left->typecheck(mgr, env); | ||||
|     type_ptr rtype = right->typecheck(mgr, env); | ||||
|     type_ptr ftype = env.lookup(op_name(op)); | ||||
// Stores `env` on the node, then visits the left and right operands with the
// same environment and the same output set `into`.
| void ast_binop::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) { | ||||
|     this->env = env; | ||||
|     left->find_free(mgr, env, into); | ||||
|     right->find_free(mgr, env, into); | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_binop::typecheck(type_mgr& mgr) { | ||||
|     type_ptr ltype = left->typecheck(mgr); | ||||
|     type_ptr rtype = right->typecheck(mgr); | ||||
|     type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr); | ||||
|     if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op)); | ||||
| 
 | ||||
|     type_ptr return_type = mgr.new_type(); | ||||
| @ -86,9 +108,15 @@ void ast_app::print(int indent, std::ostream& to) const { | ||||
|     right->print(indent + 1, to); | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) { | ||||
|     type_ptr ltype = left->typecheck(mgr, env); | ||||
|     type_ptr rtype = right->typecheck(mgr, env); | ||||
// Stores `env` on the node, then visits the two sub-expressions (`left` and
// `right`) with the same environment and the same output set `into`.
| void ast_app::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) { | ||||
|     this->env = env; | ||||
|     left->find_free(mgr, env, into); | ||||
|     right->find_free(mgr, env, into); | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_app::typecheck(type_mgr& mgr) { | ||||
|     type_ptr ltype = left->typecheck(mgr); | ||||
|     type_ptr rtype = right->typecheck(mgr); | ||||
| 
 | ||||
|     type_ptr return_type = mgr.new_type(); | ||||
|     type_ptr arrow = type_ptr(new type_arr(rtype, return_type)); | ||||
| @ -113,20 +141,33 @@ void ast_case::print(int indent, std::ostream& to) const { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) { | ||||
// Stores `env` on the node and visits the scrutinee `of` with that environment.
// Each branch gets its own environment obtained from type_scope(env): the
// branch pattern inserts its bindings into it, and the branch body is then
// visited with that environment, so the body's stored `env` is the per-branch one.
// (type_scope presumably creates a child scope of `env`; confirm in type_env.hpp.)
| void ast_case::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) { | ||||
|     this->env = env; | ||||
|     of->find_free(mgr, env, into); | ||||
|     for(auto& branch : branches) { | ||||
|         type_env_ptr new_env = type_scope(env); | ||||
|         branch->pat->insert_bindings(mgr, new_env); | ||||
|         branch->expr->find_free(mgr, new_env, into); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| type_ptr ast_case::typecheck(type_mgr& mgr) { | ||||
|     type_var* var; | ||||
|     type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var); | ||||
|     type_ptr case_type = mgr.resolve(of->typecheck(mgr), var); | ||||
|     type_ptr branch_type = mgr.new_type(); | ||||
| 
 | ||||
|     for(auto& branch : branches) { | ||||
|         type_env new_env = env.scope(); | ||||
|         branch->pat->match(case_type, mgr, new_env); | ||||
|         type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env); | ||||
|         branch->pat->typecheck(case_type, mgr, branch->expr->env); | ||||
|         type_ptr curr_branch_type = branch->expr->typecheck(mgr); | ||||
|         mgr.unify(branch_type, curr_branch_type); | ||||
|     } | ||||
| 
 | ||||
|     input_type = mgr.resolve(case_type, var); | ||||
|     if(!dynamic_cast<type_data*>(input_type.get())) { | ||||
|         std::cout << dynamic_cast<type_data*>(input_type.get()) << std::endl; | ||||
|         std::cout << dynamic_cast<type_base*>(input_type.get()) << std::endl; | ||||
|         std::cout << var << std::endl; | ||||
|         input_type->print(mgr, std::cout); std::cout << std::endl; | ||||
|         throw type_error("attempting case analysis of non-data type"); | ||||
|     } | ||||
| 
 | ||||
| @ -192,8 +233,12 @@ void pattern_var::print(std::ostream& to) const { | ||||
|     to << var; | ||||
| } | ||||
| 
 | ||||
| void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const { | ||||
|     env.bind(var, t); | ||||
// Binds the pattern's variable name in `env` to a new type obtained from `mgr`.
| void pattern_var::insert_bindings(type_mgr& mgr, type_env_ptr& env) const { | ||||
|     env->bind(var, mgr.new_type()); | ||||
| } | ||||
| 
 | ||||
// Unifies the instantiated type bound to `var` in `env` with the type `t`
// passed by the caller.
// NOTE(review): the lookup result is dereferenced directly; this relies on `var`
// already being bound in `env`.
| void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const { | ||||
|     mgr.unify(env->lookup(var)->instantiate(mgr), t); | ||||
| } | ||||
| 
 | ||||
| void pattern_constr::print(std::ostream& to) const { | ||||
| @ -203,17 +248,23 @@ void pattern_constr::print(std::ostream& to) const { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const { | ||||
|     type_ptr constructor_type = env.lookup(constr); | ||||
// Binds every parameter name of the constructor pattern in `env`, each to its
// own new type obtained from `mgr`.
| void pattern_constr::insert_bindings(type_mgr& mgr, type_env_ptr& env) const { | ||||
|     for(auto& param : params) { | ||||
|         env->bind(param, mgr.new_type()); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const { | ||||
|     type_ptr constructor_type = env->lookup(constr)->instantiate(mgr); | ||||
|     if(!constructor_type) { | ||||
|         throw type_error(std::string("pattern using unknown constructor ") + constr); | ||||
|     } | ||||
| 
 | ||||
|     for(int i = 0; i < params.size(); i++) { | ||||
|     for(auto& param : params) { | ||||
|         type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get()); | ||||
|         if(!arr) throw type_error("too many parameters in constructor pattern"); | ||||
| 
 | ||||
|         env.bind(params[i], arr->left); | ||||
|         mgr.unify(env->lookup(param)->instantiate(mgr), arr->left); | ||||
|         constructor_type = arr->right; | ||||
|     } | ||||
| 
 | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| #pragma once | ||||
| #include <memory> | ||||
| #include <vector> | ||||
| #include <set> | ||||
| #include "type.hpp" | ||||
| #include "type_env.hpp" | ||||
| #include "binop.hpp" | ||||
| @ -8,10 +9,14 @@ | ||||
| #include "env.hpp" | ||||
| 
 | ||||
| struct ast { | ||||
|     type_env_ptr env; | ||||
| 
 | ||||
|     virtual ~ast() = default; | ||||
| 
 | ||||
|     virtual void print(int indent, std::ostream& to) const = 0; | ||||
|     virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) = 0; | ||||
|     virtual void find_free(type_mgr& mgr, | ||||
|         type_env_ptr& env, std::set<std::string>& into) = 0; | ||||
|     virtual type_ptr typecheck(type_mgr& mgr) = 0; | ||||
|     virtual void compile(const env_ptr& env, | ||||
|         std::vector<instruction_ptr>& into) const = 0; | ||||
| }; | ||||
| @ -22,7 +27,8 @@ struct pattern { | ||||
|     virtual ~pattern() = default; | ||||
| 
 | ||||
|     virtual void print(std::ostream& to) const = 0; | ||||
|     virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0; | ||||
|     virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const = 0; | ||||
|     virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0; | ||||
| }; | ||||
| 
 | ||||
| using pattern_ptr = std::unique_ptr<pattern>; | ||||
| @ -44,7 +50,8 @@ struct ast_int : public ast { | ||||
|         : value(v) {} | ||||
| 
 | ||||
|     void print(int indent, std::ostream& to) const; | ||||
|     type_ptr typecheck(type_mgr& mgr, const type_env& env); | ||||
|     void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into); | ||||
|     type_ptr typecheck(type_mgr& mgr); | ||||
|     void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const; | ||||
| }; | ||||
| 
 | ||||
| @ -55,7 +62,8 @@ struct ast_lid : public ast { | ||||
|         : id(std::move(i)) {} | ||||
| 
 | ||||
|     void print(int indent, std::ostream& to) const; | ||||
|     type_ptr typecheck(type_mgr& mgr, const type_env& env); | ||||
|     void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into); | ||||
|     type_ptr typecheck(type_mgr& mgr); | ||||
|     void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const; | ||||
| }; | ||||
| 
 | ||||
| @ -66,7 +74,8 @@ struct ast_uid : public ast { | ||||
|         : id(std::move(i)) {} | ||||
| 
 | ||||
|     void print(int indent, std::ostream& to) const; | ||||
|     type_ptr typecheck(type_mgr& mgr, const type_env& env); | ||||
|     void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into); | ||||
|     type_ptr typecheck(type_mgr& mgr); | ||||
|     void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const; | ||||
| }; | ||||
| 
 | ||||
| @ -79,7 +88,8 @@ struct ast_binop : public ast { | ||||
|         : op(o), left(std::move(l)), right(std::move(r)) {} | ||||
| 
 | ||||
|     void print(int indent, std::ostream& to) const; | ||||
|     type_ptr typecheck(type_mgr& mgr, const type_env& env); | ||||
|     void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into); | ||||
|     type_ptr typecheck(type_mgr& mgr); | ||||
|     void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const; | ||||
| }; | ||||
| 
 | ||||
| @ -91,7 +101,8 @@ struct ast_app : public ast { | ||||
|         : left(std::move(l)), right(std::move(r)) {} | ||||
| 
 | ||||
|     void print(int indent, std::ostream& to) const; | ||||
|     type_ptr typecheck(type_mgr& mgr, const type_env& env); | ||||
|     void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into); | ||||
|     type_ptr typecheck(type_mgr& mgr); | ||||
|     void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const; | ||||
| }; | ||||
| 
 | ||||
| @ -104,7 +115,8 @@ struct ast_case : public ast { | ||||
|         : of(std::move(o)), branches(std::move(b)) {} | ||||
| 
 | ||||
|     void print(int indent, std::ostream& to) const; | ||||
|     type_ptr typecheck(type_mgr& mgr, const type_env& env); | ||||
|     void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into); | ||||
|     type_ptr typecheck(type_mgr& mgr); | ||||
|     void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const; | ||||
| }; | ||||
| 
 | ||||
| @ -115,7 +127,8 @@ struct pattern_var : public pattern { | ||||
|         : var(std::move(v)) {} | ||||
| 
 | ||||
|     void print(std::ostream &to) const; | ||||
|     void match(type_ptr t, type_mgr& mgr, type_env& env) const; | ||||
|     void insert_bindings(type_mgr& mgr, type_env_ptr& env) const; | ||||
|     void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const; | ||||
| }; | ||||
| 
 | ||||
| struct pattern_constr : public pattern { | ||||
| @ -126,5 +139,6 @@ struct pattern_constr : public pattern { | ||||
|         : constr(std::move(c)), params(std::move(p)) {} | ||||
| 
 | ||||
|     void print(std::ostream &to) const; | ||||
|     void match(type_ptr t, type_mgr&, type_env& env) const; | ||||
|     virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const; | ||||
|     virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const; | ||||
| }; | ||||
|  | ||||
| @ -3,35 +3,34 @@ | ||||
| #include "ast.hpp" | ||||
| #include "instruction.hpp" | ||||
| #include "llvm_context.hpp" | ||||
| #include "type.hpp" | ||||
| #include "type_env.hpp" | ||||
| #include <llvm/IR/DerivedTypes.h> | ||||
| #include <llvm/IR/Function.h> | ||||
| #include <llvm/IR/Type.h> | ||||
| 
 | ||||
| void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) { | ||||
| void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) { | ||||
|     this->env = env; | ||||
| 
 | ||||
|     var_env = type_scope(env); | ||||
|     return_type = mgr.new_type(); | ||||
|     type_ptr full_type = return_type; | ||||
|     full_type = return_type; | ||||
| 
 | ||||
|     for(auto it = params.rbegin(); it != params.rend(); it++) { | ||||
|         type_ptr param_type = mgr.new_type(); | ||||
|         full_type = type_ptr(new type_arr(param_type, full_type)); | ||||
|         param_types.push_back(param_type); | ||||
|         var_env->bind(*it, param_type); | ||||
|     } | ||||
| 
 | ||||
|     env.bind(name, full_type); | ||||
|     body->find_free(mgr, var_env, free_variables); | ||||
| } | ||||
| 
 | ||||
| void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const { | ||||
|     type_env new_env = env.scope(); | ||||
|     auto param_it = params.begin(); | ||||
|     auto type_it = param_types.rbegin(); | ||||
| 
 | ||||
|     while(param_it != params.end() && type_it != param_types.rend()) { | ||||
|         new_env.bind(*param_it, *type_it); | ||||
|         param_it++; | ||||
|         type_it++; | ||||
// Binds this definition's `name` to its `full_type` in the environment stored
// in the `env` member. `mgr` is not used here.
| void definition_defn::insert_types(type_mgr& mgr) { | ||||
|     env->bind(name, full_type); | ||||
| } | ||||
| 
 | ||||
|     type_ptr body_type = body->typecheck(mgr, new_env); | ||||
// Typechecks the function body and unifies the resulting type with the
// definition's `return_type`.
| void definition_defn::typecheck(type_mgr& mgr) { | ||||
|     type_ptr body_type = body->typecheck(mgr); | ||||
|     mgr.unify(return_type, body_type); | ||||
| } | ||||
| 
 | ||||
| @ -44,11 +43,12 @@ void definition_defn::compile() { | ||||
|     instructions.push_back(instruction_ptr(new instruction_update(params.size()))); | ||||
|     instructions.push_back(instruction_ptr(new instruction_pop(params.size()))); | ||||
| } | ||||
| void definition_defn::gen_llvm_first(llvm_context& ctx) { | ||||
| 
 | ||||
// Asks the LLVM context to create a custom function for this definition,
// passing its name and parameter count, and stores the result in
// `generated_function`.
| void definition_defn::declare_llvm(llvm_context& ctx) { | ||||
|     generated_function = ctx.create_custom_function(name, params.size()); | ||||
| } | ||||
| 
 | ||||
| void definition_defn::gen_llvm_second(llvm_context& ctx) { | ||||
| void definition_defn::generate_llvm(llvm_context& ctx) { | ||||
|     ctx.builder.SetInsertPoint(&generated_function->getEntryBlock()); | ||||
|     for(auto& instruction : instructions) { | ||||
|         instruction->gen_llvm(ctx, generated_function); | ||||
| @ -56,9 +56,14 @@ void definition_defn::gen_llvm_second(llvm_context& ctx) { | ||||
|     ctx.builder.CreateRetVoid(); | ||||
| } | ||||
| 
 | ||||
| void definition_data::typecheck_first(type_mgr& mgr, type_env& env) { | ||||
|     type_data* this_type = new type_data(name); | ||||
|     type_ptr return_type = type_ptr(this_type); | ||||
// Stores `env` on the definition and registers a new type_data under this
// data type's `name` via bind_type. `mgr` is not used here.
| void definition_data::insert_types(type_mgr& mgr, type_env_ptr& env) { | ||||
|     this->env = env; | ||||
|     env->bind_type(name, type_ptr(new type_data(name))); | ||||
| } | ||||
| 
 | ||||
| void definition_data::insert_constructors() const { | ||||
|     type_ptr return_type = env->lookup_type(name); | ||||
|     type_data* this_type = static_cast<type_data*>(return_type.get()); | ||||
|     int next_tag = 0; | ||||
| 
 | ||||
|     for(auto& constructor : constructors) { | ||||
| @ -67,23 +72,16 @@ void definition_data::typecheck_first(type_mgr& mgr, type_env& env) { | ||||
| 
 | ||||
|         type_ptr full_type = return_type; | ||||
|         for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) { | ||||
|             type_ptr type = type_ptr(new type_base(*it)); | ||||
|             type_ptr type = env->lookup_type(*it); | ||||
|             if(!type) throw 0; | ||||
|             full_type = type_ptr(new type_arr(type, full_type)); | ||||
|         } | ||||
| 
 | ||||
|         env.bind(constructor->name, full_type); | ||||
|         env->bind(constructor->name, full_type); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const { | ||||
|     // Nothing
 | ||||
| } | ||||
| 
 | ||||
| void definition_data::compile() { | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| void definition_data::gen_llvm_first(llvm_context& ctx) { | ||||
| void definition_data::generate_llvm(llvm_context& ctx) { | ||||
|     for(auto& constructor : constructors) { | ||||
|         auto new_function = | ||||
|             ctx.create_custom_function(constructor->name, constructor->types.size()); | ||||
| @ -99,7 +97,3 @@ void definition_data::gen_llvm_first(llvm_context& ctx) { | ||||
|         ctx.builder.CreateRetVoid(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void definition_data::gen_llvm_second(llvm_context& ctx) { | ||||
|     // Nothing
 | ||||
| } | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| #pragma once | ||||
| #include <memory> | ||||
| #include <vector> | ||||
| #include <set> | ||||
| #include "instruction.hpp" | ||||
| #include "llvm_context.hpp" | ||||
| #include "type_env.hpp" | ||||
| @ -8,18 +9,6 @@ | ||||
| struct ast; | ||||
| using ast_ptr = std::unique_ptr<ast>; | ||||
| 
 | ||||
// Abstract interface for a top-level definition: a virtual destructor plus
// pure-virtual hooks for two typechecking passes, compilation, and two LLVM
// generation passes.
| struct definition { | ||||
|     virtual ~definition() = default; | ||||
|      | ||||
|     virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0; | ||||
|     virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0; | ||||
|     virtual void compile() = 0; | ||||
|     virtual void gen_llvm_first(llvm_context& ctx) = 0; | ||||
|     virtual void gen_llvm_second(llvm_context& ctx) = 0; | ||||
| }; | ||||
| 
 | ||||
| using definition_ptr = std::unique_ptr<definition>; | ||||
| 
 | ||||
| struct constructor { | ||||
|     std::string name; | ||||
|     std::vector<std::string> types; | ||||
| @ -31,13 +20,16 @@ struct constructor { | ||||
| 
 | ||||
| using constructor_ptr = std::unique_ptr<constructor>; | ||||
| 
 | ||||
| struct definition_defn : public definition { | ||||
| struct definition_defn { | ||||
|     std::string name; | ||||
|     std::vector<std::string> params; | ||||
|     ast_ptr body; | ||||
| 
 | ||||
|     type_env_ptr env; | ||||
|     type_env_ptr var_env; | ||||
|     std::set<std::string> free_variables; | ||||
|     type_ptr full_type; | ||||
|     type_ptr return_type; | ||||
|     std::vector<type_ptr> param_types; | ||||
| 
 | ||||
|     std::vector<instruction_ptr> instructions; | ||||
| 
 | ||||
| @ -48,23 +40,28 @@ struct definition_defn : public definition { | ||||
| 
 | ||||
|     } | ||||
| 
 | ||||
|     void typecheck_first(type_mgr& mgr, type_env& env); | ||||
|     void typecheck_second(type_mgr& mgr, const type_env& env) const; | ||||
|     void find_free(type_mgr& mgr, type_env_ptr& env); | ||||
|     void insert_types(type_mgr& mgr); | ||||
|     void typecheck(type_mgr& mgr); | ||||
|     void compile(); | ||||
|     void gen_llvm_first(llvm_context& ctx); | ||||
|     void gen_llvm_second(llvm_context& ctx); | ||||
|     void declare_llvm(llvm_context& ctx); | ||||
|     void generate_llvm(llvm_context& ctx); | ||||
| }; | ||||
| 
 | ||||
| struct definition_data : public definition { | ||||
| using definition_defn_ptr = std::unique_ptr<definition_defn>; | ||||
| 
 | ||||
| struct definition_data { | ||||
|     std::string name; | ||||
|     std::vector<constructor_ptr> constructors; | ||||
| 
 | ||||
|     type_env_ptr env; | ||||
| 
 | ||||
|     definition_data(std::string n, std::vector<constructor_ptr> cs) | ||||
|         : name(std::move(n)), constructors(std::move(cs)) {} | ||||
| 
 | ||||
|     void typecheck_first(type_mgr& mgr, type_env& env); | ||||
|     void typecheck_second(type_mgr& mgr, const type_env& env) const; | ||||
|     void compile(); | ||||
|     void gen_llvm_first(llvm_context& ctx); | ||||
|     void gen_llvm_second(llvm_context& ctx); | ||||
|     void insert_types(type_mgr& mgr, type_env_ptr& env); | ||||
|     void insert_constructors() const; | ||||
|     void generate_llvm(llvm_context& ctx); | ||||
| }; | ||||
| 
 | ||||
| using definition_data_ptr = std::unique_ptr<definition_data>; | ||||
|  | ||||
							
								
								
									
										8
									
								
								code/compiler/10/examples/if.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								code/compiler/10/examples/if.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | ||||
| data Bool = { True, False } | ||||
| defn if c t e = { | ||||
|     case c of { | ||||
|         True -> { t } | ||||
|         False -> { e } | ||||
|     } | ||||
| } | ||||
| defn main = { if (if True False True) 11 3 } | ||||
| @ -46,115 +46,7 @@ class function_graph { | ||||
|             std::map<group_id, data_ptr>&); | ||||
|      | ||||
|     public: | ||||
|     std::set<function>& add_function(const function& f); | ||||
|     void add_edge(const function& from, const function& to); | ||||
|     std::vector<group_ptr> compute_order(); | ||||
| }; | ||||
| 
 | ||||
// Computes the set of edges reachable by chaining existing edges.
// Starts from a copy of `edges`, then for every (connector, from, to) triple of
// keys in `adjacency_lists` adds from->to whenever from->connector and
// connector->to are both already in the set. The intermediate vertex is the
// outermost loop, so edges added for earlier connectors are visible to later ones.
// Only keys of `adjacency_lists` are considered as vertices.
// Returns the resulting edge set.
| std::set<function_graph::edge> function_graph::compute_transitive_edges() { | ||||
|     std::set<edge> transitive_edges; | ||||
|     transitive_edges.insert(edges.begin(), edges.end()); | ||||
|     for(auto& connector : adjacency_lists) { | ||||
|         for(auto& from : adjacency_lists) { | ||||
|             edge to_connector { from.first, connector.first }; | ||||
|             for(auto& to : adjacency_lists) { | ||||
|                 edge full_jump { from.first, to.first }; | ||||
// Already known: nothing to add for this pair.
|                 if(transitive_edges.find(full_jump) != transitive_edges.end()) continue; | ||||
| 
 | ||||
|                 edge from_connector { connector.first, to.first }; | ||||
|                 if(transitive_edges.find(to_connector) != transitive_edges.end() && | ||||
|                         transitive_edges.find(from_connector) != transitive_edges.end()) | ||||
|                     transitive_edges.insert(std::move(full_jump)); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     return transitive_edges; | ||||
| } | ||||
| 
 | ||||
// Partitions the vertices of `adjacency_lists` into groups.
// For each vertex that has no group yet, a new group is created with the next
// id, and every other vertex connected to it in both directions according to
// `transitive_edges` is placed in the same group.
// Outputs: `group_ids` maps each function to its group id; `group_data_map`
// maps each group id to the group's data (including its member functions).
| void function_graph::create_groups( | ||||
|         const std::set<edge>& transitive_edges, | ||||
|         std::map<function, group_id>& group_ids, | ||||
|         std::map<group_id, data_ptr>& group_data_map) { | ||||
|     group_id id_counter = 0; | ||||
|     for(auto& vertex : adjacency_lists) { | ||||
// Skip vertices that were already pulled into an earlier group.
|         if(group_ids.find(vertex.first) != group_ids.end()) | ||||
|             continue; | ||||
|         data_ptr new_group(new group_data); | ||||
|         new_group->functions.insert(vertex.first); | ||||
|         group_data_map[id_counter] = new_group; | ||||
|         group_ids[vertex.first] = id_counter; | ||||
|         for(auto& other_vertex : adjacency_lists) { | ||||
// Mutual reachability puts both vertices in the same group.
|             if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() && | ||||
|                     transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) { | ||||
|                 group_ids[other_vertex.first] = id_counter; | ||||
|                 new_group->functions.insert(other_vertex.first); | ||||
|             } | ||||
|         } | ||||
|         id_counter++; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
// Builds the edges between groups from the edges between functions.
// For every function->function edge whose endpoints lie in different groups,
// records the group->group edge once: the target id is added to the source
// group's adjacency list and the target group's indegree is incremented.
// NOTE(review): both maps are indexed with operator[], which default-inserts a
// missing key. This assumes every vertex found in an adjacency list already has
// a group id and group data; confirm against how vertices are registered.
| void function_graph::create_edges( | ||||
|         std::map<function, group_id>& group_ids, | ||||
|         std::map<group_id, data_ptr>& group_data_map) { | ||||
|     std::set<std::pair<group_id, group_id>> group_edges; | ||||
|     for(auto& vertex : adjacency_lists) { | ||||
|         auto vertex_id = group_ids[vertex.first]; | ||||
|         auto& vertex_data = group_data_map[vertex_id]; | ||||
|         for(auto& other_vertex : vertex.second) { | ||||
|             auto other_id = group_ids[other_vertex]; | ||||
// Edges inside one group and already-recorded group edges are ignored.
|             if(vertex_id == other_id) continue; | ||||
|             if(group_edges.find({vertex_id, other_id}) != group_edges.end()) | ||||
|                 continue; | ||||
|             group_edges.insert({vertex_id, other_id}); | ||||
|             vertex_data->adjacency_list.insert(other_id); | ||||
|             group_data_map[other_id]->indegree++; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
// Produces the groups in an order driven by their indegrees.
// The queue is seeded with every group whose indegree is 0. Each dequeued group
// is emitted, then the indegree of each group it points to is decremented, and
// any group that reaches 0 is enqueued.
// The member set of each group is moved out of `group_data_map` into the
// emitted group, so the data map's `functions` sets are left moved-from.
// `group_ids` is not read by this function.
// Returns the emitted groups in dequeue order.
| std::vector<group_ptr> function_graph::generate_order( | ||||
|         std::map<function, group_id>& group_ids, | ||||
|         std::map<group_id, data_ptr>& group_data_map) { | ||||
|     std::queue<group_id> id_queue; | ||||
|     std::vector<group_ptr> output; | ||||
|     for(auto& group : group_data_map) { | ||||
|         if(group.second->indegree == 0) id_queue.push(group.first); | ||||
|     } | ||||
| 
 | ||||
|     while(!id_queue.empty()) { | ||||
|         auto new_id = id_queue.front(); | ||||
|         auto& group_data = group_data_map[new_id]; | ||||
|         group_ptr output_group(new group); | ||||
|         output_group->members = std::move(group_data->functions); | ||||
|         id_queue.pop(); | ||||
| 
 | ||||
|         for(auto& adjacent_group : group_data->adjacency_list) { | ||||
// A group becomes ready once all groups pointing at it have been emitted.
|             if(--group_data_map[adjacent_group]->indegree == 0) | ||||
|                 id_queue.push(adjacent_group); | ||||
|         } | ||||
| 
 | ||||
|         output.push_back(std::move(output_group)); | ||||
|     } | ||||
| 
 | ||||
|     return output; | ||||
| } | ||||
| 
 | ||||
// Records a directed edge from `from` to `to`.
// `to` is added to the adjacency set of `from` (the set is created if `from`
// has none yet), and the pair is also inserted into `edges`.
// Note that no adjacency-list entry is created for `to` here.
| void function_graph::add_edge(const function& from, const function& to) { | ||||
|     auto adjacency_list_it = adjacency_lists.find(from); | ||||
|     if(adjacency_list_it != adjacency_lists.end()) { | ||||
|         adjacency_list_it->second.insert(to); | ||||
|     } else { | ||||
|         adjacency_lists[from] = { to }; | ||||
|     } | ||||
|     edges.insert({ from, to }); | ||||
| } | ||||
| 
 | ||||
// Public entry point: computes the transitive edges, groups the functions,
// builds the edges between groups, and returns the groups in the order
// produced by generate_order.
| std::vector<group_ptr> function_graph::compute_order() { | ||||
|     std::set<edge> transitive_edges = compute_transitive_edges(); | ||||
|     std::map<function, group_id> group_ids; | ||||
|     std::map<group_id, data_ptr> group_data_map; | ||||
| 
 | ||||
|     create_groups(transitive_edges, group_ids, group_data_map); | ||||
|     create_edges(group_ids, group_data_map); | ||||
|     return generate_order(group_ids, group_data_map); | ||||
| } | ||||
|  | ||||
| @ -2,6 +2,7 @@ | ||||
| #include <iostream> | ||||
| #include "binop.hpp" | ||||
| #include "definition.hpp" | ||||
| #include "graph.hpp" | ||||
| #include "instruction.hpp" | ||||
| #include "llvm_context.hpp" | ||||
| #include "parser.hpp" | ||||
| @ -20,43 +21,72 @@ void yy::parser::error(const std::string& msg) { | ||||
|     std::cout << "An error occured: " << msg << std::endl; | ||||
| } | ||||
| 
 | ||||
| extern std::vector<definition_ptr> program; | ||||
| extern std::map<std::string, definition_data_ptr> defs_data; | ||||
| extern std::map<std::string, definition_defn_ptr> defs_defn; | ||||
| 
 | ||||
| void typecheck_program( | ||||
|         const std::vector<definition_ptr>& prog, | ||||
|         type_mgr& mgr, type_env& env) { | ||||
|         const std::map<std::string, definition_data_ptr>& defs_data, | ||||
|         const std::map<std::string, definition_defn_ptr>& defs_defn, | ||||
|         type_mgr& mgr, type_env_ptr& env) { | ||||
|     type_ptr int_type = type_ptr(new type_base("Int"));  | ||||
|     env->bind_type("Int", int_type); | ||||
| 
 | ||||
|     type_ptr binop_type = type_ptr(new type_arr( | ||||
|                 int_type, | ||||
|                 type_ptr(new type_arr(int_type, int_type)))); | ||||
|     env->bind("+", binop_type); | ||||
|     env->bind("-", binop_type); | ||||
|     env->bind("*", binop_type); | ||||
|     env->bind("/", binop_type); | ||||
| 
 | ||||
|     env.bind("+", binop_type); | ||||
|     env.bind("-", binop_type); | ||||
|     env.bind("*", binop_type); | ||||
|     env.bind("/", binop_type); | ||||
| 
 | ||||
|     for(auto& def : prog) { | ||||
|         def->typecheck_first(mgr, env); | ||||
|     for(auto& def_data : defs_data) { | ||||
|         def_data.second->insert_types(mgr, env); | ||||
|     } | ||||
|     for(auto& def_data : defs_data) { | ||||
|         def_data.second->insert_constructors(); | ||||
|     } | ||||
| 
 | ||||
|     for(auto& def : prog) { | ||||
|         def->typecheck_second(mgr, env); | ||||
|     function_graph dependency_graph; | ||||
| 
 | ||||
|     for(auto& def_defn : defs_defn) { | ||||
|         def_defn.second->find_free(mgr, env); | ||||
|         dependency_graph.add_function(def_defn.second->name); | ||||
| 
 | ||||
|         for(auto& dependency : def_defn.second->free_variables) { | ||||
|             if(defs_defn.find(dependency) == defs_defn.end()) | ||||
|                 throw 0; | ||||
|             dependency_graph.add_edge(def_defn.second->name, dependency); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     for(auto& pair : env.names) { | ||||
|     std::vector<group_ptr> groups = dependency_graph.compute_order(); | ||||
|     for(auto it = groups.rbegin(); it != groups.rend(); it++) { | ||||
|         auto& group = *it; | ||||
|         for(auto& def_defnn_name : group->members) { | ||||
|             auto& def_defn = defs_defn.find(def_defnn_name)->second; | ||||
|             def_defn->insert_types(mgr); | ||||
|         } | ||||
|         for(auto& def_defnn_name : group->members) { | ||||
|             auto& def_defn = defs_defn.find(def_defnn_name)->second; | ||||
|             def_defn->typecheck(mgr); | ||||
|         } | ||||
|         for(auto& def_defnn_name : group->members) { | ||||
|             env->generalize(def_defnn_name, mgr); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     for(auto& pair : env->names) { | ||||
|         std::cout << pair.first << ": "; | ||||
|         pair.second->print(mgr, std::cout); | ||||
|         std::cout << std::endl; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void compile_program(const std::vector<definition_ptr>& prog) { | ||||
|     for(auto& def : prog) { | ||||
|         def->compile(); | ||||
| void compile_program(const std::map<std::string, definition_defn_ptr>& defs_defn) { | ||||
|     for(auto& def_defn : defs_defn) { | ||||
|         def_defn.second->compile(); | ||||
| 
 | ||||
|         definition_defn* defn = dynamic_cast<definition_defn*>(def.get()); | ||||
|         if(!defn) continue; | ||||
|         for(auto& instruction : defn->instructions) { | ||||
|         for(auto& instruction : def_defn.second->instructions) { | ||||
|             instruction->print(0, std::cout); | ||||
|         } | ||||
|         std::cout << std::endl; | ||||
| @ -120,20 +150,25 @@ void output_llvm(llvm_context& ctx, const std::string& filename) { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void gen_llvm(const std::vector<definition_ptr>& prog) { | ||||
| void gen_llvm( | ||||
|         const std::map<std::string, definition_data_ptr>& defs_data, | ||||
|         const std::map<std::string, definition_defn_ptr>& defs_defn) { | ||||
|     llvm_context ctx; | ||||
|     gen_llvm_internal_op(ctx, PLUS); | ||||
|     gen_llvm_internal_op(ctx, MINUS); | ||||
|     gen_llvm_internal_op(ctx, TIMES); | ||||
|     gen_llvm_internal_op(ctx, DIVIDE); | ||||
| 
 | ||||
|     for(auto& definition : prog) { | ||||
|         definition->gen_llvm_first(ctx); | ||||
|     for(auto& def_data : defs_data) { | ||||
|         def_data.second->generate_llvm(ctx); | ||||
|     } | ||||
|     for(auto& def_defn : defs_defn) { | ||||
|         def_defn.second->declare_llvm(ctx); | ||||
|     } | ||||
|     for(auto& def_defn : defs_defn) { | ||||
|         def_defn.second->generate_llvm(ctx); | ||||
|     } | ||||
| 
 | ||||
|     for(auto& definition : prog) { | ||||
|         definition->gen_llvm_second(ctx); | ||||
|     } | ||||
|     ctx.module.print(llvm::outs(), nullptr); | ||||
|     output_llvm(ctx, "program.o"); | ||||
| } | ||||
| @ -141,23 +176,20 @@ void gen_llvm(const std::vector<definition_ptr>& prog) { | ||||
| int main() { | ||||
|     yy::parser parser; | ||||
|     type_mgr mgr; | ||||
|     type_env env; | ||||
|     type_env_ptr env(new type_env); | ||||
| 
 | ||||
|     parser.parse(); | ||||
|     for(auto& definition : program) { | ||||
|         definition_defn* def = dynamic_cast<definition_defn*>(definition.get()); | ||||
|         if(!def) continue; | ||||
| 
 | ||||
|         std::cout << def->name; | ||||
|         for(auto& param : def->params) std::cout << " " << param; | ||||
|     for(auto& def_defn : defs_defn) { | ||||
|         std::cout << def_defn.second->name; | ||||
|         for(auto& param : def_defn.second->params) std::cout << " " << param; | ||||
|         std::cout << ":" << std::endl; | ||||
| 
 | ||||
|         def->body->print(1, std::cout); | ||||
|         def_defn.second->body->print(1, std::cout); | ||||
|     } | ||||
|     try { | ||||
|         typecheck_program(program, mgr, env); | ||||
|         compile_program(program); | ||||
|         gen_llvm(program); | ||||
|         typecheck_program(defs_data, defs_defn, mgr, env); | ||||
|         compile_program(defs_defn); | ||||
|         gen_llvm(defs_data, defs_defn); | ||||
|     } catch(unification_error& err) { | ||||
|         std::cout << "failed to unify types: " << std::endl; | ||||
|         std::cout << "  (1) \033[34m"; | ||||
|  | ||||
| @ -1,11 +1,14 @@ | ||||
| %{ | ||||
| #include <string> | ||||
| #include <iostream> | ||||
| #include <map> | ||||
| #include "ast.hpp" | ||||
| #include "definition.hpp" | ||||
| #include "parser.hpp" | ||||
| 
 | ||||
| std::vector<definition_ptr> program; | ||||
| std::map<std::string, definition_data_ptr> defs_data; | ||||
| std::map<std::string, definition_defn_ptr> defs_defn; | ||||
| 
 | ||||
| extern yy::parser::symbol_type yylex(); | ||||
| 
 | ||||
| %} | ||||
| @ -34,11 +37,11 @@ extern yy::parser::symbol_type yylex(); | ||||
| %define api.token.constructor | ||||
| 
 | ||||
| %type <std::vector<std::string>> lowercaseParams uppercaseParams | ||||
| %type <std::vector<definition_ptr>> program definitions | ||||
| %type <std::vector<branch_ptr>> branches | ||||
| %type <std::vector<constructor_ptr>> constructors | ||||
| %type <ast_ptr> aAdd aMul case app appBase | ||||
| %type <definition_ptr> definition defn data  | ||||
| %type <definition_data_ptr> data  | ||||
| %type <definition_defn_ptr> defn | ||||
| %type <branch_ptr> branch | ||||
| %type <pattern_ptr> pattern | ||||
| %type <constructor_ptr> constructor | ||||
| @ -48,22 +51,22 @@ extern yy::parser::symbol_type yylex(); | ||||
| %% | ||||
| 
 | ||||
| program | ||||
|     : definitions { program = std::move($1); } | ||||
|     : definitions { } | ||||
|     ; | ||||
| 
 | ||||
| definitions | ||||
|     : definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); } | ||||
|     | definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); } | ||||
|     : definitions definition { } | ||||
|     | definition { } | ||||
|     ; | ||||
| 
 | ||||
| definition | ||||
|     : defn { $$ = std::move($1); } | ||||
|     | data { $$ = std::move($1); } | ||||
|     : defn { auto name = $1->name; defs_defn[name] = std::move($1); } | ||||
|     | data { auto name = $1->name; defs_data[name] = std::move($1); } | ||||
|     ; | ||||
| 
 | ||||
| defn | ||||
|     : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY | ||||
|         { $$ = definition_ptr( | ||||
|         { $$ = definition_defn_ptr( | ||||
|             new definition_defn(std::move($2), std::move($3), std::move($6))); } | ||||
|     ; | ||||
| 
 | ||||
| @ -125,7 +128,7 @@ pattern | ||||
| 
 | ||||
| data | ||||
|     : DATA UID EQUAL OCURLY constructors CCURLY | ||||
|         { $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); } | ||||
|         { $$ = definition_data_ptr(new definition_data(std::move($2), std::move($5))); } | ||||
|     ; | ||||
| 
 | ||||
| constructors | ||||
|  | ||||
| @ -1,8 +1,45 @@ | ||||
| #include "type.hpp" | ||||
| #include <ostream> | ||||
| #include <sstream> | ||||
| #include <algorithm> | ||||
| #include "error.hpp" | ||||
| 
 | ||||
| void type_scheme::print(const type_mgr& mgr, std::ostream& to) const { | ||||
|     if(forall.size() != 0) { | ||||
|         to << "forall "; | ||||
|         for(auto& var : forall) { | ||||
|             to << var << " "; | ||||
|         } | ||||
|         to << ". "; | ||||
|     } | ||||
|     monotype->print(mgr, to); | ||||
| } | ||||
| 
 | ||||
| type_ptr substitute(const type_mgr& mgr, const std::map<std::string, type_ptr>& subst, const type_ptr& t) { | ||||
|     type_var* var; | ||||
|     type_ptr resolved = mgr.resolve(t, var); | ||||
|     if(var) { | ||||
|         auto subst_it = subst.find(var->name); | ||||
|         if(subst_it == subst.end()) return resolved; | ||||
|         return subst_it->second; | ||||
|     } else if(type_arr* arr = dynamic_cast<type_arr*>(t.get())) { | ||||
|         auto left_result = substitute(mgr, subst, arr->left); | ||||
|         auto right_result = substitute(mgr, subst, arr->right); | ||||
|         if(left_result == arr->left && right_result == arr->right) return t; | ||||
|         return type_ptr(new type_arr(left_result, right_result)); | ||||
|     } | ||||
|     return t; | ||||
| } | ||||
| 
 | ||||
| type_ptr type_scheme::instantiate(type_mgr& mgr) const { | ||||
|     if(forall.size() == 0) return monotype; | ||||
|     std::map<std::string, type_ptr> subst; | ||||
|     for(auto& var : forall) { | ||||
|         subst[var] = mgr.new_type(); | ||||
|     } | ||||
|     return substitute(mgr, subst, monotype); | ||||
| } | ||||
| 
 | ||||
| void type_var::print(const type_mgr& mgr, std::ostream& to) const { | ||||
|     auto it = mgr.types.find(name); | ||||
|     if(it != mgr.types.end()) { | ||||
| @ -97,3 +134,15 @@ void type_mgr::bind(const std::string& s, type_ptr t) { | ||||
|     if(other && other->name == s) return; | ||||
|     types[s] = t; | ||||
| } | ||||
| 
 | ||||
| void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const { | ||||
|     type_var* var; | ||||
|     type_ptr resolved = resolve(t, var); | ||||
| 
 | ||||
|     if(var) { | ||||
|         into.insert(var->name); | ||||
|     } else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) { | ||||
|         find_free(arr->left, into); | ||||
|         find_free(arr->right, into); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -1,6 +1,8 @@ | ||||
| #pragma once | ||||
| #include <memory> | ||||
| #include <map> | ||||
| #include <vector> | ||||
| #include <set> | ||||
| 
 | ||||
| struct type_mgr; | ||||
| 
 | ||||
| @ -12,6 +14,18 @@ struct type { | ||||
| 
 | ||||
| using type_ptr = std::shared_ptr<type>; | ||||
| 
 | ||||
| struct type_scheme { | ||||
|     std::vector<std::string> forall; | ||||
|     type_ptr monotype; | ||||
| 
 | ||||
|     type_scheme(type_ptr type) : forall(), monotype(std::move(type)) {} | ||||
| 
 | ||||
|     void print(const type_mgr& mgr, std::ostream& to) const; | ||||
|     type_ptr instantiate(type_mgr& mgr) const; | ||||
| }; | ||||
| 
 | ||||
| using type_scheme_ptr = std::shared_ptr<type_scheme>; | ||||
| 
 | ||||
| struct type_var : public type { | ||||
|     std::string name; | ||||
| 
 | ||||
| @ -62,4 +76,5 @@ struct type_mgr { | ||||
|     void unify(type_ptr l, type_ptr r); | ||||
|     type_ptr resolve(type_ptr t, type_var*& var) const; | ||||
|     void bind(const std::string& s, type_ptr t); | ||||
|     void find_free(const type_ptr& t, std::set<std::string>& into) const; | ||||
| }; | ||||
|  | ||||
| @ -1,16 +1,45 @@ | ||||
| #include "type_env.hpp" | ||||
| #include "type.hpp" | ||||
| 
 | ||||
| type_ptr type_env::lookup(const std::string& name) const { | ||||
| type_scheme_ptr type_env::lookup(const std::string& name) const { | ||||
|     auto it = names.find(name); | ||||
|     if(it != names.end()) return it->second; | ||||
|     if(parent) return parent->lookup(name); | ||||
|     return nullptr; | ||||
| } | ||||
| 
 | ||||
| type_ptr type_env::lookup_type(const std::string& name) const { | ||||
|     auto it = type_names.find(name); | ||||
|     if(it != type_names.end()) return it->second; | ||||
|     if(parent) return parent->lookup_type(name); | ||||
|     return nullptr; | ||||
| } | ||||
| 
 | ||||
| void type_env::bind(const std::string& name, type_ptr t) { | ||||
|     names[name] = type_scheme_ptr(new type_scheme(t)); | ||||
| } | ||||
| 
 | ||||
| void type_env::bind(const std::string& name, type_scheme_ptr t) { | ||||
|     names[name] = t; | ||||
| } | ||||
| 
 | ||||
| type_env type_env::scope() const { | ||||
|     return type_env(this); | ||||
| void type_env::bind_type(const std::string& type_name, type_ptr t) { | ||||
|     if(lookup_type(type_name) != nullptr) throw 0; | ||||
|     type_names[type_name] = t; | ||||
| } | ||||
| 
 | ||||
| void type_env::generalize(const std::string& name, type_mgr& mgr) { | ||||
|     auto names_it = names.find(name); | ||||
|     if(names_it == names.end()) throw 0; | ||||
|     if(names_it->second->forall.size() > 0) throw 0; | ||||
| 
 | ||||
|     std::set<std::string> free_variables; | ||||
|     mgr.find_free(names_it->second->monotype, free_variables); | ||||
|     for(auto& free : free_variables) { | ||||
|         names_it->second->forall.push_back(free); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| type_env_ptr type_scope(type_env_ptr parent) { | ||||
|     return type_env_ptr(new type_env(std::move(parent))); | ||||
| } | ||||
|  | ||||
| @ -2,15 +2,24 @@ | ||||
| #include <map> | ||||
| #include "type.hpp" | ||||
| 
 | ||||
| struct type_env { | ||||
|     std::map<std::string, type_ptr> names; | ||||
|     type_env const* parent = nullptr; | ||||
| struct type_env; | ||||
| using type_env_ptr = std::shared_ptr<type_env>; | ||||
| 
 | ||||
|     type_env(type_env const* p) | ||||
|         : parent(p) {} | ||||
| struct type_env { | ||||
|     type_env_ptr parent; | ||||
|     std::map<std::string, type_scheme_ptr> names; | ||||
|     std::map<std::string, type_ptr> type_names; | ||||
| 
 | ||||
|     type_env(type_env_ptr p) : parent(std::move(p)) {} | ||||
|     type_env() : type_env(nullptr) {} | ||||
| 
 | ||||
|     type_ptr lookup(const std::string& name) const; | ||||
|     type_scheme_ptr lookup(const std::string& name) const; | ||||
|     type_ptr lookup_type(const std::string& name) const; | ||||
|     void bind(const std::string& name, type_ptr t); | ||||
|     type_env scope() const; | ||||
|     void bind(const std::string& name, type_scheme_ptr t); | ||||
|     void bind_type(const std::string& type_name, type_ptr t); | ||||
|     void generalize(const std::string& name, type_mgr& mgr); | ||||
| }; | ||||
| 
 | ||||
| 
 | ||||
| type_env_ptr type_scope(type_env_ptr parent); | ||||
|  | ||||
| @ -257,7 +257,8 @@ they are placed in one group. We then construct a dependency graph __of these gr | ||||
| 3. We compute a topological order of the group graph. This helps us typecheck the dependencies | ||||
| of functions before checking the functions themselves. In our specific case, this would ensure | ||||
| we check `if` first, and only then move on to `testOne` and `testTwo`. The order of typechecking | ||||
| within a group does not matter. | ||||
| within a group does not matter, as long as we generalize only after typechecking all functions | ||||
| in a group. | ||||
| 4. We typecheck the function groups, and functions within them, following the above topological order. | ||||
| 
 | ||||
| To find the transitive closure of a graph, we can use [Warshall's Algorithm](https://cs.winona.edu/lin/cs440/ch08-2.pdf). | ||||
| @ -326,7 +327,7 @@ I think that we should create a C++ class that will represent our function | ||||
| dependency graph. Let's call it `function_graph`. I propose the following | ||||
| definition: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 12 51 >}} | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 12 52 >}} | ||||
| 
 | ||||
| There's a lot to unpack here. First of all, we create a type alias `function` that | ||||
| represents the label of a function in our graph. It is probably most convenient | ||||
| @ -363,16 +364,22 @@ of each group are computed, as well as their adjacency lists. | ||||
| * `generate_order` uses the indegrees and adjacency lists produced in the prior step | ||||
| to establish a topological order. | ||||
| 
 | ||||
| Finally, the `add_edge` method is used to add a new dependency between two functions, | ||||
| while the `compute_order` method uses the internal methods described above to convert | ||||
| Following these, we have three public function definitions: | ||||
| * `add_function` adds a vertex to the graph. Sometimes, a function does not | ||||
| reference any other functions, and would not appear in the list of edges. | ||||
| We will call this function to make sure that the function graph is aware | ||||
| of such functions. For convenience, this function returns the adjacency list | ||||
| of the added function. | ||||
| * `add_edge` adds a new dependency between two functions. | ||||
| * `compute_order` uses the internal methods described above to convert | ||||
| the function dependency graph into a properly ordered list of groups. | ||||
| 
 | ||||
| Let's start by looking at how to implement the internal methods. `compute_transitive_edges` | ||||
| is a very straightforward implementation of Warshall's: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 53 71 >}} | ||||
| {{< codelines "C++" "compiler/10/graph.cpp" 3 21 >}} | ||||
| 
 | ||||
| Next is `create_groups`, for each function, we iterate over all other functions. | ||||
| Next is `create_groups`. For each function, we iterate over all other functions. | ||||
| If the other function is mutually dependent with the first function, we add | ||||
| it to the same group. In the outer loop, we skip over functions that have | ||||
| already been added to a group. This is because  | ||||
| @ -392,7 +399,7 @@ is an [equivalence relation](https://en.wikipedia.org/wiki/Equivalence_relation) | ||||
| which means that if we already added a function to a group, all its | ||||
| group members were also already visited and added. | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 73 94 >}} | ||||
| {{< codelines "C++" "compiler/10/graph.cpp" 23 44 >}} | ||||
| 
 | ||||
| Once groups have been created, we use their functions' edges | ||||
| to create edges for the groups themselves, using `create_edges`. | ||||
| @ -400,26 +407,362 @@ We avoid creating edges from a group to itself, to avoid | ||||
| unnecessary cycles. While constructing the edges, we also | ||||
| increment the relevant indegree counter. | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 96 113 >}} | ||||
| {{< codelines "C++" "compiler/10/graph.cpp" 46 63 >}} | ||||
| 
 | ||||
| Finally, we apply Kahn's algorithm to create a topological order | ||||
| in `generate_order`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 115 140 >}} | ||||
| {{< codelines "C++" "compiler/10/graph.cpp" 65 90 >}} | ||||
| 
 | ||||
| These four steps are used in `compute_order`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 152 160 >}} | ||||
| {{< codelines "C++" "compiler/10/graph.cpp" 106 114 >}} | ||||
| 
 | ||||
| Finally, `add_edge` straightforwardly adds an edge | ||||
| to the graph: | ||||
| Let's now look at the remaining two public definitions. | ||||
| First comes `add_function`, which creates an adjacency list for the | ||||
| function to be inserted (if one does not already exist), | ||||
| and returns a reference to the resulting list: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.hpp" 142 150 >}} | ||||
| {{< codelines "C++" "compiler/10/graph.cpp" 92 99 >}} | ||||
| 
 | ||||
| We use this in `add_edge`, which straightforwardly creates an edge | ||||
| between two functions: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/graph.cpp" 101 104 >}} | ||||
| 
 | ||||
| With this, we can now properly order our typechecking. | ||||
| However, there are a few pieces of the puzzle missing. | ||||
| First of all, we need to actually insert function | ||||
| dependencies into the graph. Second, we need to think | ||||
| about how our existing language features and implementation | ||||
| will interact with polymorphism. Third, we have to come up | ||||
| with an implementation of polymorphic data types. | ||||
| However, we are just getting started: there are still | ||||
| numerous changes we need to make to get our compiler | ||||
| to behave as we desire. | ||||
| 
 | ||||
| The first change is the least relevant, but will help clean | ||||
| up our code base in the presence of polymorphism: we will | ||||
| get rid of `resolve`, in both definitions and AST nodes. | ||||
| The reasons for this are twofold. First, | ||||
| {{< sidenote "right" "case-type-note" "only the case expression node actually uses the type it stores." >}} | ||||
| Recall that <code>ast_case</code> needs this information to properly | ||||
| account for the changes to the stack from when data is unpacked. | ||||
| {{< /sidenote >}} This means that | ||||
| all the rest of the infrastructure we've written around | ||||
| preserving types is somewhat pointless. Second, when | ||||
| we call `resolve`, we'd now have to distinguish | ||||
| between type variables captured by "forall" and actual, | ||||
| undefined variables. That's a lot of wasted work! | ||||
| To replace the now-removed `type` field, | ||||
| we make `ast_case` include a new member, `input_type`, | ||||
| which stores the type of the thing between `case` and `of`. | ||||
| Since `ast_case` requires its type to be a data type | ||||
| at the time of typechecking, we no longer need to resolve anything. | ||||
| 
 | ||||
| Next, we need to work in a step geared towards finding function calls | ||||
| (to determine dependencies). As we have noted in [part 6]({{< relref "06_compiler_compilation.md" >}}), | ||||
| it's pretty easy to tell apart calls to global functions from "local" ones. If | ||||
| we see that a variable was previously bound (perhaps as a function argument, | ||||
| or by a pattern in a case expression), we know for sure that it is not a global | ||||
| function call. Otherwise, if the variable isn't bound anywhere in the function | ||||
| definition (it's a __free variable__), it must refer to a global function. Then, | ||||
| we can traverse the function body, storing variables that are bound (but only within | ||||
| their scope), and noting references to variables we haven't yet seen. To | ||||
| implement this, we can use a linked list, where each node refers to a particular | ||||
| scope, points to the scope enclosing it, and contains a list of variables... | ||||
| 
 | ||||
| Wait a minute, this is identical to `type_env`! There's no reason to reimplement all | ||||
| this. But then, another question arises: do we throw away the `type_env` generated | ||||
| by the dependency-searching step? It seems wasteful, since we will eventually | ||||
| repeat this same work. Rather, we'll re-use the same `type_env` instances | ||||
| in both this new step and `typecheck`. To do this, we will now store a pointer | ||||
| to a `type_env` in every AST node, and set this pointer during our first traversal | ||||
| of the tree. Indeed, this makes our `type_env` more like a | ||||
| [symbol table](https://en.wikipedia.org/wiki/Symbol_table). With this change, | ||||
| our new dependency-finding step will be implemented by the `find_free` function | ||||
| with the following signature: | ||||
| 
 | ||||
| ```C++ | ||||
| void ast::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into); | ||||
| ``` | ||||
| 
 | ||||
| Let's take a look at how this will be implemented. The simplest case (as usual) | ||||
| is `ast_int`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 16 18 >}} | ||||
| 
 | ||||
| In this case, we associate the `type_env` with the node, but don't do anything | ||||
| else: a number is not a variable. A more interesting case is `ast_lid`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 33 36 >}} | ||||
| 
 | ||||
| If a lowercase variable has not yet been bound to something, it's free, | ||||
| and we store it. Somewhat counterintuitively, `ast_uid` behaves | ||||
| differently: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 54 56 >}} | ||||
| 
 | ||||
| We don't allow uppercase variables to be bound to anything outside of data type | ||||
| declarations, so we don't care about uppercase free variables. Next up is | ||||
| `ast_binop`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 73 77 >}} | ||||
| 
 | ||||
| A binary operator can have free variables in the subexpressions on the left and on the right, and | ||||
| the above implementation reflects that. This is identical to the implementation of | ||||
| `ast_app`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 109 113 >}} | ||||
| 
 | ||||
| Finally, `ast_case` requires the most complicated function (as usual): | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 142 150 >}} | ||||
| 
 | ||||
| The `type_scope` function replaces the `type_env::scope` method, | ||||
| which cannot (without significant effort) operate on smart pointers. | ||||
| Importantly, we are using a new `pattern` method here, `insert_bindings`. This | ||||
| is because we split "introducing variables" and "typechecking variables" | ||||
| into two steps for patterns, as well. The implementation of `insert_bindings` | ||||
| for `pattern_var` is as follows: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 230 232 >}} | ||||
| 
 | ||||
| A variable pattern always introduces the variable it is made up of. | ||||
| On the other hand, the implementation for `pattern_constr` is as follows: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 245 249 >}} | ||||
| 
 | ||||
| All the variables of the pattern are placed into the environment. For now, we don't worry | ||||
| about arity; this is the job of typechecking. | ||||
| 
 | ||||
| These changes are reflected in all instances of our `typecheck` function. First of | ||||
| all, `typecheck` no longer needs to receive a `type_env` parameter, since each | ||||
| tree node has a `type_env_ptr`. Furthermore, `typecheck` should no longer call | ||||
| `bind`, since this was already done by `find_free`. For example, | ||||
| `ast_lid::typecheck` will now use `env::lookup`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 38 40 >}} | ||||
| 
 | ||||
| Don't worry about `instantiate` for now; that's coming up. Similarly to | ||||
| `ast_lid`, `ast_case::typecheck` will no longer introduce new bindings, | ||||
| and unify instead: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 152 169 >}} | ||||
| 
 | ||||
| The above implementation uses another new `pattern` method, `typecheck`. | ||||
| This method inherits the type checking functionality previously | ||||
| contained in `pattern::match`. Here's the implementation for `pattern_var`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 234 236 >}} | ||||
| 
 | ||||
| And here's the implementation for `pattern_constr`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/ast.cpp" 251 266 >}} | ||||
| 
 | ||||
| So far, so good. However, for all of this to reach the main typechecking | ||||
| code, not only `ast` subclasses need to be updated, but also | ||||
| the `definition`s. Here things get more complicated, because | ||||
| `definition_data` and `definition_defn` are growing more and more apart. | ||||
| Previously, we had two typechecking steps: `typecheck_first` (which registered | ||||
| function names into the environment) and `typecheck_second` (which performed | ||||
| the actual typechecking). However, not only are these names not informative, | ||||
| but the algorithms for typechecking the two types of definition will soon | ||||
| have different numbers of "major" steps. | ||||
| 
 | ||||
| Let's take a look at how we would typecheck data types. I propose the following | ||||
| steps: | ||||
| 
 | ||||
| 1. Iterate all declared data types, storing them into some kind of "known" list. | ||||
| 2. Iterate again, and for each constructor of a type, verify that | ||||
| it refers to "known" types. Add valid constructors to the global environment as functions. | ||||
| 
 | ||||
| We don't currently verify that types are "known"; a user could declare a list of `Floobs`, | ||||
| and never say what a `Floob` is. This isn't too big of an issue (good luck constructing | ||||
| a value of a non-existent type), but a mature compiler should prevent this from happening. | ||||
| 
 | ||||
| On the other hand, here are the steps for function definitions: | ||||
| 
 | ||||
| 1. Find the free variables of each function to create the ordered list of groups as described above. | ||||
| 2. Within each group, insert a general function type (like \\(a \\rightarrow b \\rightarrow c\\)) | ||||
| into the environment for each function. | ||||
| 3. Within each group (in the same pass) run typechecking | ||||
| (including polymorphism, using the rules as described above). | ||||
| 
 | ||||
| The two types of definitions further diverge when generating LLVM and compiling to G-machine instructions: | ||||
| data types immediately construct and insert their functions, and do not emit G-machine instructions, | ||||
| while functions generate G-machine instructions, declare prototypes, and emit LLVM in three distinct phases. | ||||
| Overall, there are virtually no similarities between the two kinds of definitions, and any inheritance | ||||
| of common functions starts to appear somewhat forced. To address this, we remove the `definition` class | ||||
| altogether, and sever the relationship between `definition_data` and `definition_defn`. The | ||||
| two now look as follows: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/definition.hpp" 23 67 >}} | ||||
| 
 | ||||
| In `definition_defn`, the functions are arranged as follows: | ||||
| 
 | ||||
| * `find_free` locates the free variables in the definition, populating | ||||
| the `free_variables` field and thereby finding edges for the function graph. | ||||
| * `insert_types` stores the type of the function into the global environment | ||||
| (a pointer to which is now stored as a field). | ||||
| * `typecheck` runs the standard typechecking steps. | ||||
| * `compile` generates G-machine instructions. | ||||
| * `declare_llvm` inserts LLVM function prototypes into the `llvm_context`. | ||||
| * `generate_llvm` converts G-machine instructions into LLVM IR. | ||||
| 
 | ||||
| In `definition_data`, the steps are significantly simpler: | ||||
| 
 | ||||
| * `insert_types` registers the type being declared as a "known" type. | ||||
| * `insert_constructors` inserts constructors (which are verified to | ||||
| refer to "known" types) into the global environment. | ||||
| * `generate_llvm` creates the LLVM functions (and their IR). | ||||
| 
 | ||||
| While the last three methods of `definition_defn` remain unchanged save | ||||
| for the name, the implementations of the first three see some updates. | ||||
| First is `find_free`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/definition.cpp" 12 26 >}} | ||||
| 
 | ||||
| First, to make sure we don't pollute the global scope | ||||
| with function parameters, `find_free` creates a new environment | ||||
| `var_env`. Then, it stores into this new environment the function parameters, | ||||
| ensuring that the parameters of a function aren't marked "free". | ||||
| Concurrently, `find_free` constructs the "general" function | ||||
| type (used by `insert_types`). Once all the arguments have been bound, `definition_defn::find_free` | ||||
| makes a call to `ast::find_free`, which does the work of actually | ||||
| finding free variables. | ||||
| 
 | ||||
| Since the function type is created by `find_free`, `insert_types` has very little to do: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/definition.cpp" 28 30 >}} | ||||
| 
 | ||||
| Finally, `typecheck`, which no longer has to bind the function | ||||
| arguments to new types, is also fairly simple: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/definition.cpp" 32 35 >}} | ||||
| 
 | ||||
| Let's move on to data types. In order to implement `definition_data::insert_types`, | ||||
| we need to store somewhere a list of all the valid type names. We do this | ||||
| by adding a new `type_names` field to `type_env`, and implementing the | ||||
| corresponding methods `lookup_type`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/type_env.cpp" 11 16 >}} | ||||
| 
 | ||||
| And `bind_type`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/type_env.cpp" 26 29 >}} | ||||
| 
 | ||||
| Note in the above snippets that we disallow redeclaring type names; | ||||
| declaring two data types (or other types) with the same name in | ||||
| our language will not be valid. In `insert_types`, we create a new | ||||
| data type and store it in the environment: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/definition.cpp" 59 62 >}} | ||||
| 
 | ||||
| We then update `insert_constructors` to query the environment | ||||
| when creating constructor types, rather than blindly using `new type_base(...)` | ||||
| like before: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/definition.cpp" 64 82 >}} | ||||
| 
 | ||||
| The separation of data and function definitions must be reconciled with code | ||||
| going back as far as the parser. While previously, we populated a single, global | ||||
| vector of definitions called `program`, we can no longer do that. Instead, we'll | ||||
| split our program into two maps, one for data types and one for functions. We | ||||
| use maps for convenience: the groups generated by our function graph refer | ||||
| to functions by name, and it would be nice to quickly look up the data | ||||
| the names refer to. Rather than returning such maps, we change our semantic | ||||
| actions to simply insert new data into one of two global maps. Below | ||||
| is a snippet that includes all the changes: | ||||
| 
 | ||||
| {{< codelines "plaintext" "compiler/10/parser.y" 39 65 >}} | ||||
| 
 | ||||
| Note that `program` and `definitions` no longer have a type, and that `data` and `defn` | ||||
| have been changed to return `definition_data_ptr` and `definition_defn_ptr`, respectively. | ||||
| This necessitates changes to our main file. First of all, we declare the two new maps | ||||
| we hope to receive from Bison: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/main.cpp" 24 25 >}} | ||||
| 
 | ||||
| We then change all affected functions, which in many cases amounts to splitting the `program` parameter | ||||
| into `defs_data` and `defs_defn` parameters. We also make other, largely mechanical changes: code iterating | ||||
| over definitions now requires the use of `second` to refer to the value stored in the map, and LLVM | ||||
| generation now needs to separately process the two different types of definitions. The biggest change | ||||
| occurs in `typecheck_program`, which not only undergoes all the aforementioned modifications, but  | ||||
| is also updated to use topological ordering: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/main.cpp" 27 84 >}} | ||||
| 
 | ||||
| The above code uses the yet-unexplained `generalize` method. What's going on? | ||||
| 
 | ||||
| Observe that the __Var__ rule of the Hindley-Milner type system says that a variable \\(x\\) | ||||
| can have a __polytype__ in the environment \\(\\Gamma\\). Our `type_ptr` can only represent monotypes, | ||||
| so we must change what `type_env` associates with names to a new struct for representing polytypes, | ||||
| which we will call `type_scheme`. The `type_scheme` struct, just like the formal definition of | ||||
| a polytype, contains zero or more "forall"-quantified type variables, followed by a monotype which | ||||
| may use these variables: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/type.hpp" 17 27 >}} | ||||
| 
 | ||||
| The `type_scheme::instantiate` method is effectively an implementation of the special | ||||
| case of the __Inst__ rule, in which a polytype is specialized to a monotype. Since | ||||
| the __App__ and __Case__ rules only use monotypes, we'll be using this special case a lot. | ||||
| We implement this method as follows: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/type.cpp" 34 41 >}} | ||||
| 
 | ||||
| In the above code, if the type scheme represents a monotype (i.e., it has no quantified variables), | ||||
| we simply return that monotype. Otherwise, we must perform a substitution, replacing "forall"-quantified | ||||
| variables with fresh type parameters to be determined (we will never determine a single type for any of | ||||
| the quantified variables, since they are specifically meant to represent any type). | ||||
| We build a substitution map, which assigns to each quantified type variable a corresponding | ||||
| "fresh" type, and then create a new type with the substitution applied using `substitute`, | ||||
| which is implemented as follows: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/type.cpp" 18 32 >}} | ||||
| 
 | ||||
| In principle, the function is fairly simple: if the current type is equivalent to a | ||||
| quantified type, we return the corresponding "fresh" type. If, on the other hand, | ||||
| the type represents a function, we perform a substitution in the function's input | ||||
| and output types. This method avoids creating new types where possible; a new type | ||||
| is only created if a function's input or output type is changed by a substitution | ||||
| (in which case, the function itself is changed by the substitution). In all | ||||
| other cases, substitution won't do anything, so we just return the original type. | ||||
| 
 | ||||
| Now it is a bit more clear why we saw `instantiate` in a code snippet some time ago; | ||||
| to compute a monotype for a variable reference, we must take into account the | ||||
| possibility that the variable has a polymorphic type, which needs to be specialized | ||||
| (potentially differently in every occurrence of the variable). | ||||
| 
 | ||||
| When talking about our new typechecking algorithm, we mentioned using __Gen__ to sprinkle | ||||
| polymorphism wherever possible. Whenever possible, __Gen__ will add free variables | ||||
| in a type to the "forall" quantifier at the front, making that type polymorphic.  | ||||
| We implement this using a new `generalize` method added to `type_env`, which (as per | ||||
| convention) generalizes the type of a given variable as much as possible: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/type_env.cpp" 31 41 >}} | ||||
| 
 | ||||
| For now, we do not allow a type to be generalized twice, and we naturally disallow generalizing | ||||
| types of nonexistent variables. If neither of those things occurs, we find all the free | ||||
| variables in the variable's current type using a new method called `type_mgr::find_free`, | ||||
| and put them into the "forall" quantifier. `type_mgr::find_free` is implemented as follows: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/10/type.cpp" 138 148 >}} | ||||
| 
 | ||||
| The above code is fairly straightforward; if a type is a variable that is not yet bound to anything, | ||||
| it is free; if the type is a function, we search for free variables in its input and output types; | ||||
| otherwise, the type has no free variables. | ||||
| 
 | ||||
| Finally, we have made the necessary changes. Let's test it out with the example from the beginning: | ||||
| 
 | ||||
| {{< rawblock "compiler/10/examples/if.txt" >}} | ||||
| 
 | ||||
| Running it, we get the output: | ||||
| 
 | ||||
| ``` | ||||
| 3 | ||||
| ``` | ||||
| 
 | ||||
| Hooray! | ||||
| 
 | ||||
| While this is a major success, we are not yet done. Although our functions can now  | ||||
| have polymorphic types, the same cannot be said for our data types! We want to | ||||
| have lists of integers __and__ lists of booleans, without having to duplicate any code! | ||||
| While this also falls into the category of polymorphism, this post has already gotten very long, | ||||
| and we will return to it in the near future. Once we're done with that, I still intend | ||||
| to go over `let/in` expressions, __lambda functions__, and __Input/Output__ together with | ||||
| __strings__. See you in these future posts! | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user