Compare commits
No commits in common. "2255543d94bc3ddf388ca6365ca315bce78e2f56" and "d7846e0b32feac44fe6e3b7e0e76822aa9a5c4d5" have entirely different histories.
2255543d94
...
d7846e0b32
@ -21,9 +21,7 @@ llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||
|
||||
# Create compiler executable
|
||||
add_executable(compiler
|
||||
definition.cpp definition.hpp
|
||||
parsed_type.cpp parsed_type.hpp
|
||||
ast.cpp ast.hpp
|
||||
ast.cpp ast.hpp definition.cpp
|
||||
llvm_context.cpp llvm_context.hpp
|
||||
type_env.cpp type_env.hpp
|
||||
env.cpp env.hpp
|
||||
|
@ -18,7 +18,7 @@ void ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>&
|
||||
}
|
||||
|
||||
type_ptr ast_int::typecheck(type_mgr& mgr) {
|
||||
return type_ptr(new type_app(env->lookup_type("Int")));
|
||||
return type_ptr(new type_base("Int"));
|
||||
}
|
||||
|
||||
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
@ -161,9 +161,7 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
|
||||
}
|
||||
|
||||
input_type = mgr.resolve(case_type, var);
|
||||
type_app* app_type;
|
||||
if(!(app_type = dynamic_cast<type_app*>(input_type.get())) ||
|
||||
!dynamic_cast<type_data*>(app_type->constructor.get())) {
|
||||
if(!dynamic_cast<type_data*>(input_type.get())) {
|
||||
throw type_error("attempting case analysis of non-data type");
|
||||
}
|
||||
|
||||
@ -171,8 +169,7 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
|
||||
}
|
||||
|
||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
||||
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
|
||||
type_data* type = dynamic_cast<type_data*>(input_type.get());
|
||||
|
||||
of->compile(env, into);
|
||||
into.push_back(instruction_ptr(new instruction_eval()));
|
||||
|
@ -56,38 +56,28 @@ void definition_defn::generate_llvm(llvm_context& ctx) {
|
||||
ctx.builder.CreateRetVoid();
|
||||
}
|
||||
|
||||
void definition_data::insert_types(type_env_ptr& env) {
|
||||
void definition_data::insert_types(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
env->bind_type(name, type_ptr(new type_data(name)));
|
||||
}
|
||||
|
||||
void definition_data::insert_constructors() const {
|
||||
type_ptr this_type_ptr = env->lookup_type(name);
|
||||
type_data* this_type = static_cast<type_data*>(this_type_ptr.get());
|
||||
type_ptr return_type = env->lookup_type(name);
|
||||
type_data* this_type = static_cast<type_data*>(return_type.get());
|
||||
int next_tag = 0;
|
||||
|
||||
std::set<std::string> var_set;
|
||||
type_app* return_app = new type_app(std::move(this_type_ptr));
|
||||
type_ptr return_type(return_app);
|
||||
for(auto& var : vars) {
|
||||
if(var_set.find(var) != var_set.end()) throw 0;
|
||||
var_set.insert(var);
|
||||
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||
}
|
||||
|
||||
for(auto& constructor : constructors) {
|
||||
constructor->tag = next_tag;
|
||||
this_type->constructors[constructor->name] = { next_tag++ };
|
||||
|
||||
type_ptr full_type = return_type;
|
||||
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||
type_ptr type = (*it)->to_type(var_set, env);
|
||||
type_ptr type = env->lookup_type(*it);
|
||||
if(!type) throw 0;
|
||||
full_type = type_ptr(new type_arr(type, full_type));
|
||||
}
|
||||
|
||||
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
|
||||
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
|
||||
env->bind(constructor->name, full_scheme);
|
||||
env->bind(constructor->name, full_type);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <set>
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include "parsed_type.hpp"
|
||||
#include "type_env.hpp"
|
||||
|
||||
struct ast;
|
||||
@ -12,10 +11,10 @@ using ast_ptr = std::unique_ptr<ast>;
|
||||
|
||||
struct constructor {
|
||||
std::string name;
|
||||
std::vector<parsed_type_ptr> types;
|
||||
std::vector<std::string> types;
|
||||
int8_t tag;
|
||||
|
||||
constructor(std::string n, std::vector<parsed_type_ptr> ts)
|
||||
constructor(std::string n, std::vector<std::string> ts)
|
||||
: name(std::move(n)), types(std::move(ts)) {}
|
||||
};
|
||||
|
||||
@ -53,18 +52,14 @@ using definition_defn_ptr = std::unique_ptr<definition_defn>;
|
||||
|
||||
struct definition_data {
|
||||
std::string name;
|
||||
std::vector<std::string> vars;
|
||||
std::vector<constructor_ptr> constructors;
|
||||
|
||||
type_env_ptr env;
|
||||
|
||||
definition_data(
|
||||
std::string n,
|
||||
std::vector<std::string> vs,
|
||||
std::vector<constructor_ptr> cs)
|
||||
: name(std::move(n)), vars(std::move(vs)), constructors(std::move(cs)) {}
|
||||
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
||||
: name(std::move(n)), constructors(std::move(cs)) {}
|
||||
|
||||
void insert_types(type_env_ptr& env);
|
||||
void insert_types(type_mgr& mgr, type_env_ptr& env);
|
||||
void insert_constructors() const;
|
||||
void generate_llvm(llvm_context& ctx);
|
||||
};
|
||||
|
@ -30,18 +30,17 @@ void typecheck_program(
|
||||
type_mgr& mgr, type_env_ptr& env) {
|
||||
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||
env->bind_type("Int", int_type);
|
||||
type_ptr int_type_app = type_ptr(new type_app(int_type));
|
||||
|
||||
type_ptr binop_type = type_ptr(new type_arr(
|
||||
int_type_app,
|
||||
type_ptr(new type_arr(int_type_app, int_type_app))));
|
||||
int_type,
|
||||
type_ptr(new type_arr(int_type, int_type))));
|
||||
env->bind("+", binop_type);
|
||||
env->bind("-", binop_type);
|
||||
env->bind("*", binop_type);
|
||||
env->bind("/", binop_type);
|
||||
|
||||
for(auto& def_data : defs_data) {
|
||||
def_data.second->insert_types(env);
|
||||
def_data.second->insert_types(mgr, env);
|
||||
}
|
||||
for(auto& def_data : defs_data) {
|
||||
def_data.second->insert_constructors();
|
||||
|
@ -1,35 +0,0 @@
|
||||
#include "parsed_type.hpp"
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
|
||||
type_ptr parsed_type_app::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& e) const {
|
||||
auto parent_type = e.lookup_type(name);
|
||||
if(parent_type == nullptr) throw 0;
|
||||
type_base* base_type;
|
||||
if(!(base_type = dynamic_cast<type_base*>(parent_type.get()))) throw 0;
|
||||
|
||||
type_app* new_app = new type_app(std::move(parent_type));
|
||||
type_ptr to_return(new_app);
|
||||
for(auto& arg : arguments) {
|
||||
new_app->arguments.push_back(arg->to_type(vars, e));
|
||||
}
|
||||
return to_return;
|
||||
}
|
||||
|
||||
type_ptr parsed_type_var::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& e) const {
|
||||
if(vars.find(var) == vars.end()) throw 0;
|
||||
return type_ptr(new type_var(var));
|
||||
}
|
||||
|
||||
|
||||
type_ptr parsed_type_arr::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& env) const {
|
||||
auto new_left = left->to_type(vars, env);
|
||||
auto new_right = right->to_type(vars, env);
|
||||
return type_ptr(new type_arr(std::move(new_left), std::move(new_right)));
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "type_env.hpp"
|
||||
|
||||
struct parsed_type {
|
||||
virtual type_ptr to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& env) const = 0;
|
||||
};
|
||||
|
||||
using parsed_type_ptr = std::unique_ptr<parsed_type>;
|
||||
|
||||
struct parsed_type_app : parsed_type {
|
||||
std::string name;
|
||||
std::vector<parsed_type_ptr> arguments;
|
||||
|
||||
parsed_type_app(
|
||||
std::string n,
|
||||
std::vector<parsed_type_ptr> as)
|
||||
: name(std::move(n)), arguments(std::move(as)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
|
||||
struct parsed_type_var : parsed_type {
|
||||
std::string var;
|
||||
|
||||
parsed_type_var(std::string v) : var(std::move(v)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
|
||||
struct parsed_type_arr : parsed_type {
|
||||
parsed_type_ptr left;
|
||||
parsed_type_ptr right;
|
||||
|
||||
parsed_type_arr(parsed_type_ptr l, parsed_type_ptr r)
|
||||
: left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
@ -5,7 +5,6 @@
|
||||
#include "ast.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "parser.hpp"
|
||||
#include "parsed_type.hpp"
|
||||
|
||||
std::map<std::string, definition_data_ptr> defs_data;
|
||||
std::map<std::string, definition_defn_ptr> defs_defn;
|
||||
@ -37,11 +36,9 @@ extern yy::parser::symbol_type yylex();
|
||||
%define api.value.type variant
|
||||
%define api.token.constructor
|
||||
|
||||
%type <std::vector<std::string>> lowercaseParams
|
||||
%type <std::vector<std::string>> lowercaseParams uppercaseParams
|
||||
%type <std::vector<branch_ptr>> branches
|
||||
%type <std::vector<constructor_ptr>> constructors
|
||||
%type <std::vector<parsed_type_ptr>> typeList
|
||||
%type <parsed_type_ptr> type nullaryType typeListElement
|
||||
%type <ast_ptr> aAdd aMul case app appBase
|
||||
%type <definition_data_ptr> data
|
||||
%type <definition_defn_ptr> defn
|
||||
@ -78,6 +75,11 @@ lowercaseParams
|
||||
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
|
||||
uppercaseParams
|
||||
: %empty { $$ = std::vector<std::string>(); }
|
||||
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
|
||||
aAdd
|
||||
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
|
||||
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
|
||||
@ -125,8 +127,8 @@ pattern
|
||||
;
|
||||
|
||||
data
|
||||
: DATA UID lowercaseParams EQUAL OCURLY constructors CCURLY
|
||||
{ $$ = definition_data_ptr(new definition_data(std::move($2), std::move($3), std::move($6))); }
|
||||
: DATA UID EQUAL OCURLY constructors CCURLY
|
||||
{ $$ = definition_data_ptr(new definition_data(std::move($2), std::move($5))); }
|
||||
;
|
||||
|
||||
constructors
|
||||
@ -136,28 +138,7 @@ constructors
|
||||
;
|
||||
|
||||
constructor
|
||||
: UID typeList
|
||||
: UID uppercaseParams
|
||||
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
|
||||
;
|
||||
|
||||
type
|
||||
: nullaryType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
|
||||
| nullaryType { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
nullaryType
|
||||
: UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
|
||||
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||
| OPAREN type CPAREN { $$ = std::move($2); }
|
||||
;
|
||||
|
||||
typeListElement
|
||||
: OPAREN type CPAREN { $$ = std::move($2); }
|
||||
| UID { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), {})); }
|
||||
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||
;
|
||||
|
||||
typeList
|
||||
: %empty { $$ = std::vector<parsed_type_ptr>(); }
|
||||
| typeList typeListElement { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
|
@ -2,7 +2,6 @@
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "error.hpp"
|
||||
|
||||
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
@ -16,13 +15,29 @@ void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
monotype->print(mgr, to);
|
||||
}
|
||||
|
||||
type_ptr substitute(const type_mgr& mgr, const std::map<std::string, type_ptr>& subst, const type_ptr& t) {
|
||||
type_var* var;
|
||||
type_ptr resolved = mgr.resolve(t, var);
|
||||
if(var) {
|
||||
auto subst_it = subst.find(var->name);
|
||||
if(subst_it == subst.end()) return resolved;
|
||||
return subst_it->second;
|
||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(t.get())) {
|
||||
auto left_result = substitute(mgr, subst, arr->left);
|
||||
auto right_result = substitute(mgr, subst, arr->right);
|
||||
if(left_result == arr->left && right_result == arr->right) return t;
|
||||
return type_ptr(new type_arr(left_result, right_result));
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
||||
if(forall.size() == 0) return monotype;
|
||||
std::map<std::string, type_ptr> subst;
|
||||
for(auto& var : forall) {
|
||||
subst[var] = mgr.new_type();
|
||||
}
|
||||
return mgr.substitute(subst, monotype);
|
||||
return substitute(mgr, subst, monotype);
|
||||
}
|
||||
|
||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
@ -45,15 +60,6 @@ void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
to << ")";
|
||||
}
|
||||
|
||||
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
constructor->print(mgr, to);
|
||||
to << "* ";
|
||||
for(auto& arg : arguments) {
|
||||
to << " ";
|
||||
arg->print(mgr, to);
|
||||
}
|
||||
}
|
||||
|
||||
std::string type_mgr::new_type_name() {
|
||||
int temp = last_id++;
|
||||
std::string str = "";
|
||||
@ -93,10 +99,12 @@ type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
||||
}
|
||||
|
||||
void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||
type_var *lvar, *rvar;
|
||||
type_arr *larr, *rarr;
|
||||
type_base *lid, *rid;
|
||||
type_app *lapp, *rapp;
|
||||
type_var* lvar;
|
||||
type_var* rvar;
|
||||
type_arr* larr;
|
||||
type_arr* rarr;
|
||||
type_base* lid;
|
||||
type_base* rid;
|
||||
|
||||
l = resolve(l, lvar);
|
||||
r = resolve(r, rvar);
|
||||
@ -115,55 +123,11 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||
if(lid->name == rid->name) return;
|
||||
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
||||
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
||||
unify(lapp->constructor, rapp->constructor);
|
||||
auto left_it = lapp->arguments.begin();
|
||||
auto right_it = rapp->arguments.begin();
|
||||
while(left_it != lapp->arguments.end() &&
|
||||
right_it != rapp->arguments.end()) {
|
||||
unify(*left_it, *right_it);
|
||||
left_it++, right_it++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
throw unification_error(l, r);
|
||||
}
|
||||
|
||||
type_ptr type_mgr::substitute(const std::map<std::string, type_ptr>& subst, const type_ptr& t) const {
|
||||
type_ptr temp = t;
|
||||
while(type_var* var = dynamic_cast<type_var*>(temp.get())) {
|
||||
auto subst_it = subst.find(var->name);
|
||||
if(subst_it != subst.end()) return subst_it->second;
|
||||
auto var_it = types.find(var->name);
|
||||
if(var_it == types.end()) return t;
|
||||
temp = var_it->second;
|
||||
}
|
||||
|
||||
if(type_arr* arr = dynamic_cast<type_arr*>(temp.get())) {
|
||||
auto left_result = substitute(subst, arr->left);
|
||||
auto right_result = substitute(subst, arr->right);
|
||||
if(left_result == arr->left && right_result == arr->right) return t;
|
||||
return type_ptr(new type_arr(left_result, right_result));
|
||||
} else if(type_app* app = dynamic_cast<type_app*>(temp.get())) {
|
||||
auto constructor_result = substitute(subst, app->constructor);
|
||||
bool arg_changed = false;
|
||||
std::vector<type_ptr> new_args;
|
||||
for(auto& arg : app->arguments) {
|
||||
auto arg_result = substitute(subst, arg);
|
||||
arg_changed |= arg_result != arg;
|
||||
new_args.push_back(std::move(arg_result));
|
||||
}
|
||||
|
||||
if(constructor_result == app->constructor && !arg_changed) return t;
|
||||
type_app* new_app = new type_app(std::move(constructor_result));
|
||||
std::swap(new_app->arguments, new_args);
|
||||
return type_ptr(new_app);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||
|
||||
@ -180,8 +144,5 @@ void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
|
||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) {
|
||||
find_free(arr->left, into);
|
||||
find_free(arr->right, into);
|
||||
} else if(type_app* app = dynamic_cast<type_app*>(resolved.get())) {
|
||||
find_free(app->constructor, into);
|
||||
for(auto& arg : app->arguments) find_free(arg, into);
|
||||
}
|
||||
}
|
||||
|
@ -38,10 +38,9 @@ struct type_var : public type {
|
||||
|
||||
struct type_base : public type {
|
||||
std::string name;
|
||||
int32_t arity;
|
||||
|
||||
type_base(std::string n, int32_t a = 0)
|
||||
: name(std::move(n)), arity(a) {}
|
||||
type_base(std::string n)
|
||||
: name(std::move(n)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
@ -67,16 +66,6 @@ struct type_arr : public type {
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_app : public type {
|
||||
type_ptr constructor;
|
||||
std::vector<type_ptr> arguments;
|
||||
|
||||
type_app(type_ptr c)
|
||||
: constructor(std::move(c)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_mgr {
|
||||
int last_id = 0;
|
||||
std::map<std::string, type_ptr> types;
|
||||
@ -86,9 +75,6 @@ struct type_mgr {
|
||||
type_ptr new_arrow_type();
|
||||
|
||||
void unify(type_ptr l, type_ptr r);
|
||||
type_ptr substitute(
|
||||
const std::map<std::string, type_ptr>& subst,
|
||||
const type_ptr& t) const;
|
||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||
void bind(const std::string& s, type_ptr t);
|
||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||
|
@ -104,117 +104,3 @@ In effect, they take zero arguments and produce types (themselves).
|
||||
|
||||
Polytypes (type schemes) in our system can be all of the above, but may also include a "forall"
|
||||
quantifier at the front, generalizing the type (like \\(\\forall a \\; . \\; \\text{List} \\; a \\rightarrow \\text{Int}\\)).
|
||||
|
||||
Let's start implementing all of this. Why don't we start with the change to the syntax of our language?
|
||||
We have complicated the situation quite a bit. Let's take a look at the _old_ grammar
|
||||
for data type declarations (this is going back as far as [part 2]({{< relref "02_compiler_parsing.md" >}})).
|
||||
Here, \\(L\_D\\) is the nonterminal for the things that go between the curly braces of a data type
|
||||
declaration, \\(D\\) is the nonterminal representing a single constructor definition,
|
||||
and \\(L\_U\\) is a list of zero or more uppercase variable names:
|
||||
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
L_D & \rightarrow D \; , \; L_D \\
|
||||
L_D & \rightarrow D \\
|
||||
D & \rightarrow \text{upperVar} \; L_U \\
|
||||
L_U & \rightarrow \text{upperVar} \; L_U \\
|
||||
L_U & \rightarrow \epsilon
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
This grammar was actually too simple even for our monomorphically typed language!
|
||||
Since functions are not represented using a single uppercase variable, it wasn't possible for us
|
||||
to define constructors that accept as arguments anything other than integers and user-defined
|
||||
data types. Now, we also need to modify this grammar to allow for constructor applications (which can be nested!).
|
||||
To do so, we will define a new nonterminal, \\(Y\\), for types:
|
||||
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
Y & \rightarrow N \; ``\rightarrow" Y \\
|
||||
Y & \rightarrow N
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
We make it right-recursive (because the \\(\\rightarrow\\) operator is right-associative). Next, we define
|
||||
a nonterminal for all types _except_ those constructed with the arrow, \\(N\\).
|
||||
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
N & \rightarrow \text{upperVar} \; L_Y \\
|
||||
N & \rightarrow \text{typeVar} \\
|
||||
N & \rightarrow ( Y )
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
The first of the above rules allows a type to be a constructor applied to zero or more arguments
|
||||
(generated by \\(L\_Y\\)). The second rule allows a type to be a placeholder type variable. Finally,
|
||||
the third rule allows for any type (including functions, again) to occur between parentheses.
|
||||
This is so that higher-order functions, like \\((a \rightarrow b) \rightarrow a \rightarrow a \\),
|
||||
can be represented.
|
||||
|
||||
Unfortunately, the definition of \\(L\_Y\\) is not as straightforward as we might imagine. We could define
|
||||
it as just a list of \\(Y\\) nonterminals, but this would make the grammar ambiguous: something
|
||||
like `List Maybe Int` could be interpreted as either "`List`, applied to types `Maybe` and `Int`", or
|
||||
"`List`, applied to type `Maybe Int`". To avoid this, we define a "type list element" \\(Y'\\),
|
||||
which does not take arguments:
|
||||
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
Y' & \rightarrow \text{upperVar} \\
|
||||
Y' & \rightarrow \text{lowerVar} \\
|
||||
Y' & \rightarrow ( Y )
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
We then make \\(L\_Y\\) a list of \\(Y'\\):
|
||||
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
L_Y & \rightarrow Y' \; L_Y \\
|
||||
L_Y & \rightarrow \epsilon
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
Finally, we update the rules for the data type declaration, as well as for a single
|
||||
constructor:
|
||||
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
T & \rightarrow \text{data} \; \text{upperVar} \; L_T = \{ L_D \} \\
|
||||
D & \rightarrow \text{upperVar} \; L_Y \\
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
Now that we have a grammar for all these things, we have to implement
|
||||
the corresponding data structures. We define a new family of structs,
|
||||
extending `parsed_type`, which represent types as they are
|
||||
received from the parser. These differ from regular types in that they
|
||||
do not require that the types they represent are valid; validating
|
||||
types requires two passes, which is a luxury we do not have when
|
||||
parsing. We can define them as follows:
|
||||
|
||||
{{< codeblock "C++" "compiler/11/parsed_type.hpp" >}}
|
||||
|
||||
We define the conversion function `to_type`, which requires
|
||||
a set of type variables quantified in the given type, and
|
||||
the environment in which to look up the arities of various
|
||||
type constructors. The implementation is as follows:
|
||||
|
||||
{{< codeblock "C++" "compiler/11/parsed_type.cpp" >}}
|
||||
|
||||
With this definition in hand, we can now update the grammar in our Bison file.
|
||||
First things first, we'll add the type parameters to the data type definition:
|
||||
|
||||
{{< codelines "plaintext" "compiler/11/parser.y" 127 130 >}}
|
||||
|
||||
Next, we add the new grammar rules we came up with:
|
||||
|
||||
{{< codelines "plaintext" "compiler/11/parser.y" 138 163 >}}
|
||||
|
||||
Finally, we define the types for these new rules at the top of the file:
|
||||
|
||||
{{< codelines "plaintext" "compiler/11/parser.y" 43 44 >}}
|
||||
|
||||
{{< todo >}}
|
||||
Nullary is not the right word.
|
||||
{{< /todo >}}
|
||||
|
Loading…
Reference in New Issue
Block a user