Compare commits
3 Commits
d9c151d774
...
d90993a93c
Author | SHA1 | Date | |
---|---|---|---|
d90993a93c | |||
7e9bd95846 | |||
d3d73e0e9c |
|
@ -6,6 +6,20 @@ void print_indent(int n, std::ostream& to) {
|
|||
while(n--) to << " ";
|
||||
}
|
||||
|
||||
// Non-virtual entry point for type checking. Runs the node-specific
// typecheck() and stores the type it produced in node_type before handing
// the same type back to the caller.
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    type_ptr computed = typecheck(mgr, env);
    node_type = computed;
    return computed;
}
|
||||
|
||||
void ast::resolve_common(const type_mgr& mgr) {
|
||||
type_var* var;
|
||||
type_ptr resolved_type = mgr.resolve(node_type, var);
|
||||
if(var) throw type_error("ambiguously typed program");
|
||||
|
||||
resolve(mgr);
|
||||
node_type = std::move(resolved_type);
|
||||
}
|
||||
|
||||
void ast_int::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "INT: " << value << std::endl;
|
||||
|
@ -15,6 +29,10 @@ type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||
return type_ptr(new type_base("Int"));
|
||||
}
|
||||
|
||||
// Intentionally empty: this override does no work of its own. The node's own
// type is resolved by ast::resolve_common, which calls this hook.
void ast_int::resolve(const type_mgr& mgr) const {
}
|
||||
|
||||
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||
}
|
||||
|
@ -28,8 +46,15 @@ type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||
return env.lookup(id);
|
||||
}
|
||||
|
||||
// Intentionally empty: this override does no work of its own. The node's own
// type is resolved by ast::resolve_common, which calls this hook.
void ast_lid::resolve(const type_mgr& mgr) const {
}
|
||||
|
||||
// Appends exactly one instruction for a lowercase identifier:
//  - a push of the variable's offset when the compile-time environment
//    reports that it has the name, or
//  - a pushglobal of the name otherwise.
// The earlier unconditional pushglobal is gone; keeping it next to the
// conditional push would emit two instructions for a single identifier.
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    if(env->has_variable(id)) {
        into.push_back(instruction_ptr(new instruction_push(env->get_offset(id))));
    } else {
        into.push_back(instruction_ptr(new instruction_pushglobal(id)));
    }
}
|
||||
|
||||
void ast_uid::print(int indent, std::ostream& to) const {
|
||||
|
@ -41,11 +66,12 @@ type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||
return env.lookup(id);
|
||||
}
|
||||
|
||||
// Intentionally empty: this override does no work of its own. The node's own
// type is resolved by ast::resolve_common, which calls this hook.
void ast_uid::resolve(const type_mgr& mgr) const {
}
|
||||
|
||||
// Appends exactly one pushglobal instruction for an uppercase identifier.
// Unlike ast_lid::compile, no local/global check is made here; keeping the
// superseded conditional push next to the pushglobal would emit two
// instructions for a single identifier.
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    into.push_back(instruction_ptr(new instruction_pushglobal(id)));
}
|
||||
|
||||
void ast_binop::print(int indent, std::ostream& to) const {
|
||||
|
@ -56,8 +82,8 @@ void ast_binop::print(int indent, std::ostream& to) const {
|
|||
}
|
||||
|
||||
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
type_ptr ltype = left->typecheck_common(mgr, env);
|
||||
type_ptr rtype = right->typecheck_common(mgr, env);
|
||||
type_ptr ftype = env.lookup(op_name(op));
|
||||
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||
|
||||
|
@ -69,9 +95,15 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||
return return_type;
|
||||
}
|
||||
|
||||
// Forwards resolution to both operands, left first and then right.
void ast_binop::resolve(const type_mgr& mgr) const {
    left->resolve_common(mgr);
    right->resolve_common(mgr);
}
|
||||
|
||||
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
left->compile(env, into);
|
||||
right->compile(env, into);
|
||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||
|
||||
into.push_back(instruction_ptr(new instruction_pushglobal(op_name(op))));
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
|
@ -85,8 +117,8 @@ void ast_app::print(int indent, std::ostream& to) const {
|
|||
}
|
||||
|
||||
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
type_ptr ltype = left->typecheck_common(mgr, env);
|
||||
type_ptr rtype = right->typecheck_common(mgr, env);
|
||||
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||
|
@ -94,9 +126,14 @@ type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||
return return_type;
|
||||
}
|
||||
|
||||
// Forwards resolution to the function expression and then to its argument.
void ast_app::resolve(const type_mgr& mgr) const {
    left->resolve_common(mgr);
    right->resolve_common(mgr);
}
|
||||
|
||||
// Compiles an application `left right`.
// The argument is compiled first, in the caller's environment. The function
// expression is then compiled in an environment wrapped in env_offset(1, ...),
// which accounts for the argument already built on the stack. A single mkapp
// instruction follows.
// The superseded `left->compile(env, into)` is removed: it compiled the left
// side a second time, before the argument and without the offset.
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    right->compile(env, into);
    left->compile(env_ptr(new env_offset(1, env)), into);
    into.push_back(instruction_ptr(new instruction_mkapp()));
}
|
||||
|
||||
|
@ -113,25 +150,77 @@ void ast_case::print(int indent, std::ostream& to) const {
|
|||
|
||||
// Type checks a case expression and returns the common type of its branches.
//
// The analyzed expression and every branch body are checked through
// typecheck_common, so each of those nodes records its own type. Every branch
// pattern is matched against the analyzed expression's type in a fresh scope,
// and every branch body type is unified with one shared branch type.
//
// Throws type_error if, once all branches have been processed, the analyzed
// expression's type does not resolve to a type_data.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_var* var;
    type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var);
    type_ptr branch_type = mgr.new_type();

    for(auto& branch : branches) {
        type_env new_env = env.scope();
        branch->pat->match(case_type, mgr, new_env);
        type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env);
        mgr.unify(branch_type, curr_branch_type);
    }

    // Resolve again after the branches have been handled (presumably because
    // matching the patterns can pin down a type that was still a variable
    // before the loop), and only then insist on a data type.
    case_type = mgr.resolve(case_type, var);
    if(!dynamic_cast<type_data*>(case_type.get())) {
        throw type_error("attempting case analysis of non-data type");
    }

    return branch_type;
}
|
||||
|
||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
void ast_case::resolve(const type_mgr& mgr) const {
|
||||
of->resolve_common(mgr);
|
||||
for(auto& branch : branches) {
|
||||
branch->expr->resolve_common(mgr);
|
||||
}
|
||||
}
|
||||
|
||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
type_data* type = dynamic_cast<type_data*>(node_type.get());
|
||||
|
||||
of->compile(env, into);
|
||||
into.push_back(instruction_ptr(new instruction_eval()));
|
||||
|
||||
instruction_jump* jump_instruction = new instruction_jump();
|
||||
into.push_back(instruction_ptr(jump_instruction));
|
||||
for(auto& branch : branches) {
|
||||
std::vector<instruction_ptr> branch_instructions;
|
||||
pattern_var* vpat;
|
||||
pattern_constr* cpat;
|
||||
|
||||
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||
|
||||
for(auto& constr_pair : type->constructors) {
|
||||
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||
jump_instruction->tag_mappings.end())
|
||||
break;
|
||||
|
||||
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||
jump_instruction->branches.size();
|
||||
}
|
||||
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||
branch_instructions.push_back(instruction_ptr(new instruction_split()));
|
||||
branch->expr->compile(env_ptr(new env_offset(cpat->params.size(), env)),
|
||||
branch_instructions);
|
||||
|
||||
int new_tag = type->constructors[cpat->constr].tag;
|
||||
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||
jump_instruction->tag_mappings.end())
|
||||
throw type_error("technically not a type error: duplicate pattern");
|
||||
|
||||
jump_instruction->tag_mappings[new_tag] =
|
||||
jump_instruction->branches.size();
|
||||
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||
}
|
||||
}
|
||||
|
||||
for(auto& constr_pair : type->constructors) {
|
||||
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||
jump_instruction->tag_mappings.end())
|
||||
throw type_error("non-total pattern");
|
||||
}
|
||||
}
|
||||
|
||||
void pattern_var::print(std::ostream& to) const {
|
||||
|
|
|
@ -8,12 +8,18 @@
|
|||
#include "env.hpp"
|
||||
|
||||
struct ast {
|
||||
type_ptr node_type;
|
||||
|
||||
virtual ~ast() = default;
|
||||
|
||||
virtual void print(int indent, std::ostream& to) const = 0;
|
||||
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
|
||||
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||
virtual void compile(const env_ptr& env,
|
||||
std::vector<instruction_ptr>& into) const = 0;
|
||||
|
||||
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||
void resolve_common(const type_mgr& mgr);
|
||||
};
|
||||
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
|
@ -52,6 +58,7 @@ struct definition {
|
|||
|
||||
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||
};
|
||||
|
||||
using definition_ptr = std::unique_ptr<definition>;
|
||||
|
@ -64,6 +71,7 @@ struct ast_int : public ast {
|
|||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
|
@ -75,6 +83,7 @@ struct ast_lid : public ast {
|
|||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
|
@ -86,6 +95,7 @@ struct ast_uid : public ast {
|
|||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
|
@ -99,6 +109,7 @@ struct ast_binop : public ast {
|
|||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
|
@ -111,6 +122,7 @@ struct ast_app : public ast {
|
|||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
|
@ -123,6 +135,7 @@ struct ast_case : public ast {
|
|||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
|
@ -162,6 +175,7 @@ struct definition_defn : public definition {
|
|||
|
||||
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
};
|
||||
|
||||
struct definition_data : public definition {
|
||||
|
@ -173,4 +187,5 @@ struct definition_data : public definition {
|
|||
|
||||
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
};
|
||||
|
|
|
@ -24,16 +24,23 @@ void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const
|
|||
type_it++;
|
||||
}
|
||||
|
||||
type_ptr body_type = body->typecheck(mgr, new_env);
|
||||
type_ptr body_type = body->typecheck_common(mgr, new_env);
|
||||
mgr.unify(return_type, body_type);
|
||||
}
|
||||
|
||||
// Resolves the types stored in this definition's body by delegating to the
// body's resolve_common.
void definition_defn::resolve(const type_mgr& mgr) const {
    body->resolve_common(mgr);
}
|
||||
|
||||
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
|
||||
type_ptr return_type = type_ptr(new type_base(name));
|
||||
type_data* this_type = new type_data(name);
|
||||
type_ptr return_type = type_ptr(this_type);
|
||||
int next_tag = 0;
|
||||
|
||||
for(auto& constructor : constructors) {
|
||||
type_ptr full_type = return_type;
|
||||
this_type->constructors[constructor->name] = { next_tag++ };
|
||||
|
||||
type_ptr full_type = return_type;
|
||||
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||
type_ptr type = type_ptr(new type_base(*it));
|
||||
full_type = type_ptr(new type_arr(type, full_type));
|
||||
|
@ -46,3 +53,8 @@ void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
|
|||
// Intentionally empty: this override performs no work.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
}
|
||||
|
||||
// Intentionally empty: this override performs no work.
void definition_data::resolve(const type_mgr& mgr) const {
}
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ struct env {
|
|||
|
||||
using env_ptr = std::shared_ptr<env>;
|
||||
|
||||
struct env_var {
|
||||
struct env_var : public env {
|
||||
std::string name;
|
||||
env_ptr parent;
|
||||
|
||||
|
@ -22,7 +22,7 @@ struct env_var {
|
|||
bool has_variable(const std::string& name) const;
|
||||
};
|
||||
|
||||
struct env_offset {
|
||||
struct env_offset : public env {
|
||||
int offset;
|
||||
env_ptr parent;
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
#include <string>
|
||||
#include <memory>
|
||||
#include "binop.hpp"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
struct instruction {
|
||||
virtual ~instruction() = default;
|
||||
|
@ -53,6 +55,11 @@ struct instruction_split : public instruction {
|
|||
|
||||
};
|
||||
|
||||
struct instruction_jump : public instruction {
|
||||
std::vector<std::vector<instruction_ptr>> branches;
|
||||
std::map<int, int> tag_mappings;
|
||||
};
|
||||
|
||||
struct instruction_slide : public instruction {
|
||||
int offset;
|
||||
|
||||
|
|
|
@ -36,6 +36,10 @@ void typecheck_program(
|
|||
pair.second->print(mgr, std::cout);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
for(auto& def : prog) {
|
||||
def->resolve(mgr);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
|
|
@ -44,7 +44,7 @@ type_ptr type_mgr::new_arrow_type() {
|
|||
return type_ptr(new type_arr(new_type(), new_type()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) {
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
||||
type_var* cast;
|
||||
|
||||
var = nullptr;
|
||||
|
|
|
@ -30,6 +30,17 @@ struct type_base : public type {
|
|||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_data : public type_base {
|
||||
struct constructor {
|
||||
int tag;
|
||||
};
|
||||
|
||||
std::map<std::string, constructor> constructors;
|
||||
|
||||
type_data(std::string n)
|
||||
: type_base(std::move(n)) {}
|
||||
};
|
||||
|
||||
struct type_arr : public type {
|
||||
type_ptr left;
|
||||
type_ptr right;
|
||||
|
@ -49,6 +60,6 @@ struct type_mgr {
|
|||
type_ptr new_arrow_type();
|
||||
|
||||
void unify(type_ptr l, type_ptr r);
|
||||
type_ptr resolve(type_ptr t, type_var*& var);
|
||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||
void bind(const std::string& s, type_ptr t);
|
||||
};
|
||||
|
|
|
@ -144,7 +144,7 @@ Now, it's time for compiling the whole case expression. We first want
|
|||
to construct the graph for the expression we want to perform case analysis on.
|
||||
Next, we want to evaluate it (since we need a packed value, not a graph,
|
||||
to read the tag). Finally, we perform a jump depending on the tag. This
|
||||
is capture by the following rule:
|
||||
is captured by the following rule:
|
||||
|
||||
$$
|
||||
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}_1 ... \\text{alt}_n⟧ \\; \\rho =
|
||||
|
@ -196,7 +196,37 @@ And here's the source file:
|
|||
|
||||
{{< codeblock "C++" "compiler/06/env.cpp" >}}
|
||||
|
||||
{{< todo >}}Explain the code drops. {{< /todo >}}
|
||||
There's not that much to see here, but let's go through it anyway.
|
||||
We define an environment as a linked list, kind of like
|
||||
we did with the type environment. This time, though,
|
||||
we use shared pointers instead of raw pointers to reference the parent.
|
||||
I decided on this because we will need to be using virtual methods
|
||||
(since we have two subclasses of `env`), and thus will need to
|
||||
be passing the `env` by pointer. At that point, we might as well
|
||||
use the "proper" way!
|
||||
|
||||
I implemented the environment as a linked list because it is, in essence,
|
||||
a stack. However, not every "offset" in a stack is introduced by
|
||||
binding variables - for instance, when we create an application node,
|
||||
we first build the argument value on the stack, and then,
|
||||
with that value still on the stack, build the left hand side of the application.
|
||||
Thus, all the variable positions are offset by the presence of the argument
|
||||
on the stack, and we must account for that. Similarly, in cases when we will
|
||||
allocate space on the stack (we will run into these cases later), we will
|
||||
need to account for that change. Thus, since we can increment
|
||||
the offset in two ways (binding a variable and building something on the stack),
|
||||
we allow for two types of nodes in our `env` stack.
|
||||
|
||||
During recursion we will be tweaking the return value of `get_offset` to
|
||||
calculate the final location of a variable on the stack (if the
|
||||
parent of a node returned offset `1`, but the node itself is a variable
|
||||
node and thus introduces another offset, we need to return `2`). Because
|
||||
of this, we cannot reasonably return a constant like `-1` (it will quickly
|
||||
be made positive on a long list), and thus we throw an exception. To
|
||||
allow for a safe way to check for an offset, without try-catch,
|
||||
we also add a `has_variable` method which checks if the lookup will succeed.
|
||||
A better approach would be to use `std::optional`, but it's C++17, so
|
||||
we'll shy away from it.
|
||||
|
||||
It will also help to move some of the functions on the `binop` enum
|
||||
into a separate file. The new header is pretty small:
|
||||
|
@ -207,4 +237,166 @@ The new source file is not much longer:
|
|||
|
||||
{{< codeblock "C++" "compiler/06/binop.cpp" >}}
|
||||
|
||||
And now, we begin our implementation.
|
||||
And now, we begin our implementation. Let's start with the easy ones:
|
||||
`ast_int`, `ast_lid` and `ast_uid`. The code for `ast_int` involves just pushing
|
||||
the integer into the stack:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 36 38 >}}
|
||||
|
||||
The code for `ast_lid` needs to check if the variable is global or local,
|
||||
just like we discussed:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 53 58 >}}
|
||||
|
||||
We do not have to do this for `ast_uid`:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 73 75 >}}
|
||||
|
||||
On to `ast_binop`! This is the first time we have to change our environment.
|
||||
As we said earlier, once we build the right operand on the stack, every offset that we counted
|
||||
from the top of the stack will have been shifted by 1 (we see this
|
||||
in our compilation scheme for function application). So,
|
||||
we create a new environment with `env_offset`, and use that
|
||||
when we compile the left child:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 103 110 >}}
|
||||
|
||||
`ast_binop` performs two applications: `(+) lhs rhs`.
|
||||
We push `rhs`, then `lhs`, then `(+)`, and then use MkApp
|
||||
twice. In `ast_app`, we only need to perform one application,
|
||||
`lhs rhs`:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 134 138 >}}
|
||||
|
||||
Note that we also extend our environment in this one,
|
||||
for the exact same reason as before.
|
||||
|
||||
Case expressions are the only thing left on the agenda. This
|
||||
is the time during which we have to perform desugaring. Here,
|
||||
though, we run into an issue: we don't have tags assigned to constructors!
|
||||
We need to adjust our code to keep track of the tags of the various
|
||||
constructors of a type. To do this, we add a subclass for the `type_base`
|
||||
struct, called `type_data`:
|
||||
|
||||
{{< codelines "C++" "compiler/06/type.hpp" 33 42 >}}
|
||||
|
||||
When we create types from `definition_data`, we tag the corresponding constructors:
|
||||
|
||||
{{< codelines "C++" "compiler/06/definition.cpp" 35 51 >}}
|
||||
|
||||
Ah, but adding constructor info to the type doesn't solve the problem.
|
||||
Once we've performed type checking, we don't keep
|
||||
the types that we computed for an AST node, in the node. And obviously, we don't want
|
||||
to go looking for them again. Furthermore, we can't just look up a constructor
|
||||
in the environment, since we can well have patterns that don't have __any__ constructors:
|
||||
|
||||
```
|
||||
match l {
|
||||
l -> { 0 }
|
||||
}
|
||||
```
|
||||
|
||||
So, we want each `ast` node to store its type (well, in practice we only need this for
|
||||
`ast_case`, but we might as well store it for all nodes). We can add it, no problem.
|
||||
To add to that, we can add another, non-virtual `typecheck` method (let's call it `typecheck_common`,
|
||||
since naming is hard). This method will call `typecheck`, and store the output into
|
||||
the `node_type` field.
|
||||
|
||||
The signature is identical to `typecheck`, except it's neither virtual nor const:
|
||||
```
|
||||
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||
```
|
||||
|
||||
And the implementation is as simple as you think:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 9 12 >}}
|
||||
|
||||
In client code (`definition_defn::typecheck_second` for instance), we should now
|
||||
use `typecheck_common` instead of `typecheck`. With that done, we're almost there.
|
||||
However, we're still missing something: most likely, the initial type assigned to any
|
||||
node is a `type_var`, or a type variable. In this case, `type_var` __needs__ the information
|
||||
from `type_mgr`, which we will not be keeping around. Besides, it's cleaner to keep the actual type
|
||||
as a member of the node, not a variable type that references it. In order
|
||||
to address this, we write two conversion functions that call `resolve` on all
|
||||
types in an AST, given a type manager. After this is done, the type manager can be thrown away.
|
||||
The signatures of the functions are as follows:
|
||||
|
||||
```
|
||||
void resolve_common(const type_mgr& mgr);
|
||||
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||
```
|
||||
|
||||
We also add the `resolve` method to `definition`, so that we can call it
|
||||
without having to run `dynamic_cast`. The implementation for `ast::resolve_common`
|
||||
just resolves the type:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 14 21 >}}
|
||||
|
||||
The virtual `ast::resolve` just calls `ast::resolve_common` on all `ast` children
|
||||
of a node. Here's a sample implementation from `ast_binop`:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 98 101 >}}
|
||||
|
||||
And here's the implementation of `definition::resolve` on `definition_defn`:
|
||||
|
||||
{{< codelines "C++" "compiler/06/definition.cpp" 31 33 >}}
|
||||
|
||||
Finally, we call `resolve` at the end of `typecheck_program` in `main.cpp`:
|
||||
|
||||
{{< codelines "C++" "compiler/06/main.cpp" 40 42 >}}
|
||||
|
||||
At last, we're ready to implement the code for compiling `ast_case`.
|
||||
Here it is, in all its glory:
|
||||
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 178 224 >}}
|
||||
|
||||
There's a lot to unpack here. First of all, just like we said in the compilation
|
||||
scheme, we want to build and evaluate the expression that's being analyzed.
|
||||
Once that's done, however, things get more tricky. We know that each
|
||||
branch of a case expression will correspond to a vector of instructions -
|
||||
in fact, our jump instruction contains a mapping from tags to instructions.
|
||||
As we also discussed above, each list of instructions can be mapped to
|
||||
by multiple tags. We don't want to recompile the same sequence of instructions
|
||||
multiple times (or indeed, generate machine code for it). So, we keep
|
||||
a mapping of tags to their corresponding sequences of instructions. We implement
|
||||
this by having a vector of vectors of instructions (in which each inner vector
|
||||
represents the code for a branch), and a map of tag number to index
|
||||
in the vector containing all the branches. This way, multiple tags
|
||||
can point to the same instruction set without duplicating information.
|
||||
|
||||
We also don't allow a tag to be mapped to more than one sequence of instructions.
|
||||
This is handled differently depending on whether a variable pattern or a
|
||||
constructor pattern is encountered. Variable patterns map all
|
||||
tags that haven't been mapped yet, so no error can occur. Constructor patterns,
|
||||
though, can explicitly try to map the same tag twice, and we don't want that.
|
||||
|
||||
I implied in the previous paragraph the implementation of our case expression
|
||||
compilation algorithm, but let's go through it. Once we've compiled
|
||||
the expression to be analyzed, and evaluated it (just like in our definitions
|
||||
above), we proceed to look at all the branches specified in the case expression.
|
||||
|
||||
If a branch has a variable pattern, we must map to the result of the compilation
|
||||
all the remaining, unmapped tags. We also aren't going to be taking apart
|
||||
our value, so we don't need to use Split, but we do need to add 1 to the
|
||||
environment offset to account for the presence of that value. So,
|
||||
we compile the branch body with that offset, and iterate through
|
||||
all the constructors of our data type. We skip a constructor
|
||||
if it's been mapped, and if it hasn't been, we map it to the index
|
||||
that this branch body will have in our list. Finally,
|
||||
we push the newly compiled instruction sequence into the list of branch
|
||||
bodies.
|
||||
|
||||
If a branch is a constructor pattern, on the other hand, we lead our compilation
|
||||
output with a Split. This takes off the value from the stack, but pushes on
|
||||
all the parameters of the constructor. We account for this by incrementing the
|
||||
environment with the offset given by the number of arguments (just like we did
|
||||
in our definitions of our compilation scheme). Before we map the tag,
|
||||
we ensure that it hasn't already been mapped (and throw an exception, currently
|
||||
in the form of a type error due to the growing length of this post),
|
||||
and finally map it and insert the new branch code into the list of branches.
|
||||
|
||||
After we're done with all the branches, we also check for non-exhaustive patterns,
|
||||
since otherwise we could run into runtime errors. With this, the case expression,
|
||||
and the last of the AST nodes, can be compiled.
|
||||
|
||||
{{< todo >}}Backport bugfix in case's typecheck{{< /todo >}}
|
||||
|
|
Loading…
Reference in New Issue
Block a user