Write explanations of AST refactor in compiler series
This commit is contained in:
parent
d3d73e0e9c
commit
7e9bd95846
code/compiler/06
content/blog
@ -6,6 +6,20 @@ void print_indent(int n, std::ostream& to) {
|
||||
while(n--) to << " ";
|
||||
}
|
||||
|
||||
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
|
||||
node_type = typecheck(mgr, env);
|
||||
return node_type;
|
||||
}
|
||||
|
||||
void ast::resolve_common(const type_mgr& mgr) {
|
||||
type_var* var;
|
||||
type_ptr resolved_type = mgr.resolve(node_type, var);
|
||||
if(var) throw type_error("ambiguously typed program");
|
||||
|
||||
resolve(mgr);
|
||||
node_type = std::move(resolved_type);
|
||||
}
|
||||
|
||||
void ast_int::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "INT: " << value << std::endl;
|
||||
@ -15,6 +29,10 @@ type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
return type_ptr(new type_base("Int"));
|
||||
}
|
||||
|
||||
void ast_int::resolve(const type_mgr& mgr) const {
|
||||
|
||||
}
|
||||
|
||||
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||
}
|
||||
@ -28,6 +46,10 @@ type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
return env.lookup(id);
|
||||
}
|
||||
|
||||
void ast_lid::resolve(const type_mgr& mgr) const {
|
||||
|
||||
}
|
||||
|
||||
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(
|
||||
env->has_variable(id) ?
|
||||
@ -44,6 +66,10 @@ type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
return env.lookup(id);
|
||||
}
|
||||
|
||||
void ast_uid::resolve(const type_mgr& mgr) const {
|
||||
|
||||
}
|
||||
|
||||
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
||||
}
|
||||
@ -56,8 +82,8 @@ void ast_binop::print(int indent, std::ostream& to) const {
|
||||
}
|
||||
|
||||
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
type_ptr ltype = left->typecheck_common(mgr, env);
|
||||
type_ptr rtype = right->typecheck_common(mgr, env);
|
||||
type_ptr ftype = env.lookup(op_name(op));
|
||||
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||
|
||||
@ -69,6 +95,11 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
return return_type;
|
||||
}
|
||||
|
||||
void ast_binop::resolve(const type_mgr& mgr) const {
|
||||
left->resolve_common(mgr);
|
||||
right->resolve_common(mgr);
|
||||
}
|
||||
|
||||
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
right->compile(env, into);
|
||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||
@ -86,8 +117,8 @@ void ast_app::print(int indent, std::ostream& to) const {
|
||||
}
|
||||
|
||||
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
type_ptr ltype = left->typecheck_common(mgr, env);
|
||||
type_ptr rtype = right->typecheck_common(mgr, env);
|
||||
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||
@ -95,6 +126,11 @@ type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
return return_type;
|
||||
}
|
||||
|
||||
void ast_app::resolve(const type_mgr& mgr) const {
|
||||
left->resolve_common(mgr);
|
||||
right->resolve_common(mgr);
|
||||
}
|
||||
|
||||
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
right->compile(env, into);
|
||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||
@ -114,23 +150,31 @@ void ast_case::print(int indent, std::ostream& to) const {
|
||||
|
||||
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
type_var* var;
|
||||
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||
type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var);
|
||||
type_ptr branch_type = mgr.new_type();
|
||||
|
||||
if(!dynamic_cast<type_base*>(case_type.get())) {
|
||||
throw type_error("attempting case analysis of non-data type");
|
||||
}
|
||||
|
||||
for(auto& branch : branches) {
|
||||
type_env new_env = env.scope();
|
||||
branch->pat->match(case_type, mgr, new_env);
|
||||
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
|
||||
type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env);
|
||||
mgr.unify(branch_type, curr_branch_type);
|
||||
}
|
||||
|
||||
case_type = mgr.resolve(case_type, var);
|
||||
if(!dynamic_cast<type_data*>(case_type.get())) {
|
||||
throw type_error("attempting case analysis of non-data type");
|
||||
}
|
||||
|
||||
return branch_type;
|
||||
}
|
||||
|
||||
void ast_case::resolve(const type_mgr& mgr) const {
|
||||
of->resolve_common(mgr);
|
||||
for(auto& branch : branches) {
|
||||
branch->expr->resolve_common(mgr);
|
||||
}
|
||||
}
|
||||
|
||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
|
||||
}
|
||||
|
@ -8,12 +8,18 @@
|
||||
#include "env.hpp"
|
||||
|
||||
struct ast {
|
||||
type_ptr node_type;
|
||||
|
||||
virtual ~ast() = default;
|
||||
|
||||
virtual void print(int indent, std::ostream& to) const = 0;
|
||||
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
|
||||
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||
virtual void compile(const env_ptr& env,
|
||||
std::vector<instruction_ptr>& into) const = 0;
|
||||
|
||||
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||
void resolve_common(const type_mgr& mgr);
|
||||
};
|
||||
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
@ -52,6 +58,7 @@ struct definition {
|
||||
|
||||
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||
};
|
||||
|
||||
using definition_ptr = std::unique_ptr<definition>;
|
||||
@ -64,6 +71,7 @@ struct ast_int : public ast {
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
@ -75,6 +83,7 @@ struct ast_lid : public ast {
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
@ -86,6 +95,7 @@ struct ast_uid : public ast {
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
@ -99,6 +109,7 @@ struct ast_binop : public ast {
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
@ -111,6 +122,7 @@ struct ast_app : public ast {
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
@ -123,6 +135,7 @@ struct ast_case : public ast {
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
@ -162,6 +175,7 @@ struct definition_defn : public definition {
|
||||
|
||||
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
};
|
||||
|
||||
struct definition_data : public definition {
|
||||
@ -173,4 +187,5 @@ struct definition_data : public definition {
|
||||
|
||||
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||
void resolve(const type_mgr& mgr) const;
|
||||
};
|
||||
|
@ -24,16 +24,23 @@ void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const
|
||||
type_it++;
|
||||
}
|
||||
|
||||
type_ptr body_type = body->typecheck(mgr, new_env);
|
||||
type_ptr body_type = body->typecheck_common(mgr, new_env);
|
||||
mgr.unify(return_type, body_type);
|
||||
}
|
||||
|
||||
void definition_defn::resolve(const type_mgr& mgr) const {
|
||||
body->resolve_common(mgr);
|
||||
}
|
||||
|
||||
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
|
||||
type_ptr return_type = type_ptr(new type_base(name));
|
||||
type_data* this_type = new type_data(name);
|
||||
type_ptr return_type = type_ptr(this_type);
|
||||
int next_tag = 0;
|
||||
|
||||
for(auto& constructor : constructors) {
|
||||
type_ptr full_type = return_type;
|
||||
this_type->constructors[constructor->name] = { next_tag++ };
|
||||
|
||||
type_ptr full_type = return_type;
|
||||
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||
type_ptr type = type_ptr(new type_base(*it));
|
||||
full_type = type_ptr(new type_arr(type, full_type));
|
||||
@ -46,3 +53,8 @@ void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
|
||||
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
|
||||
// Nothing
|
||||
}
|
||||
|
||||
void definition_data::resolve(const type_mgr& mgr) const {
|
||||
// Nothing
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,10 @@ void typecheck_program(
|
||||
pair.second->print(mgr, std::cout);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
for(auto& def : prog) {
|
||||
def->resolve(mgr);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
@ -44,7 +44,7 @@ type_ptr type_mgr::new_arrow_type() {
|
||||
return type_ptr(new type_arr(new_type(), new_type()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) {
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
||||
type_var* cast;
|
||||
|
||||
var = nullptr;
|
||||
|
@ -30,6 +30,17 @@ struct type_base : public type {
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_data : public type_base {
|
||||
struct constructor {
|
||||
int tag;
|
||||
};
|
||||
|
||||
std::map<std::string, constructor> constructors;
|
||||
|
||||
type_data(std::string n)
|
||||
: type_base(std::move(n)) {}
|
||||
};
|
||||
|
||||
struct type_arr : public type {
|
||||
type_ptr left;
|
||||
type_ptr right;
|
||||
@ -49,6 +60,6 @@ struct type_mgr {
|
||||
type_ptr new_arrow_type();
|
||||
|
||||
void unify(type_ptr l, type_ptr r);
|
||||
type_ptr resolve(type_ptr t, type_var*& var);
|
||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||
void bind(const std::string& s, type_ptr t);
|
||||
};
|
||||
|
@ -253,7 +253,7 @@ We do not have to do this for `ast_uid`:
|
||||
{{< codelines "C++" "compiler/06/ast.cpp" 47 49 >}}
|
||||
|
||||
On to `ast_binop`! This is the first time we have to change our environment.
|
||||
Once we build the right operand on the stack, every offset that we counted
|
||||
As we said earlier, once we build the right operand on the stack, every offset that we counted
|
||||
from the top of the stack will have been shifted by 1 (we see this
|
||||
in our compilation scheme for function application). So,
|
||||
we create a new environment with `env_offset`, and use that
|
||||
@ -274,3 +274,81 @@ for the exact same reason as before.
|
||||
Case expressions are the only thing left on the agenda. This
|
||||
is the time during which we have to perform desugaring. Here,
|
||||
though, we run into an issue: we don't have tags assigned to constructors!
|
||||
We need to adjust our code to keep track of the tags of the various
|
||||
constructors of a type. To do this, we add a subclass for the `type_base`
|
||||
struct, called `type_data`:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
When we create types from `definition_data`, we tag the corresponding constructors:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
Ah, but that doesn't solve the problem. Once we performed type checking, we don't keep
|
||||
the types that we computed for an AST node in the node. And obviously, we don't want
|
||||
to go looking for them again. Furthermore, we can't just look up a constructor
|
||||
in the environment, since we can well have patterns that don't have __any__ constructors:
|
||||
|
||||
```
|
||||
match l {
|
||||
l -> { 0 }
|
||||
}
|
||||
```
|
||||
|
||||
So, we want each `ast` node to store its type (well, in practice we only need this for
|
||||
`ast_case`, but we might as well store it for all nodes). We can add it, no problem:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
Now, we can add another, non-virtual `typecheck` method (let's call it `typecheck_common`,
|
||||
since naming is hard). This method will call `typecheck`, and store the output into
|
||||
the `node_type` field.
|
||||
|
||||
The signature is identical to `typecheck`, except it's neither virtual nor const:
|
||||
```
|
||||
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||
```
|
||||
|
||||
And the implementation is as simple as you think:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
In client code (`definition_defn::typecheck_first` for instance), we should now
|
||||
use `typecheck_common` instead of `typecheck`. With that done, we're almost there.
|
||||
However, we're still missing something: most likely, the initial type assigned to any
|
||||
node is a `type_var`, or a type variable. In this case, `type_var` __needs__ the information
|
||||
from `type_mgr`, which we will not be keeping around. Besides, it's cleaner to keep the actual type
|
||||
as a member of the node, not a variable type that references it. In order
|
||||
to address this, we write two conversion functions that call `resolve` on all
|
||||
types in an AST, given a type manager. After this is done, the type manager can be thrown away.
|
||||
The signatures of the functions are as follows:
|
||||
|
||||
```
|
||||
void resolve_common(const type_mgr& mgr);
|
||||
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||
```
|
||||
|
||||
We also add the `resolve` method to `definition`, so that we can call it
|
||||
without having to run `dynamic_cast`. The implementation for `resolve_common`
|
||||
just resolves the type:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
The virtual `resolve` just calls `resolve_common` on an all `ast` children
|
||||
of a node. Here's a sample implementation from `ast_binop`:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
And here's the implementation of `resolve` on `definition_defn`:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
Finally, we call `resolve` from inside `typecheck_program` in `main.cpp`:
|
||||
|
||||
{{< todo >}}Link code{{< /todo >}}
|
||||
|
||||
Finally, we're ready to implement the code for compiling `ast_case`.
|
||||
|
||||
{{< todo >}}Figure out how to keep all trees not requiring a type manager. {{< /todo >}}
|
||||
|
||||
{{< todo >}}Backport bugfix in case's typecheck{{< /todo >}}
|
||||
|
Loading…
Reference in New Issue
Block a user