Compare commits

...

3 Commits

9 changed files with 358 additions and 28 deletions

View File

@ -6,6 +6,20 @@ void print_indent(int n, std::ostream& to) {
while(n--) to << " ";
}
// Non-virtual entry point for type checking: runs this node's own
// `typecheck` and remembers the result in `node_type` before returning it.
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    type_ptr checked = typecheck(mgr, env);
    node_type = checked;
    return node_type;
}
void ast::resolve_common(const type_mgr& mgr) {
type_var* var;
type_ptr resolved_type = mgr.resolve(node_type, var);
if(var) throw type_error("ambiguously typed program");
resolve(mgr);
node_type = std::move(resolved_type);
}
void ast_int::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "INT: " << value << std::endl;
@ -15,6 +29,10 @@ type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
return type_ptr(new type_base("Int"));
}
// Intentionally empty: this override performs no resolution work of its own.
void ast_int::resolve(const type_mgr& mgr) const {
}
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushint(value)));
}
@ -28,8 +46,15 @@ type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id);
}
// Intentionally empty: this override performs no resolution work of its own.
void ast_lid::resolve(const type_mgr& mgr) const {
}
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
}
void ast_uid::print(int indent, std::ostream& to) const {
@ -41,11 +66,12 @@ type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id);
}
// Intentionally empty: this override performs no resolution work of its own.
void ast_uid::resolve(const type_mgr& mgr) const {
}
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
}
void ast_binop::print(int indent, std::ostream& to) const {
@ -56,8 +82,8 @@ void ast_binop::print(int indent, std::ostream& to) const {
}
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr ltype = left->typecheck_common(mgr, env);
type_ptr rtype = right->typecheck_common(mgr, env);
type_ptr ftype = env.lookup(op_name(op));
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
@ -69,9 +95,15 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
return return_type;
}
// Resolves the stored types of both operands, left first and then right,
// by delegating to each child's resolve_common.
void ast_binop::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
left->compile(env, into);
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_pushglobal(op_name(op))));
into.push_back(instruction_ptr(new instruction_mkapp()));
into.push_back(instruction_ptr(new instruction_mkapp()));
@ -85,8 +117,8 @@ void ast_app::print(int indent, std::ostream& to) const {
}
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr ltype = left->typecheck_common(mgr, env);
type_ptr rtype = right->typecheck_common(mgr, env);
type_ptr return_type = mgr.new_type();
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
@ -94,9 +126,14 @@ type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
return return_type;
}
// Resolves the stored types of both sides of the application, left first
// and then right, by delegating to each child's resolve_common.
void ast_app::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
left->compile(env, into);
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_mkapp()));
}
@ -113,25 +150,77 @@ void ast_case::print(int indent, std::ostream& to) const {
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_var* var;
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var);
type_ptr branch_type = mgr.new_type();
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
for(auto& branch : branches) {
type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env);
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env);
mgr.unify(branch_type, curr_branch_type);
}
case_type = mgr.resolve(case_type, var);
if(!dynamic_cast<type_data*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
return branch_type;
}
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
void ast_case::resolve(const type_mgr& mgr) const {
of->resolve_common(mgr);
for(auto& branch : branches) {
branch->expr->resolve_common(mgr);
}
}
// Compiles a case expression.
//
// Output appended to `into`: the code for the analyzed expression, an Eval,
// and a single Jump. The Jump's `branches` holds one instruction sequence per
// source branch, and its `tag_mappings` sends each constructor tag to the
// index of the branch that handles it.
//
// Throws type_error when the analyzed expression does not have a data type,
// when a constructor pattern names a constructor the data type does not
// have, when two constructor patterns claim the same tag, or when some
// constructor is left without a branch.
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    // The constructor table lives on the type of the expression being
    // analyzed. This node's own node_type is the type of the branches, which
    // need not be a data type at all.
    type_data* type = dynamic_cast<type_data*>(of->node_type.get());
    if(!type) throw type_error("attempting case analysis of non-data type");

    of->compile(env, into);
    into.push_back(instruction_ptr(new instruction_eval()));

    instruction_jump* jump_instruction = new instruction_jump();
    into.push_back(instruction_ptr(jump_instruction));

    for(auto& branch : branches) {
        std::vector<instruction_ptr> branch_instructions;
        pattern_constr* cpat;

        if(dynamic_cast<pattern_var*>(branch->pat.get())) {
            // The value stays on the stack (no Split), so variables are one
            // slot further away while the body is built.
            branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);

            // A variable pattern catches every tag that has no branch yet.
            // Tags that are already mapped are skipped, not treated as the
            // end of the list: the map is ordered by constructor name, so a
            // mapped constructor can precede an unmapped one.
            for(auto& constr_pair : type->constructors) {
                if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
                        jump_instruction->tag_mappings.end())
                    continue;
                jump_instruction->tag_mappings[constr_pair.second.tag] =
                    jump_instruction->branches.size();
            }
            jump_instruction->branches.push_back(std::move(branch_instructions));
        } else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
            // Split replaces the value with the constructor's parameters,
            // hence the offset by the number of parameters.
            branch_instructions.push_back(instruction_ptr(new instruction_split()));
            branch->expr->compile(env_ptr(new env_offset(cpat->params.size(), env)),
                    branch_instructions);

            // Look the constructor up without operator[], which would
            // silently insert a bogus entry for an unknown name.
            auto constr_it = type->constructors.find(cpat->constr);
            if(constr_it == type->constructors.end())
                throw type_error("pattern constructor does not belong to the analyzed type");
            int new_tag = constr_it->second.tag;

            if(jump_instruction->tag_mappings.find(new_tag) !=
                    jump_instruction->tag_mappings.end())
                throw type_error("technically not a type error: duplicate pattern");

            jump_instruction->tag_mappings[new_tag] =
                jump_instruction->branches.size();
            jump_instruction->branches.push_back(std::move(branch_instructions));
        }
    }

    // Every constructor must be handled, otherwise the Jump could fail at
    // run time.
    for(auto& constr_pair : type->constructors) {
        if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
                jump_instruction->tag_mappings.end())
            throw type_error("non-total pattern");
    }
}
void pattern_var::print(std::ostream& to) const {

View File

@ -8,12 +8,18 @@
#include "env.hpp"
// Base class of all expression nodes.
struct ast {
// Type recorded for this node: written by typecheck_common and later
// replaced with the resolved type by resolve_common.
type_ptr node_type;
virtual ~ast() = default;
virtual void print(int indent, std::ostream& to) const = 0;
// Node-specific type checking; callers go through typecheck_common so that
// the result is also stored in node_type.
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
// Node-specific resolution step, invoked by resolve_common.
virtual void resolve(const type_mgr& mgr) const = 0;
// Appends the instructions for this node to `into`.
virtual void compile(const env_ptr& env,
std::vector<instruction_ptr>& into) const = 0;
// Calls typecheck, stores the returned type in node_type and returns it.
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
// Resolves node_type through `mgr` (throwing type_error if the manager
// reports a type variable), calls resolve, then stores the resolved type.
void resolve_common(const type_mgr& mgr);
};
using ast_ptr = std::unique_ptr<ast>;
@ -52,6 +58,7 @@ struct definition {
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
virtual void resolve(const type_mgr& mgr) const = 0;
};
using definition_ptr = std::unique_ptr<definition>;
@ -64,6 +71,7 @@ struct ast_int : public ast {
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -75,6 +83,7 @@ struct ast_lid : public ast {
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -86,6 +95,7 @@ struct ast_uid : public ast {
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -99,6 +109,7 @@ struct ast_binop : public ast {
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -111,6 +122,7 @@ struct ast_app : public ast {
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -123,6 +135,7 @@ struct ast_case : public ast {
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@ -162,6 +175,7 @@ struct definition_defn : public definition {
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
};
struct definition_data : public definition {
@ -173,4 +187,5 @@ struct definition_data : public definition {
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
};

View File

@ -24,16 +24,23 @@ void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const
type_it++;
}
type_ptr body_type = body->typecheck(mgr, new_env);
type_ptr body_type = body->typecheck_common(mgr, new_env);
mgr.unify(return_type, body_type);
}
// Resolves the types stored in this definition's body by delegating to the
// body's resolve_common.
void definition_defn::resolve(const type_mgr& mgr) const {
body->resolve_common(mgr);
}
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
type_ptr return_type = type_ptr(new type_base(name));
type_data* this_type = new type_data(name);
type_ptr return_type = type_ptr(this_type);
int next_tag = 0;
for(auto& constructor : constructors) {
type_ptr full_type = return_type;
this_type->constructors[constructor->name] = { next_tag++ };
type_ptr full_type = return_type;
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
type_ptr type = type_ptr(new type_base(*it));
full_type = type_ptr(new type_arr(type, full_type));
@ -46,3 +53,8 @@ void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
// Intentionally empty: a data definition does nothing in the second
// type-checking pass.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
// Nothing
}
// Intentionally empty: a data definition does nothing during resolution.
void definition_data::resolve(const type_mgr& mgr) const {
// Nothing
}

View File

@ -11,7 +11,7 @@ struct env {
using env_ptr = std::shared_ptr<env>;
struct env_var {
struct env_var : public env {
std::string name;
env_ptr parent;
@ -22,7 +22,7 @@ struct env_var {
bool has_variable(const std::string& name) const;
};
struct env_offset {
struct env_offset : public env {
int offset;
env_ptr parent;

View File

@ -2,6 +2,8 @@
#include <string>
#include <memory>
#include "binop.hpp"
#include <vector>
#include <map>
struct instruction {
virtual ~instruction() = default;
@ -53,6 +55,11 @@ struct instruction_split : public instruction {
};
// Jump instruction produced when compiling a case expression.
struct instruction_jump : public instruction {
// One instruction sequence per compiled branch body.
std::vector<std::vector<instruction_ptr>> branches;
// Constructor tag -> index into `branches`; several tags may share an index.
std::map<int, int> tag_mappings;
};
struct instruction_slide : public instruction {
int offset;

View File

@ -36,6 +36,10 @@ void typecheck_program(
pair.second->print(mgr, std::cout);
std::cout << std::endl;
}
for(auto& def : prog) {
def->resolve(mgr);
}
}
int main() {

View File

@ -44,7 +44,7 @@ type_ptr type_mgr::new_arrow_type() {
return type_ptr(new type_arr(new_type(), new_type()));
}
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) {
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
type_var* cast;
var = nullptr;

View File

@ -30,6 +30,17 @@ struct type_base : public type {
void print(const type_mgr& mgr, std::ostream& to) const;
};
// A named base type that additionally records its constructors.
struct type_data : public type_base {
// Per-constructor information; currently only the integer tag.
struct constructor {
int tag;
};
// Constructor name -> constructor info; filled in by
// definition_data::typecheck_first, which hands out tags starting at 0.
std::map<std::string, constructor> constructors;
type_data(std::string n)
: type_base(std::move(n)) {}
};
struct type_arr : public type {
type_ptr left;
type_ptr right;
@ -49,6 +60,6 @@ struct type_mgr {
type_ptr new_arrow_type();
void unify(type_ptr l, type_ptr r);
type_ptr resolve(type_ptr t, type_var*& var);
type_ptr resolve(type_ptr t, type_var*& var) const;
void bind(const std::string& s, type_ptr t);
};

View File

@ -144,7 +144,7 @@ Now, it's time for compiling the whole case expression. We first want
to construct the graph for the expression we want to perform case analysis on.
Next, we want to evaluate it (since we need a packed value, not a graph,
to read the tag). Finally, we perform a jump depending on the tag. This
is capture by the following rule:
is captured by the following rule:
$$
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}_1 ... \\text{alt}_n⟧ \\; \\rho =
@ -196,7 +196,37 @@ And here's the source file:
{{< codeblock "C++" "compiler/06/env.cpp" >}}
{{< todo >}}Explain the code drops. {{< /todo >}}
There's not that much to see here, but let's go through it anyway.
We define an environment as a linked list, kind of like
we did with the type environment. This time, though,
we use shared pointers instead of raw pointers to reference the parent.
I decided on this because we will need to be using virtual methods
(since we have two subclasses of `env`), and thus will need to
be passing the `env` by pointer. At that point, we might as well
use the "proper" way!
I implemented the environment as a linked list because it is, in essence,
a stack. However, not every "offset" in a stack is introduced by
binding variables - for instance, when we create an application node,
we first build the argument value on the stack, and then,
with that value still on the stack, build the left hand side of the application.
Thus, all the variable positions are offset by the presence of the argument
on the stack, and we must account for that. Similarly, in cases when we will
allocate space on the stack (we will run into these cases later), we will
need to account for that change. Thus, since we can increment
the offset in two ways (binding a variable and building something on the stack),
we allow for two types of nodes in our `env` stack.
During recursion we will be tweaking the return value of `get_offset` to
calculate the final location of a variable on the stack (if the
parent of a node returned offset `1`, but the node itself is a variable
node and thus introduces another offset, we need to return `2`). Because
of this, we cannot reasonably return a constant like `-1` (it will quickly
be made positive on a long list), and thus we throw an exception. To
allow for a safe way to check for an offset, without try-catch,
we also add a `has_variable` method which checks if the lookup will succeed.
A better approach would be to use `std::optional`, but it's C++17, so
we'll shy away from it.
It will also help to move some of the functions on the `binop` enum
into a separate file. The new header is pretty small:
@ -207,4 +237,166 @@ The new source file is not much longer:
{{< codeblock "C++" "compiler/06/binop.cpp" >}}
And now, we begin our implementation.
And now, we begin our implementation. Let's start with the easy ones:
`ast_int`, `ast_lid` and `ast_uid`. The code for `ast_int` involves just pushing
the integer onto the stack:
{{< codelines "C++" "compiler/06/ast.cpp" 36 38 >}}
The code for `ast_lid` needs to check if the variable is global or local,
just like we discussed:
{{< codelines "C++" "compiler/06/ast.cpp" 53 58 >}}
We do not have to do this for `ast_uid`:
{{< codelines "C++" "compiler/06/ast.cpp" 73 75 >}}
On to `ast_binop`! This is the first time we have to change our environment.
As we said earlier, once we build the right operand on the stack, every offset that we counted
from the top of the stack will have been shifted by 1 (we see this
in our compilation scheme for function application). So,
we create a new environment with `env_offset`, and use that
when we compile the left child:
{{< codelines "C++" "compiler/06/ast.cpp" 103 110 >}}
`ast_binop` performs two applications: `(+) lhs rhs`.
We push `rhs`, then `lhs`, then `(+)`, and then use MkApp
twice. In `ast_app`, we only need to perform one application,
`lhs rhs`:
{{< codelines "C++" "compiler/06/ast.cpp" 134 138 >}}
Note that we also extend our environment in this one,
for the exact same reason as before.
Case expressions are the only thing left on the agenda. This
is the time during which we have to perform desugaring. Here,
though, we run into an issue: we don't have tags assigned to constructors!
We need to adjust our code to keep track of the tags of the various
constructors of a type. To do this, we add a subclass for the `type_base`
struct, called `type_data`:
{{< codelines "C++" "compiler/06/type.hpp" 33 42 >}}
When we create types from `definition_data`, we tag the corresponding constructors:
{{< codelines "C++" "compiler/06/definition.cpp" 35 51 >}}
Ah, but adding constructor info to the type doesn't solve the problem.
Once we performed type checking, we don't keep
the types that we computed for an AST node, in the node. And obviously, we don't want
to go looking for them again. Furthermore, we can't just look up a constructor
in the environment, since we can well have patterns that don't have __any__ constructors:
```
match l {
l -> { 0 }
}
```
So, we want each `ast` node to store its type (well, in practice we only need this for
`ast_case`, but we might as well store it for all nodes). We can add it, no problem.
To add to that, we can add another, non-virtual `typecheck` method (let's call it `typecheck_common`,
since naming is hard). This method will call `typecheck`, and store the output into
the `node_type` field.
The signature is identical to `typecheck`, except it's neither virtual nor const:
```
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
```
And the implementation is as simple as you think:
{{< codelines "C++" "compiler/06/ast.cpp" 9 12 >}}
In client code (`definition_defn::typecheck_second` for instance), we should now
use `typecheck_common` instead of `typecheck`. With that done, we're almost there.
However, we're still missing something: most likely, the initial type assigned to any
node is a `type_var`, or a type variable. In this case, `type_var` __needs__ the information
from `type_mgr`, which we will not be keeping around. Besides, it's cleaner to keep the actual type
as a member of the node, not a variable type that references it. In order
to address this, we write two conversion functions that call `resolve` on all
types in an AST, given a type manager. After this is done, the type manager can be thrown away.
The signatures of the functions are as follows:
```
void resolve_common(const type_mgr& mgr);
virtual void resolve(const type_mgr& mgr) const = 0;
```
We also add the `resolve` method to `definition`, so that we can call it
without having to run `dynamic_cast`. The implementation for `ast::resolve_common`
just resolves the type:
{{< codelines "C++" "compiler/06/ast.cpp" 14 21 >}}
The virtual `ast::resolve` just calls `ast::resolve_common` on all `ast` children
of a node. Here's a sample implementation from `ast_binop`:
{{< codelines "C++" "compiler/06/ast.cpp" 98 101 >}}
And here's the implementation of `definition::resolve` on `definition_defn`:
{{< codelines "C++" "compiler/06/definition.cpp" 31 33 >}}
Finally, we call `resolve` at the end of `typecheck_program` in `main.cpp`:
{{< codelines "C++" "compiler/06/main.cpp" 40 42 >}}
At last, we're ready to implement the code for compiling `ast_case`.
Here it is, in all its glory:
{{< codelines "C++" "compiler/06/ast.cpp" 178 224 >}}
There's a lot to unpack here. First of all, just like we said in the compilation
scheme, we want to build and evaluate the expression that's being analyzed.
Once that's done, however, things get more tricky. We know that each
branch of a case expression will correspond to a vector of instructions -
in fact, our jump instruction contains a mapping from tags to instructions.
As we also discussed above, each list of instructions can be mapped to
by multiple tags. We don't want to recompile the same sequence of instructions
multiple times (or indeed, generate machine code for it). So, we keep
a mapping of tags to their corresponding sequences of instructions. We implement
this by having a vector of vectors of instructions (in which each inner vector
represents the code for a branch), and a map of tag number to index
in the vector containing all the branches. This way, multiple tags
can point to the same instruction set without duplicating information.
We also don't allow a tag to be mapped to more than one sequence of instructions.
This is handled differently depending on whether a variable pattern or a
constructor pattern is encountered. Variable patterns map all
tags that haven't been mapped yet, so no error can occur. Constructor patterns,
though, can explicitly try to map the same tag twice, and we don't want that.
I implied in the previous paragraph the implementation of our case expression
compilation algorithm, but let's go through it. Once we've compiled
the expression to be analyzed, and evaluated it (just like in our definitions
above), we proceed to look at all the branches specified in the case expression.
If a branch has a variable pattern, we must map to the result of the compilation
all the remaining, unmapped tags. We also aren't going to be taking apart
our value, so we don't need to use Split, but we do need to add 1 to the
environment offset to account for the presence of that value. So,
we compile the branch body with that offset, and iterate through
all the constructors of our data type. We skip a constructor
if it's been mapped, and if it hasn't been, we map it to the index
that this branch body will have in our list. Finally,
we push the newly compiled instruction sequence into the list of branch
bodies.
If a branch is a constructor pattern, on the other hand, we lead our compilation
output with a Split. This takes off the value from the stack, but pushes on
all the parameters of the constructor. We account for this by incrementing the
environment with the offset given by the number of arguments (just like we did
in our definitions of our compilation scheme). Before we map the tag,
we ensure that it hasn't already been mapped (and throw an exception, currently
in the form of a type error due to the growing length of this post),
and finally map it and insert the new branch code into the list of branches.
After we're done with all the branches, we also check for non-exhaustive patterns,
since otherwise we could run into runtime errors. With this, the case expression,
and the last of the AST nodes, can be compiled.
{{< todo >}}Backport bugfix in case's typecheck{{< /todo >}}