Fix up compile in compiler blog part 6, and add more text.
This commit is contained in:
parent
d9c151d774
commit
d3d73e0e9c
|
@ -29,7 +29,10 @@ type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
into.push_back(instruction_ptr(
|
||||||
|
env->has_variable(id) ?
|
||||||
|
(instruction*) new instruction_push(env->get_offset(id)) :
|
||||||
|
(instruction*) new instruction_pushglobal(id)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_uid::print(int indent, std::ostream& to) const {
|
void ast_uid::print(int indent, std::ostream& to) const {
|
||||||
|
@ -42,10 +45,7 @@ type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
into.push_back(instruction_ptr(
|
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
||||||
env->has_variable(id) ?
|
|
||||||
(instruction*) new instruction_push(env->get_offset(id)) :
|
|
||||||
(instruction*) new instruction_pushglobal(id)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_binop::print(int indent, std::ostream& to) const {
|
void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
|
@ -70,8 +70,9 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
left->compile(env, into);
|
|
||||||
right->compile(env, into);
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
|
||||||
into.push_back(instruction_ptr(new instruction_pushglobal(op_name(op))));
|
into.push_back(instruction_ptr(new instruction_pushglobal(op_name(op))));
|
||||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
@ -95,8 +96,8 @@ type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
left->compile(env, into);
|
|
||||||
right->compile(env, into);
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ struct env {
|
||||||
|
|
||||||
using env_ptr = std::shared_ptr<env>;
|
using env_ptr = std::shared_ptr<env>;
|
||||||
|
|
||||||
struct env_var {
|
struct env_var : public env {
|
||||||
std::string name;
|
std::string name;
|
||||||
env_ptr parent;
|
env_ptr parent;
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ struct env_var {
|
||||||
bool has_variable(const std::string& name) const;
|
bool has_variable(const std::string& name) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct env_offset {
|
struct env_offset : public env {
|
||||||
int offset;
|
int offset;
|
||||||
env_ptr parent;
|
env_ptr parent;
|
||||||
|
|
||||||
|
|
|
@ -144,7 +144,7 @@ Now, it's time for compiling the whole case expression. We first want
|
||||||
to construct the graph for the expression we want to perform case analysis on.
|
to construct the graph for the expression we want to perform case analysis on.
|
||||||
Next, we want to evaluate it (since we need a packed value, not a graph,
|
Next, we want to evaluate it (since we need a packed value, not a graph,
|
||||||
to read the tag). Finally, we perform a jump depending on the tag. This
|
to read the tag). Finally, we perform a jump depending on the tag. This
|
||||||
is capture by the following rule:
|
is captured by the following rule:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}_1 ... \\text{alt}_n⟧ \\; \\rho =
|
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}_1 ... \\text{alt}_n⟧ \\; \\rho =
|
||||||
|
@ -196,7 +196,37 @@ And here's the source file:
|
||||||
|
|
||||||
{{< codeblock "C++" "compiler/06/env.cpp" >}}
|
{{< codeblock "C++" "compiler/06/env.cpp" >}}
|
||||||
|
|
||||||
{{< todo >}}Explain the code drops. {{< /todo >}}
|
There's not that much to see here, but let's go through it anyway.
|
||||||
|
We define an environment as a linked list, kind of like
|
||||||
|
we did with the type environment. This time, though,
|
||||||
|
we use shared pointers instead of raw pointers to reference the parent.
|
||||||
|
I decided on this because we will need to be using virtual methods
|
||||||
|
(since we have two subclasses of `env`), and thus will need to
|
||||||
|
be passing the `env` by pointer. At that point, we might as well
|
||||||
|
use the "proper" way!
|
||||||
|
|
||||||
|
I implemented the environment as a linked list because it is, in essence,
|
||||||
|
a stack. However, not every "offset" in a stack is introduced by
|
||||||
|
binding variables - for instance, when we create an application node,
|
||||||
|
we first build the argument value on the stack, and then,
|
||||||
|
with that value still on the stack, build the left hand side of the application.
|
||||||
|
Thus, all the variable positions are offset by the presence of the argument
|
||||||
|
on the stack, and we must account for that. Similarly, in cases when we will
|
||||||
|
allocate space on the stack (we will run into these cases later), we will
|
||||||
|
need to account for that change. Thus, since we can increment
|
||||||
|
the offset in two ways (binding a variable and building something on the stack),
|
||||||
|
we allow for two types of nodes in our `env` stack.
|
||||||
|
|
||||||
|
During recursion we will be tweaking the return value of `get_offset` to
|
||||||
|
calculate the final location of a variable on the stack (if the
|
||||||
|
parent of a node returned offset `1`, but the node itself is a variable
|
||||||
|
node and thus introduces another offset, we need to return `2`). Because
|
||||||
|
of this, we cannot reasonably return a constant like `-1` (it will quickly
|
||||||
|
be made positive on a long list), and thus we throw an exception. To
|
||||||
|
allow for a safe way to check for an offset, without try-catch,
|
||||||
|
we also add a `has_variable` method which checks if the lookup will succeed.
|
||||||
|
A better approach would be to use `std::optional`, but it's C++17, so
|
||||||
|
we'll shy away from it.
|
||||||
|
|
||||||
It will also help to move some of the functions on the `binop` enum
|
It will also help to move some of the functions on the `binop` enum
|
||||||
into a separate file. The new header is pretty small:
|
into a separate file. The new header is pretty small:
|
||||||
|
@ -207,4 +237,40 @@ The new source file is not much longer:
|
||||||
|
|
||||||
{{< codeblock "C++" "compiler/06/binop.cpp" >}}
|
{{< codeblock "C++" "compiler/06/binop.cpp" >}}
|
||||||
|
|
||||||
And now, we begin our implementation.
|
And now, we begin our implementation. Let's start with the easy ones:
|
||||||
|
`ast_int`, `ast_lid` and `ast_uid`. The code for `ast_int` involves just pushing
|
||||||
|
the integer onto the stack:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 18 20 >}}
|
||||||
|
|
||||||
|
The code for `ast_lid` needs to check if the variable is global or local,
|
||||||
|
just like we discussed:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 31 36 >}}
|
||||||
|
|
||||||
|
We do not have to do this for `ast_uid`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 47 49 >}}
|
||||||
|
|
||||||
|
On to `ast_binop`! This is the first time we have to change our environment.
|
||||||
|
Once we build the right operand on the stack, every offset that we counted
|
||||||
|
from the top of the stack will have been shifted by 1 (we see this
|
||||||
|
in our compilation scheme for function application). So,
|
||||||
|
we create a new environment with `env_offset`, and use that
|
||||||
|
when we compile the left child:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 72 79 >}}
|
||||||
|
|
||||||
|
`ast_binop` performs two applications: `(+) lhs rhs`.
|
||||||
|
We push `rhs`, then `lhs`, then `(+)`, and then use MkApp
|
||||||
|
twice. In `ast_app`, we only need to perform one application,
|
||||||
|
`lhs rhs`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 98 102 >}}
|
||||||
|
|
||||||
|
Note that we also extend our environment in this one,
|
||||||
|
for the exact same reason as before.
|
||||||
|
|
||||||
|
Case expressions are the only thing left on the agenda. This
|
||||||
|
is the time during which we have to perform desugaring. Here,
|
||||||
|
though, we run into an issue: we don't have tags assigned to constructors!
|
||||||
|
|
Loading…
Reference in New Issue
Block a user