Implement ast_case::compile for compiler series and reference code
This commit is contained in:
parent
7e9bd95846
commit
d90993a93c
|
@ -176,7 +176,51 @@ void ast_case::resolve(const type_mgr& mgr) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
type_data* type = dynamic_cast<type_data*>(node_type.get());
|
||||||
|
|
||||||
|
of->compile(env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
|
pattern_var* vpat;
|
||||||
|
pattern_constr* cpat;
|
||||||
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split()));
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(cpat->params.size(), env)),
|
||||||
|
branch_instructions);
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void pattern_var::print(std::ostream& to) const {
|
void pattern_var::print(std::ostream& to) const {
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
struct instruction {
|
struct instruction {
|
||||||
virtual ~instruction() = default;
|
virtual ~instruction() = default;
|
||||||
|
@ -53,6 +55,11 @@ struct instruction_split : public instruction {
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct instruction_jump : public instruction {
|
||||||
|
std::vector<std::vector<instruction_ptr>> branches;
|
||||||
|
std::map<int, int> tag_mappings;
|
||||||
|
};
|
||||||
|
|
||||||
struct instruction_slide : public instruction {
|
struct instruction_slide : public instruction {
|
||||||
int offset;
|
int offset;
|
||||||
|
|
||||||
|
|
|
@ -241,16 +241,16 @@ And now, we begin our implementation. Let's start with the easy ones:
|
||||||
`ast_int`, `ast_lid` and `ast_uid`. The code for `ast_int` involves just pushing
|
`ast_int`, `ast_lid` and `ast_uid`. The code for `ast_int` involves just pushing
|
||||||
the integer into the stack:
|
the integer into the stack:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/06/ast.cpp" 18 20 >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 36 38 >}}
|
||||||
|
|
||||||
The code for `ast_lid` needs to check if the variable is global or local,
|
The code for `ast_lid` needs to check if the variable is global or local,
|
||||||
just like we discussed:
|
just like we discussed:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/06/ast.cpp" 31 36 >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 53 58 >}}
|
||||||
|
|
||||||
We do not have to do this for `ast_uid`:
|
We do not have to do this for `ast_uid`:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/06/ast.cpp" 47 49 >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 73 75 >}}
|
||||||
|
|
||||||
On to `ast_binop`! This is the first time we have to change our environment.
|
On to `ast_binop`! This is the first time we have to change our environment.
|
||||||
As we said earlier, once we build the right operand on the stack, every offset that we counted
|
As we said earlier, once we build the right operand on the stack, every offset that we counted
|
||||||
|
@ -259,14 +259,14 @@ in our compilation scheme for function application). So,
|
||||||
we create a new environment with `env_offset`, and use that
|
we create a new environment with `env_offset`, and use that
|
||||||
when we compile the left child:
|
when we compile the left child:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/06/ast.cpp" 72 79 >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 103 110 >}}
|
||||||
|
|
||||||
`ast_binop` performs two applications: `(+) lhs rhs`.
|
`ast_binop` performs two applications: `(+) lhs rhs`.
|
||||||
We push `rhs`, then `lhs`, then `(+)`, and then use MkApp
|
We push `rhs`, then `lhs`, then `(+)`, and then use MkApp
|
||||||
twice. In `ast_app`, we only need to perform one application,
|
twice. In `ast_app`, we only need to perform one application,
|
||||||
`lhs rhs`:
|
`lhs rhs`:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/06/ast.cpp" 98 102 >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 134 138 >}}
|
||||||
|
|
||||||
Note that we also extend our environment in this one,
|
Note that we also extend our environment in this one,
|
||||||
for the exact same reason as before.
|
for the exact same reason as before.
|
||||||
|
@ -278,14 +278,15 @@ We need to adjust our code to keep track of the tags of the various
|
||||||
constructors of a type. To do this, we add a subclass for the `type_base`
|
constructors of a type. To do this, we add a subclass for the `type_base`
|
||||||
struct, called `type_data`:
|
struct, called `type_data`:
|
||||||
|
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
{{< codelines "C++" "compiler/06/type.hpp" 33 42 >}}
|
||||||
|
|
||||||
When we create types from `definition_data`, we tag the corresponding constructors:
|
When we create types from `definition_data`, we tag the corresponding constructors:
|
||||||
|
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
{{< codelines "C++" "compiler/06/definition.cpp" 35 51 >}}
|
||||||
|
|
||||||
Ah, but that doesn't solve the problem. Once we performed type checking, we don't keep
|
Ah, but adding constructor info to the type doesn't solve the problem.
|
||||||
the types that we computed for an AST node in the node. And obviously, we don't want
|
Once we performed type checking, we don't keep
|
||||||
|
the types that we computed for an AST node, in the node. And obviously, we don't want
|
||||||
to go looking for them again. Furthermore, we can't just look up a constructor
|
to go looking for them again. Furthermore, we can't just look up a constructor
|
||||||
in the environment, since we can well have patterns that don't have __any__ constructors:
|
in the environment, since we can well have patterns that don't have __any__ constructors:
|
||||||
|
|
||||||
|
@ -296,11 +297,8 @@ match l {
|
||||||
```
|
```
|
||||||
|
|
||||||
So, we want each `ast` node to store its type (well, in practice we only need this for
|
So, we want each `ast` node to store its type (well, in practice we only need this for
|
||||||
`ast_case`, but we might as well store it for all nodes). We can add it, no problem:
|
`ast_case`, but we might as well store it for all nodes). We can add it, no problem.
|
||||||
|
To add to that, we can add another, non-virtual `typecheck` method (let's call it `typecheck_common`,
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
|
||||||
|
|
||||||
Now, we can add another, non-virtual `typecheck` method (let's call it `typecheck_common`,
|
|
||||||
since naming is hard). This method will call `typecheck`, and store the output into
|
since naming is hard). This method will call `typecheck`, and store the output into
|
||||||
the `node_type` field.
|
the `node_type` field.
|
||||||
|
|
||||||
|
@ -311,7 +309,7 @@ type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||||
|
|
||||||
And the implementation is as simple as you think:
|
And the implementation is as simple as you think:
|
||||||
|
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 9 12 >}}
|
||||||
|
|
||||||
In client code (`definition_defn::typecheck_first` for instance), we should now
|
In client code (`definition_defn::typecheck_first` for instance), we should now
|
||||||
use `typecheck_common` instead of `typecheck`. With that done, we're almost there.
|
use `typecheck_common` instead of `typecheck`. With that done, we're almost there.
|
||||||
|
@ -329,26 +327,76 @@ virtual void resolve(const type_mgr& mgr) const = 0;
|
||||||
```
|
```
|
||||||
|
|
||||||
We also add the `resolve` method to `definition`, so that we can call it
|
We also add the `resolve` method to `definition`, so that we can call it
|
||||||
without having to run `dynamic_cast`. The implementation for `resolve_common`
|
without having to run `dynamic_cast`. The implementation for `ast::resolve_common`
|
||||||
just resolves the type:
|
just resolves the type:
|
||||||
|
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 14 21 >}}
|
||||||
|
|
||||||
The virtual `resolve` just calls `resolve_common` on an all `ast` children
|
The virtual `ast::resolve` just calls `ast::resolve_common` on all `ast` children
|
||||||
of a node. Here's a sample implementation from `ast_binop`:
|
of a node. Here's a sample implementation from `ast_binop`:
|
||||||
|
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 98 101 >}}
|
||||||
|
|
||||||
And here's the implementation of `resolve` on `definition_defn`:
|
And here's the implementation of `definition::resolve` on `definition_defn`:
|
||||||
|
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
{{< codelines "C++" "compiler/06/definition.cpp" 31 33 >}}
|
||||||
|
|
||||||
Finally, we call `resolve` from inside `typecheck_program` in `main.cpp`:
|
Finally, we call `resolve` at the end of `typecheck_program` in `main.cpp`:
|
||||||
|
|
||||||
{{< todo >}}Link code{{< /todo >}}
|
{{< codelines "C++" "compiler/06/main.cpp" 40 42 >}}
|
||||||
|
|
||||||
Finally, we're ready to implement the code for compiling `ast_case`.
|
At last, we're ready to implement the code for compiling `ast_case`.
|
||||||
|
Here it is, in all its glory:
|
||||||
|
|
||||||
{{< todo >}}Figure out how to keep all trees not requiring a type manager. {{< /todo >}}
|
{{< codelines "C++" "compiler/06/ast.cpp" 178 224 >}}
|
||||||
|
|
||||||
|
There's a lot to unpack here. First of all, just like we said in the compilation
|
||||||
|
scheme, we want to build and evaluate the expression that's being analyzed.
|
||||||
|
Once that's done, however, things get more tricky. We know that each
|
||||||
|
branch of a case expression will correspond to a vector of instructions -
|
||||||
|
in fact, our jump instruction contains a mapping from tags to instructions.
|
||||||
|
As we also discussed above, each list of instructions can be mapped to
|
||||||
|
by multiple tags. We don't want to recompile the same sequence of instructions
|
||||||
|
multiple times (or indeed, generate machine code for it). So, we keep
|
||||||
|
a mapping of tags to their corresponding sequences of instructions. We implement
|
||||||
|
this by having a vector of vectors of instructions (in which each inner vector
|
||||||
|
represents the code for a branch), and a map of tag number to index
|
||||||
|
in the vector containing all the branches. This way, multiple tags
|
||||||
|
can point to the same instruction set without duplicating information.
|
||||||
|
|
||||||
|
We also don't allow a tag to be mapped to more than one sequence of instructions.
|
||||||
|
This is handled differently depending on whether a variable pattern or a
|
||||||
|
constructor pattern is encountered. Variable patterns map all
|
||||||
|
tags that haven't been mapped yet, so no error can occur. Constructor patterns,
|
||||||
|
though, can explicitly try to map the same tag twice, and we don't want that.
|
||||||
|
|
||||||
|
I implied in the previous paragraph the implementation of our case expression
|
||||||
|
compilation algorithm, but let's go through it. Once we've compiled
|
||||||
|
the expression to be analyzed, and evaluated it (just like in our definitions
|
||||||
|
above), we proceed to look at all the branches specified in the case expression.
|
||||||
|
|
||||||
|
If a branch has a variable pattern, we must map to the result of the compilation
|
||||||
|
all the remaining, unmapped tags. We also aren't going to be taking apart
|
||||||
|
our value, so we don't need to use Split, but we do need to add 1 to the
|
||||||
|
environment offset to account for the presence of that value. So,
|
||||||
|
we compile the branch body with that offset, and iterate through
|
||||||
|
all the constructors of our data type. We skip a constructor
|
||||||
|
if it's been mapped, and if it hasn't been, we map it to the index
|
||||||
|
that this branch body will have in our list. Finally,
|
||||||
|
we push the newly compiled instruction sequence into the list of branch
|
||||||
|
bodies.
|
||||||
|
|
||||||
|
If a branch is a constructor pattern, on the other hand, we lead our compilation
|
||||||
|
output with a Split. This takes off the value from the stack, but pushes on
|
||||||
|
all the parameters of the constructor. We account for this by incrementing the
|
||||||
|
environment with the offset given by the number of arguments (just like we did
|
||||||
|
in our definitions of our compilation scheme). Before we map the tag,
|
||||||
|
we ensure that it hasn't already been mapped (and throw an exception, currently
|
||||||
|
in the form of a type error due to the growing length of this post),
|
||||||
|
and finally map it and insert the new branch code into the list of branches.
|
||||||
|
|
||||||
|
After we're done with all the branches, we also check for non-exhaustive patterns,
|
||||||
|
since otherwise we could run into runtime errors. With this, the case expression,
|
||||||
|
and the last of the AST nodes, can be compiled.
|
||||||
|
|
||||||
{{< todo >}}Backport bugfix in case's typecheck{{< /todo >}}
|
{{< todo >}}Backport bugfix in case's typecheck{{< /todo >}}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user