Compare commits

...

6 Commits

12 changed files with 273 additions and 28 deletions

View File

@ -97,10 +97,6 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
type_ptr branch_type = mgr.new_type();
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
for(auto& branch : branches) {
type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env);
@ -108,6 +104,11 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
mgr.unify(branch_type, curr_branch_type);
}
case_type = mgr.resolve(case_type, var);
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
return branch_type;
}

View File

@ -19,6 +19,7 @@ add_executable(compiler
type.cpp type.hpp
error.cpp error.hpp
binop.cpp binop.hpp
instruction.cpp instruction.hpp
${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS}
main.cpp

View File

@ -2,7 +2,7 @@
#include <ostream>
#include "error.hpp"
void print_indent(int n, std::ostream& to) {
static void print_indent(int n, std::ostream& to) {
while(n--) to << " ";
}
@ -176,7 +176,7 @@ void ast_case::resolve(const type_mgr& mgr) const {
}
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
type_data* type = dynamic_cast<type_data*>(node_type.get());
type_data* type = dynamic_cast<type_data*>(of->node_type.get());
of->compile(env, into);
into.push_back(instruction_ptr(new instruction_eval()));
@ -201,9 +201,15 @@ void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) c
}
jump_instruction->branches.push_back(std::move(branch_instructions));
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
env_ptr new_env = env;
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
branch_instructions.push_back(instruction_ptr(new instruction_split()));
branch->expr->compile(env_ptr(new env_offset(cpat->params.size(), env)),
branch_instructions);
branch->expr->compile(new_env, branch_instructions);
branch_instructions.push_back(instruction_ptr(new instruction_slide(
cpat->params.size())));
int new_tag = type->constructors[cpat->constr].tag;
if(jump_instruction->tag_mappings.find(new_tag) !=

View File

@ -58,7 +58,8 @@ struct definition {
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
virtual void resolve(const type_mgr& mgr) const = 0;
virtual void resolve(const type_mgr& mgr) = 0;
virtual void compile() = 0;
};
using definition_ptr = std::unique_ptr<definition>;
@ -168,6 +169,8 @@ struct definition_defn : public definition {
type_ptr return_type;
std::vector<type_ptr> param_types;
std::vector<instruction_ptr> instructions;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
@ -175,7 +178,8 @@ struct definition_defn : public definition {
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void resolve(const type_mgr& mgr);
void compile();
};
struct definition_data : public definition {
@ -187,5 +191,6 @@ struct definition_data : public definition {
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void resolve(const type_mgr& mgr);
void compile();
};

View File

@ -1,4 +1,5 @@
#include "ast.hpp"
#include "error.hpp"
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
return_type = mgr.new_type();
@ -28,8 +29,25 @@ void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const
mgr.unify(return_type, body_type);
}
void definition_defn::resolve(const type_mgr& mgr) const {
// Resolves the types stored on the body's nodes, then replaces this
// definition's return type and each parameter type with the result of
// mgr.resolve.
// Throws type_error("ambiguously typed program") whenever mgr.resolve hands
// back a non-null `var` (presumably a type variable that is still unbound).
void definition_defn::resolve(const type_mgr& mgr) {
    // Start from nullptr so the checks below never read an indeterminate
    // pointer, regardless of how mgr.resolve treats its out-parameter.
    type_var* var = nullptr;
    body->resolve_common(mgr);

    return_type = mgr.resolve(return_type, var);
    if(var) throw type_error("ambiguously typed program");

    for(auto& param_type : param_types) {
        param_type = mgr.resolve(param_type, var);
        if(var) throw type_error("ambiguously typed program");
    }
}
void definition_defn::compile() {
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
for(auto it = params.rbegin(); it != params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
body->compile(new_env, instructions);
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
}
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
@ -54,7 +72,10 @@ void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const
// Nothing
}
void definition_data::resolve(const type_mgr& mgr) const {
// Intentionally a no-op: present only to implement the pure virtual
// definition::resolve; `mgr` is unused.
void definition_data::resolve(const type_mgr& mgr) {
// Nothing
}
// Intentionally a no-op: present only to implement the pure virtual
// definition::compile; no instructions are generated here.
void definition_data::compile() {
}

View File

@ -0,0 +1,78 @@
#include "instruction.hpp"
// Writes the indentation unit to `to` exactly `n` times.
// A zero or negative `n` writes nothing; the loop is bounded by `n` so a
// negative count can no longer decrement past zero and keep writing.
static void print_indent(int n, std::ostream& to) {
    for(int i = 0; i < n; ++i) {
        to << " ";
    }
}
// Writes `PushInt(<value>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(" << value << ")" << '\n';
}
// Writes `PushGlobal(<name>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(" << name << ")" << '\n';
}
// Writes `Push(<offset>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(" << offset << ")" << '\n';
}
// Writes `MkApp()` on its own line after `indent` levels of indentation.
// Ends the line with '\n' rather than std::endl so the stream is not flushed
// once per instruction.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()" << '\n';
}
// Writes `Update(<offset>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(" << offset << ")" << '\n';
}
// Writes `Pack(<tag>, <size>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag << ", " << size << ")" << '\n';
}
// Writes `Split()` on its own line after `indent` levels of indentation.
// Ends the line with '\n' rather than std::endl so the stream is not flushed
// once per instruction.
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()" << '\n';
}
void instruction_jump::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Jump(" << std::endl;
for(auto& instruction_set : branches) {
for(auto& instruction : instruction_set) {
instruction->print(indent + 2, to);
}
to << std::endl;
}
print_indent(indent, to);
to << ")" << std::endl;
}
// Writes `Slide(<offset>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(" << offset << ")" << '\n';
}
// Writes `BinOp(<op_action(op)>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(" << op_action(op) << ")" << '\n';
}
// Writes `Eval()` on its own line after `indent` levels of indentation.
// Ends the line with '\n' rather than std::endl so the stream is not flushed
// once per instruction.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()" << '\n';
}
// Writes `Alloc(<amount>)` on its own line after `indent` levels of
// indentation. Ends the line with '\n' rather than std::endl so the stream is
// not flushed once per instruction.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(" << amount << ")" << '\n';
}
// Writes `Unwind()` on its own line after `indent` levels of indentation.
// Ends the line with '\n' rather than std::endl so the stream is not flushed
// once per instruction.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()" << '\n';
}

View File

@ -1,12 +1,15 @@
#pragma once
#include <string>
#include <memory>
#include "binop.hpp"
#include <vector>
#include <map>
#include <ostream>
#include "binop.hpp"
// Abstract base of every instruction type declared in this header.
struct instruction {
// Virtual so derived instructions are destroyed correctly through a base pointer.
virtual ~instruction() = default;
// Writes a textual form of the instruction to `to`; `indent` is the
// indentation level to print it at.
virtual void print(int indent, std::ostream& to) const = 0;
};
using instruction_ptr = std::unique_ptr<instruction>;
@ -16,6 +19,8 @@ struct instruction_pushint : public instruction {
instruction_pushint(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
};
struct instruction_pushglobal : public instruction {
@ -23,6 +28,8 @@ struct instruction_pushglobal : public instruction {
instruction_pushglobal(std::string n)
: name(std::move(n)) {}
void print(int indent, std::ostream& to) const;
};
struct instruction_push : public instruction {
@ -30,10 +37,12 @@ struct instruction_push : public instruction {
instruction_push(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no data members; printed as `MkApp()`.
struct instruction_mkapp : public instruction {
void print(int indent, std::ostream& to) const;
};
struct instruction_update : public instruction {
@ -41,6 +50,8 @@ struct instruction_update : public instruction {
instruction_update(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
};
struct instruction_pack : public instruction {
@ -49,15 +60,19 @@ struct instruction_pack : public instruction {
instruction_pack(int t, int s)
: tag(t), size(s) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no data members; printed as `Split()`.
struct instruction_split : public instruction {
void print(int indent, std::ostream& to) const;
};
// Instruction holding one instruction sequence per branch of a case expression.
struct instruction_jump : public instruction {
// One owned instruction sequence per branch.
std::vector<std::vector<instruction_ptr>> branches;
// Keyed by constructor tag; the mapped value is presumably the index of the
// matching entry in `branches` — TODO confirm against ast_case::compile.
std::map<int, int> tag_mappings;
// Prints `Jump(`, every branch's instructions, then `)`.
void print(int indent, std::ostream& to) const;
};
struct instruction_slide : public instruction {
@ -65,6 +80,8 @@ struct instruction_slide : public instruction {
instruction_slide(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
};
struct instruction_binop : public instruction {
@ -72,10 +89,12 @@ struct instruction_binop : public instruction {
instruction_binop(binop o)
: op(o) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no data members; printed as `Eval()`.
struct instruction_eval : public instruction {
void print(int indent, std::ostream& to) const;
};
struct instruction_alloc : public instruction {
@ -83,8 +102,10 @@ struct instruction_alloc : public instruction {
instruction_alloc(int a)
: amount(a) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no data members; printed as `Unwind()`.
struct instruction_unwind : public instruction {
void print(int indent, std::ostream& to) const;
};

View File

@ -42,6 +42,19 @@ void typecheck_program(
}
}
void compile_program(const std::vector<definition_ptr>& prog) {
for(auto& def : prog) {
def->compile();
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
if(!defn) continue;
for(auto& instruction : defn->instructions) {
instruction->print(0, std::cout);
}
std::cout << std::endl;
}
}
int main() {
yy::parser parser;
type_mgr mgr;
@ -60,6 +73,7 @@ int main() {
}
try {
typecheck_program(program, mgr, env);
compile_program(program);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";

View File

@ -138,3 +138,4 @@ Here are the posts that I've written so far for this series:
* [Typechecking]({{< relref "03_compiler_typechecking.md" >}})
* [Small Improvements]({{< relref "04_compiler_improvements.md" >}})
* [Execution]({{< relref "05_compiler_execution.md" >}})
* [Compilation]({{< relref "06_compiler_semantics.md" >}})

View File

@ -65,8 +65,8 @@ Finally, just like `ast_case::typecheck` called
We follow the same implementation strategy for patterns,
but we don't need indentation, or recursion:
{{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 115 117 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 123 128 >}}
In `main`, let's print the bodies of each function we receive from the parser:
{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
@ -160,12 +160,12 @@ we simply pass the type of the expression to be case analyzed into
the pattern matching method. However, since we don't want
case analysis on functions, we ensure that the type of the expression
is `type_base`. If not, we report this:
{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 107 110 >}}
The next exception is in `pattern_constr::match`. It occurs
when the pattern has a constructor we don't recognize, and
that's exactly what we report:
{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 132 134 >}}
The next exception occurs in a loop, when we bind
types for each of the constructor pattern's variables.
@ -174,7 +174,7 @@ constructor type to a `type_arr`. Conceptually,
this means that the pattern wants to apply the
constructor to more parameters than it actually
takes:
{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 138 138 >}}
We remove the last throw at the bottom of `pattern_constr::match`.
This is because once unification succeeds, we know

View File

@ -592,4 +592,4 @@ tell us what to do with our `ast` structs. We'll need to define
rules to translate trees into these instructions, and I've already
alluded to this when we went over `double 326`.
However, this has already gotten pretty long,
so we'll do it in the next post: (link me!)
so we'll do it in the next post: [Part 6 - Compilation]({{< relref "06_compiler_semantics.md" >}}).

View File

@ -168,6 +168,10 @@ first define C++ structs for the instructions of the G-machine:
{{< codeblock "C++" "compiler/06/instruction.hpp" >}}
I omit the implementation of the various (trivial) `print` methods in this post;
as always, you can look at the full project source code, which is
freely available for each post in the series.
We can now envision a method on the `ast` struct that takes an environment
(just like our compilation scheme takes the environment \\(\\rho\\\)),
and compiles the `ast`. Rather than returning a vector
@ -282,7 +286,7 @@ struct, called `type_data`:
When we create types from `definition_data`, we tag the corresponding constructors:
{{< codelines "C++" "compiler/06/definition.cpp" 35 51 >}}
{{< codelines "C++" "compiler/06/definition.cpp" 53 69 >}}
Ah, but adding constructor info to the type doesn't solve the problem.
Once we performed type checking, we don't keep
@ -339,7 +343,7 @@ of a node. Here's a sample implementation from `ast_binop`:
And here's the implementation of `definition::resolve` on `definition_defn`:
{{< codelines "C++" "compiler/06/definition.cpp" 31 33 >}}
{{< codelines "C++" "compiler/06/definition.cpp" 32 42 >}}
Finally, we call `resolve` at the end of `typecheck_program` in `main.cpp`:
@ -348,7 +352,7 @@ Finally, we call `resolve` at the end `typecheck_program` in `main.cpp`:
At last, we're ready to implement the code for compiling `ast_case`.
Here it is, in all its glory:
{{< codelines "C++" "compiler/06/ast.cpp" 178 224 >}}
{{< codelines "C++" "compiler/06/ast.cpp" 178 230 >}}
There's a lot to unpack here. First of all, just like we said in the compilation
scheme, we want to build and evaluate the expression that's being analyzed.
@ -399,4 +403,97 @@ After we're done with all the branches, we also check for non-exhaustive pattern
since otherwise we could run into runtime errors. With this, the case expression,
and the last of the AST nodes, can be compiled.
{{< todo >}}Backport bugfix in case's typecheck{{< /todo >}}
We also add a `compile` method to definitions, since they contain
our AST nodes. The method is empty for `definition_data`, and
looks as follows for `definition_defn`:
{{< codelines "C++" "compiler/06/definition.cpp" 44 51 >}}
Notice that we terminate the function with Update. This
will turn the `ast_app` node that served as the "root"
of the application into an indirection to the value that we have computed.
In essence, this is how we can lazily evaluate expressions.
Finally, we make a function in our `main.cpp` file to compile
all the definitions:
{{< codelines "C++" "compiler/06/main.cpp" 45 56 >}}
In this method, we also include some extra
output to help us see the result of our compilation. Since
at the moment, only the `definition_defn` program has to
be compiled, we try to cast all definitions to it, and if
we succeed, we print them out.
Let's try it all out! For the below sample program:
{{< rawblock "compiler/06/examples/works1.txt" >}}
Our compiler produces the following new output:
```
PushInt(6)
PushInt(320)
PushGlobal(plus)
MkApp()
MkApp()
Push(1)
Push(1)
PushGlobal(+)
MkApp()
MkApp()
```
The first sequence of instructions is clearly `main`. It creates
an application of `plus` to `320`, and then applies that to
`6`, which results in `plus 320 6`, which is correct. The
second sequence of instructions pushes the parameter that
sits on offset 1 from the top of the stack (`y`). It then
pushes a parameter from the same offset again, but this time,
since `y` was previously pushed on the stack, `x` is now
in that position, so `x` is pushed onto the stack.
Finally, `+` is pushed, and the application
`(+) x y` is created, which is equivalent to `x+y`.
Let's also take a look at a case expression program:
{{< rawblock "compiler/06/examples/works3.txt" >}}
The result of the compilation is as follows:
```
Push(0)
Eval()
Jump(
Split()
PushInt(0)
Slide(0)
Split()
Push(1)
PushGlobal(length)
MkApp()
PushInt(1)
PushGlobal(+)
MkApp()
MkApp()
Slide(2)
)
Update(1)
```
We push the first (and only) parameter onto the stack. We then make
sure it's evaluated, and perform case analysis: if the list
is `Nil`, we simply push the number 0 onto the stack. If it's
a concatenation of some `x` and another list `xs`, we
push `xs` and `length` onto the stack, make the application
(`length xs`), push the 1, and finally apply `+` to the result.
This all makes sense!
With this, we've been able to compile our expressions and functions
into G-machine code. We're not done, however - our computers
aren't G-machines. We'll need to compile our G-machine code to
__machine code__ (we will use LLVM for this), implement the
__runtime__, and develop a __garbage collector__. We'll
tackle the first of these in the next post - see you there!