From 21f90d85c57addf9afeb46f3ee5f99ac40eface6 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Thu, 10 Oct 2019 13:14:00 -0700 Subject: [PATCH] Add finishing touches to code for part 6 of compiler series --- code/compiler/06/ast.cpp | 12 ++++-- code/compiler/06/ast.hpp | 5 +++ code/compiler/06/definition.cpp | 12 ++++++ code/compiler/06/instruction.cpp | 6 ++- code/compiler/06/main.cpp | 14 +++++++ content/blog/06_compiler_semantics.md | 57 +++++++++++++++++++++++++-- 6 files changed, 98 insertions(+), 8 deletions(-) diff --git a/code/compiler/06/ast.cpp b/code/compiler/06/ast.cpp index 8c5c3f9..00d92a3 100644 --- a/code/compiler/06/ast.cpp +++ b/code/compiler/06/ast.cpp @@ -176,7 +176,7 @@ void ast_case::resolve(const type_mgr& mgr) const { } void ast_case::compile(const env_ptr& env, std::vector& into) const { - type_data* type = dynamic_cast(node_type.get()); + type_data* type = dynamic_cast(of->node_type.get()); of->compile(env, into); into.push_back(instruction_ptr(new instruction_eval())); @@ -201,9 +201,15 @@ void ast_case::compile(const env_ptr& env, std::vector& into) c } jump_instruction->branches.push_back(std::move(branch_instructions)); } else if((cpat = dynamic_cast(branch->pat.get()))) { + env_ptr new_env = env; + for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + branch_instructions.push_back(instruction_ptr(new instruction_split())); - branch->expr->compile(env_ptr(new env_offset(cpat->params.size(), env)), - branch_instructions); + branch->expr->compile(new_env, branch_instructions); + branch_instructions.push_back(instruction_ptr(new instruction_slide( + cpat->params.size()))); int new_tag = type->constructors[cpat->constr].tag; if(jump_instruction->tag_mappings.find(new_tag) != diff --git a/code/compiler/06/ast.hpp b/code/compiler/06/ast.hpp index 21e1f61..c536d17 100644 --- a/code/compiler/06/ast.hpp +++ b/code/compiler/06/ast.hpp @@ -59,6 +59,7 @@ struct definition { 
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0; virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0; virtual void resolve(const type_mgr& mgr) = 0; + virtual void compile() = 0; }; using definition_ptr = std::unique_ptr; @@ -168,6 +169,8 @@ struct definition_defn : public definition { type_ptr return_type; std::vector param_types; + std::vector instructions; + definition_defn(std::string n, std::vector p, ast_ptr b) : name(std::move(n)), params(std::move(p)), body(std::move(b)) { @@ -176,6 +179,7 @@ struct definition_defn : public definition { void typecheck_first(type_mgr& mgr, type_env& env); void typecheck_second(type_mgr& mgr, const type_env& env) const; void resolve(const type_mgr& mgr); + void compile(); }; struct definition_data : public definition { @@ -188,4 +192,5 @@ struct definition_data : public definition { void typecheck_first(type_mgr& mgr, type_env& env); void typecheck_second(type_mgr& mgr, const type_env& env) const; void resolve(const type_mgr& mgr); + void compile(); }; diff --git a/code/compiler/06/definition.cpp b/code/compiler/06/definition.cpp index 5047055..34c6f66 100644 --- a/code/compiler/06/definition.cpp +++ b/code/compiler/06/definition.cpp @@ -41,6 +41,15 @@ void definition_defn::resolve(const type_mgr& mgr) { } } +void definition_defn::compile() { + env_ptr new_env = env_ptr(new env_offset(0, nullptr)); + for(auto it = params.rbegin(); it != params.rend(); it++) { + new_env = env_ptr(new env_var(*it, new_env)); + } + body->compile(new_env, instructions); + instructions.push_back(instruction_ptr(new instruction_update(params.size()))); +} + void definition_data::typecheck_first(type_mgr& mgr, type_env& env) { type_data* this_type = new type_data(name); type_ptr return_type = type_ptr(this_type); @@ -67,3 +76,6 @@ void definition_data::resolve(const type_mgr& mgr) { // Nothing } +void definition_data::compile() { + +} diff --git a/code/compiler/06/instruction.cpp 
b/code/compiler/06/instruction.cpp index b540ff6..0272e20 100644 --- a/code/compiler/06/instruction.cpp +++ b/code/compiler/06/instruction.cpp @@ -21,12 +21,12 @@ void instruction_push::print(int indent, std::ostream& to) const { void instruction_mkapp::print(int indent, std::ostream& to) const { print_indent(indent, to); - to << "Push()" << std::endl; + to << "MkApp()" << std::endl; } void instruction_update::print(int indent, std::ostream& to) const { print_indent(indent, to); - to << "Offset(" << offset << ")" << std::endl; + to << "Update(" << offset << ")" << std::endl; } void instruction_pack::print(int indent, std::ostream& to) const { @@ -48,6 +48,8 @@ void instruction_jump::print(int indent, std::ostream& to) const { } to << std::endl; } + print_indent(indent, to); + to << ")" << std::endl; } void instruction_slide::print(int indent, std::ostream& to) const { diff --git a/code/compiler/06/main.cpp b/code/compiler/06/main.cpp index c5313fd..ffbd4e5 100644 --- a/code/compiler/06/main.cpp +++ b/code/compiler/06/main.cpp @@ -42,6 +42,19 @@ void typecheck_program( } } +void compile_program(const std::vector& prog) { + for(auto& def : prog) { + def->compile(); + + definition_defn* defn = dynamic_cast(def.get()); + if(!defn) continue; + for(auto& instruction : defn->instructions) { + instruction->print(0, std::cout); + } + std::cout << std::endl; + } +} + int main() { yy::parser parser; type_mgr mgr; @@ -60,6 +73,7 @@ int main() { } try { typecheck_program(program, mgr, env); + compile_program(program); } catch(unification_error& err) { std::cout << "failed to unify types: " << std::endl; std::cout << " (1) \033[34m"; diff --git a/content/blog/06_compiler_semantics.md b/content/blog/06_compiler_semantics.md index 91058ea..c6f60de 100644 --- a/content/blog/06_compiler_semantics.md +++ b/content/blog/06_compiler_semantics.md @@ -168,6 +168,10 @@ first define C++ structs for the instructions of the G-machine: {{< codeblock "C++" "compiler/06/instruction.hpp" >}} +I 
omit the implementation of the various (trivial) `print` methods in this post; +as always, you can look at the full project source code, which is +freely available for each post in the series. + We can now envision a method on the `ast` struct that takes an environment (just like our compilation scheme takes the environment \\(\\rho\\\)), and compiles the `ast`. Rather than returning a vector @@ -282,7 +286,7 @@ struct, called `type_data`: When we create types from `definition_data`, we tag the corresponding constructors: -{{< codelines "C++" "compiler/06/definition.cpp" 35 51 >}} +{{< codelines "C++" "compiler/06/definition.cpp" 53 69 >}} Ah, but adding constructor info to the type doesn't solve the problem. Once we performed type checking, we don't keep @@ -339,7 +343,7 @@ of a node. Here's a sample implementation from `ast_binop`: And here's the implementation of `definition::resolve` on `definition_defn`: -{{< codelines "C++" "compiler/06/definition.cpp" 31 33 >}} +{{< codelines "C++" "compiler/06/definition.cpp" 32 42 >}} Finally, we call `resolve` at the end `typecheck_program` in `main.cpp`: @@ -348,7 +352,7 @@ Finally, we call `resolve` at the end `typecheck_program` in `main.cpp`: At last, we're ready to implement the code for compiling `ast_case`. Here it is, in all its glory: -{{< codelines "C++" "compiler/06/ast.cpp" 178 224 >}} +{{< codelines "C++" "compiler/06/ast.cpp" 178 230 >}} There's a lot to unpack here. First of all, just like we said in the compilation scheme, we want to build and evaluate the expression that's being analyzed. @@ -399,4 +403,51 @@ After we're done with all the branches, we also check for non-exhaustive pattern since otherwise we could run into runtime errors. With this, the case expression, and the last of the AST nodes, can be compiled. +We also add a `compile` method to definitions, since they contain +our AST nodes. 
The method is empty for `definition_data`, and +looks as follows for `definition_defn`: + +{{< codelines "C++" "compiler/06/definition.cpp" 44 51 >}} + +Finally, we make a function in our `main.cpp` file to compile +all the definitions: + +{{< codelines "C++" "compiler/06/main.cpp" 45 56 >}} + +In this function, we also include some extra +output to help us see the result of our compilation. Since +at the moment, only `definition_defn` has to +be compiled, we try to cast all definitions to it, and if +we succeed, we print them out. + +Let's try it all out! For the below sample program: + +{{< rawblock "compiler/06/examples/works1.txt" >}} + +Our compiler produces the following new output: +``` +PushInt(6) +PushInt(320) +PushGlobal(plus) +MkApp() +MkApp() + +Push(1) +Push(1) +PushGlobal(+) +MkApp() +MkApp() +``` + +The first sequence of instructions is clearly `main`. It creates +an application of `plus` to `320`, and then applies that to +`6`, which results in `plus 320 6`, which is correct. The +second sequence of instructions pushes the parameter that +sits at offset 1 from the top of the stack (`y`). It then +pushes a parameter from the same offset again, but this time, +since `y` was previously pushed on the stack, `x` is now +in that position, so `x` is pushed onto the stack. +Finally, `+` is pushed, and the application +`(+) x y` is created, which is equivalent to `x+y`. + {{< todo >}}Backport bugfix in case's typecheck{{< /todo >}}