From 8ba501bd84edb4f4365bd6c3252d97cb3ba14e57 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 26 Aug 2019 21:05:44 -0700 Subject: [PATCH] Add output and fix two bugs. --- code/compiler/04/ast.cpp | 56 ++++++++++++++ code/compiler/04/ast.hpp | 10 +++ code/compiler/04/definition.cpp | 4 +- code/compiler/04/main.cpp | 17 +++++ code/compiler/04/parser.y | 2 +- code/compiler/04/type.cpp | 20 +++++ code/compiler/04/type.hpp | 10 +++ content/blog/04_compiler_improvements.md | 97 ++++++++++++++++++++++++ 8 files changed, 213 insertions(+), 3 deletions(-) diff --git a/code/compiler/04/ast.cpp b/code/compiler/04/ast.cpp index bdee157..b9f920f 100644 --- a/code/compiler/04/ast.cpp +++ b/code/compiler/04/ast.cpp @@ -1,4 +1,5 @@ #include "ast.hpp" +#include std::string op_name(binop op) { switch(op) { @@ -10,18 +11,44 @@ std::string op_name(binop op) { throw 0; } +void print_indent(int n, std::ostream& to) { + while(n--) to << " "; +} + +void ast_int::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "INT: " << value << std::endl; +} + type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const { return type_ptr(new type_base("Int")); } +void ast_lid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "LID: " << id << std::endl; +} + type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const { return env.lookup(id); } +void ast_uid::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "UID: " << id << std::endl; +} + type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const { return env.lookup(id); } +void ast_binop::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "BINOP: " << op_name(op) << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr ltype = left->typecheck(mgr, env); type_ptr rtype = 
right->typecheck(mgr, env); @@ -36,6 +63,13 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const { return return_type; } +void ast_app::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "APP:" << std::endl; + left->print(indent + 1, to); + right->print(indent + 1, to); +} + type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr ltype = left->typecheck(mgr, env); type_ptr rtype = right->typecheck(mgr, env); @@ -46,6 +80,17 @@ type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const { return return_type; } +void ast_case::print(int indent, std::ostream& to) const { + print_indent(indent, to); + to << "CASE: " << std::endl; + for(auto& branch : branches) { + print_indent(indent + 1, to); + branch->pat->print(to); + to << std::endl; + branch->expr->print(indent + 2, to); + } +} + type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr case_type = of->typecheck(mgr, env); type_ptr branch_type = mgr.new_type(); @@ -60,10 +105,21 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const { return branch_type; } +void pattern_var::print(std::ostream& to) const { + to << var; +} + void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const { env.bind(var, t); } +void pattern_constr::print(std::ostream& to) const { + to << constr; + for(auto& param : params) { + to << " " << param; + } +} + void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const { type_ptr constructor_type = env.lookup(constr); if(!constructor_type) throw 0; diff --git a/code/compiler/04/ast.hpp b/code/compiler/04/ast.hpp index c658be7..fcfed19 100644 --- a/code/compiler/04/ast.hpp +++ b/code/compiler/04/ast.hpp @@ -7,6 +7,7 @@ struct ast { virtual ~ast() = default; + virtual void print(int indent, std::ostream& to) const = 0; virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0; }; @@ -15,6 +16,7 @@ using ast_ptr = 
std::unique_ptr; struct pattern { virtual ~pattern() = default; + virtual void print(std::ostream& to) const = 0; virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0; }; @@ -62,6 +64,7 @@ struct ast_int : public ast { explicit ast_int(int v) : value(v) {} + void print(int indent, std::ostream& to) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const; }; @@ -71,6 +74,7 @@ struct ast_lid : public ast { explicit ast_lid(std::string i) : id(std::move(i)) {} + void print(int indent, std::ostream& to) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const; }; @@ -80,6 +84,7 @@ struct ast_uid : public ast { explicit ast_uid(std::string i) : id(std::move(i)) {} + void print(int indent, std::ostream& to) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const; }; @@ -91,6 +96,7 @@ struct ast_binop : public ast { ast_binop(binop o, ast_ptr l, ast_ptr r) : op(o), left(std::move(l)), right(std::move(r)) {} + void print(int indent, std::ostream& to) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const; }; @@ -101,6 +107,7 @@ struct ast_app : public ast { ast_app(ast_ptr l, ast_ptr r) : left(std::move(l)), right(std::move(r)) {} + void print(int indent, std::ostream& to) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const; }; @@ -111,6 +118,7 @@ struct ast_case : public ast { ast_case(ast_ptr o, std::vector b) : of(std::move(o)), branches(std::move(b)) {} + void print(int indent, std::ostream& to) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const; }; @@ -120,6 +128,7 @@ struct pattern_var : public pattern { pattern_var(std::string v) : var(std::move(v)) {} + void print(std::ostream &to) const; void match(type_ptr t, type_mgr& mgr, type_env& env) const; }; @@ -130,6 +139,7 @@ struct pattern_constr : public pattern { pattern_constr(std::string c, std::vector p) : constr(std::move(c)), params(std::move(p)) {} + void print(std::ostream &to) const; void match(type_ptr t, 
type_mgr&, type_env& env) const; }; diff --git a/code/compiler/04/definition.cpp b/code/compiler/04/definition.cpp index 69b3196..f0889b6 100644 --- a/code/compiler/04/definition.cpp +++ b/code/compiler/04/definition.cpp @@ -34,8 +34,8 @@ void definition_data::typecheck_first(type_mgr& mgr, type_env& env) { for(auto& constructor : constructors) { type_ptr full_type = return_type; - for(auto& type_name : constructor->types) { - type_ptr type = type_ptr(new type_base(type_name)); + for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) { + type_ptr type = type_ptr(new type_base(*it)); full_type = type_ptr(new type_arr(type, full_type)); } diff --git a/code/compiler/04/main.cpp b/code/compiler/04/main.cpp index 43c1b4f..337bc7c 100644 --- a/code/compiler/04/main.cpp +++ b/code/compiler/04/main.cpp @@ -1,6 +1,7 @@ #include "ast.hpp" #include "parser.hpp" #include "type.hpp" +#include void yy::parser::error(const std::string& msg) { std::cout << "An error occured: " << msg << std::endl; @@ -29,11 +30,27 @@ void typecheck_program(const std::vector& prog) { for(auto& def : prog) { def->typecheck_second(mgr, env); } + + for(auto& pair : env.names) { + std::cout << pair.first << ": "; + pair.second->print(mgr, std::cout); + std::cout << std::endl; + } } int main() { yy::parser parser; parser.parse(); + for(auto& definition : program) { + definition_defn* def = dynamic_cast(definition.get()); + if(!def) continue; + + std::cout << def->name; + for(auto& param : def->params) std::cout << " " << param; + std::cout << ":" << std::endl; + + def->body->print(1, std::cout); + } typecheck_program(program); std::cout << program.size() << std::endl; } diff --git a/code/compiler/04/parser.y b/code/compiler/04/parser.y index fdb72c9..3874aca 100644 --- a/code/compiler/04/parser.y +++ b/code/compiler/04/parser.y @@ -107,7 +107,7 @@ case ; branches - : branches branch { $$ = std::move($1); $1.push_back(std::move($2)); } + : branches branch { $$ = std::move($1); 
$$.push_back(std::move($2)); } | branch { $$ = std::vector(); $$.push_back(std::move($1));} ; diff --git a/code/compiler/04/type.cpp b/code/compiler/04/type.cpp index f370a91..8a8e22d 100644 --- a/code/compiler/04/type.cpp +++ b/code/compiler/04/type.cpp @@ -2,6 +2,26 @@ #include #include +void type_var::print(const type_mgr& mgr, std::ostream& to) const { + auto it = mgr.types.find(name); + if(it != mgr.types.end()) { + it->second->print(mgr, to); + } else { + to << name; + } +} + +void type_base::print(const type_mgr& mgr, std::ostream& to) const { + to << name; +} + +void type_arr::print(const type_mgr& mgr, std::ostream& to) const { + left->print(mgr, to); + to << " -> ("; + right->print(mgr, to); + to << ")"; +} + std::string type_mgr::new_type_name() { int temp = last_id++; std::string str = ""; diff --git a/code/compiler/04/type.hpp b/code/compiler/04/type.hpp index e63040e..2774c29 100644 --- a/code/compiler/04/type.hpp +++ b/code/compiler/04/type.hpp @@ -2,8 +2,12 @@ #include #include +struct type_mgr; + struct type { virtual ~type() = default; + + virtual void print(const type_mgr& mgr, std::ostream& to) const = 0; }; using type_ptr = std::shared_ptr; @@ -13,6 +17,8 @@ struct type_var : public type { type_var(std::string n) : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; }; struct type_base : public type { @@ -20,6 +26,8 @@ struct type_base : public type { type_base(std::string n) : name(std::move(n)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; }; struct type_arr : public type { @@ -28,6 +36,8 @@ struct type_arr : public type { type_arr(type_ptr l, type_ptr r) : left(std::move(l)), right(std::move(r)) {} + + void print(const type_mgr& mgr, std::ostream& to) const; }; struct type_mgr { diff --git a/content/blog/04_compiler_improvements.md b/content/blog/04_compiler_improvements.md index 5c71a06..fe795d1 100644 --- a/content/blog/04_compiler_improvements.md +++ 
b/content/blog/04_compiler_improvements.md @@ -29,6 +29,103 @@ to only compile source files that have changed, and we want to have a standard definition of how to build our program. +### Printing Syntax Trees +Let's start by printing the trees we get from our parser. +This is long overdue - we had no way to verify the structure +of what our parser returned to us since Part 2. We'll print +the trees top-down, with the children of a node +indented one block further than the node itself. For this, +we'll make a new virtual function with the signature: +``` +virtual void print(int indent, std::ostream& to) const; +``` +We'll include a similar printing function in our +pattern struct, too: +``` +virtual void print(std::ostream& to) const; +``` + +Let's take a look at the implementation. For `ast_int`, +`ast_lid`, and `ast_uid`: +{{< codelines "C++" "compiler/04/ast.cpp" 18 21 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 27 30 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 36 39 >}} + +With `ast_binop` things get a bit more interesting. 
+We call `print` recursively on the children of the +`binop` node: +{{< codelines "C++" "compiler/04/ast.cpp" 45 50 >}} + +The same idea for `ast_app`: +{{< codelines "C++" "compiler/04/ast.cpp" 66 71 >}} + +Finally, just like `ast_case::typecheck` called +`pattern::match`, `ast_case::print` calls `pattern::print`: +{{< codelines "C++" "compiler/04/ast.cpp" 83 92 >}} + +We follow the same implementation strategy for patterns, +but we don't need indentation, or recursion: +{{< codelines "C++" "compiler/04/ast.cpp" 108 110 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 116 121 >}} + +Let's print the bodies of each function we receive from the parser: +{{< codelines "C++" "compiler/04/main.cpp" 35 50 >}} + +### Printing Types +Types are another thing that we want to be able to inspect, so let's +add a similar print method to them: +``` +virtual void print(const type_mgr& mgr, std::ostream& to) const; +``` +We need the type manager so we can follow substitutions. +The implementation is simple enough: +{{< codelines "C++" "compiler/04/type.cpp" 5 24 >}} + +Let's also print out the types we infer. We'll make it a separate loop +in the `typecheck_program` function, because it's mostly just +for debugging purposes. + +### Fixing Bugs +We actually discover not one, but two bugs in our implementation thanks +to this output. Observe the output for `works3.txt`: +``` +length l: + CASE: + Nil + INT: 0 +*: Int -> (Int -> (Int)) ++: Int -> (Int -> (Int)) +-: Int -> (Int -> (Int)) +/: Int -> (Int -> (Int)) +Cons: List -> (Int -> (List)) +Nil: List +length: List -> (Int) +2 +``` + +First, we're missing the `Cons` branch. The culprit is `parser.y`, specifically +this line: +```C++ + : branches branch { $$ = std::move($1); $1.push_back(std::move($2)); } +``` +Notice that we move our list of branches out of `$1`. However, when we +`push_back`, we use `$1` again. That's wrong! 
We need to `push_back` +to `$$` instead: +{{< codelines "C++" "compiler/04/parser.y" 110 110 >}} + +Next, observe that `Cons` has type `List -> Int -> List`. That's not right, +since `Int` comes first in our definition. The culprit is this fragment of code: +```C++ + for(auto& type_name : constructor->types) { + type_ptr type = type_ptr(new type_base(type_name)); + full_type = type_ptr(new type_arr(type, full_type)); + } +``` +Remember how we built the function type backwards in Part 3? We have to do the same here. +We replace the fragment with the proper reverse iteration: +{{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}} + + ### Setting up CMake This would be extremely easy if not for Flex and Bison. We start with the usual: {{< codelines "CMake" "compiler/04/CMakeLists.txt" 1 2 >}}