Browse Source

Add output and fix two bugs.

sidenotes
Danila Fedorin 2 years ago
parent
commit
8ba501bd84
  1. 56
      code/compiler/04/ast.cpp
  2. 10
      code/compiler/04/ast.hpp
  3. 4
      code/compiler/04/definition.cpp
  4. 17
      code/compiler/04/main.cpp
  5. 2
      code/compiler/04/parser.y
  6. 20
      code/compiler/04/type.cpp
  7. 10
      code/compiler/04/type.hpp
  8. 97
      content/blog/04_compiler_improvements.md

56
code/compiler/04/ast.cpp

@ -1,4 +1,5 @@
#include "ast.hpp"
#include <ostream>
std::string op_name(binop op) {
switch(op) {
@ -10,18 +11,44 @@ std::string op_name(binop op) {
throw 0;
}
/**
 * Writes `n` single-space characters to `to`.
 *
 * A zero or negative `n` writes nothing. The loop counts upward so that a
 * negative argument terminates immediately instead of decrementing `n`
 * until signed overflow, as a `while(n--)` loop would.
 */
void print_indent(int n, std::ostream& to) {
    for(int i = 0; i < n; ++i) to << " ";
}
// Prints this integer literal as one line: the indentation, then
// "INT: " followed by the stored value.
void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: ";
    to << value;
    to << std::endl;
}
// An integer literal always has the base type named "Int"; neither the
// type manager nor the environment is consulted.
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr int_type(new type_base("Int"));
    return int_type;
}
// Prints this lowercase identifier as one line: the indentation, then
// "LID: " followed by the identifier.
void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: ";
    to << id;
    to << std::endl;
}
// The type of an identifier is whatever the environment has recorded for
// it; the lookup result is returned unchanged.
// NOTE(review): the result may be null for an unbound name - confirm
// against type_env::lookup.
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    return bound_type;
}
// Prints this uppercase identifier as one line: the indentation, then
// "UID: " followed by the identifier.
void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: ";
    to << id;
    to << std::endl;
}
// The type of an uppercase identifier is whatever the environment has
// recorded for it; the lookup result is returned unchanged.
// NOTE(review): the result may be null for an unbound name - confirm
// against type_env::lookup.
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    return bound_type;
}
void ast_binop::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "BINOP: " << op_name(op) << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
@ -36,6 +63,13 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
return return_type;
}
void ast_app::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "APP:" << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
@ -46,6 +80,17 @@ type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
return return_type;
}
void ast_case::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "CASE: " << std::endl;
for(auto& branch : branches) {
print_indent(indent + 1, to);
branch->pat->print(to);
to << std::endl;
branch->expr->print(indent + 2, to);
}
}
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr case_type = of->typecheck(mgr, env);
type_ptr branch_type = mgr.new_type();
@ -60,10 +105,21 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
return branch_type;
}
// A variable pattern prints as just the variable's name, with no
// indentation and no trailing newline.
void pattern_var::print(std::ostream& to) const {
    const auto& bound_name = var;
    to << bound_name;
}
// A variable pattern matches any type: it binds the variable's name to the
// matched type `t` in `env`. The type manager is not used.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    const type_ptr& matched_type = t;
    env.bind(var, matched_type);
}
void pattern_constr::print(std::ostream& to) const {
to << constr;
for(auto& param : params) {
to << " " << param;
}
}
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
type_ptr constructor_type = env.lookup(constr);
if(!constructor_type) throw 0;

10
code/compiler/04/ast.hpp

@ -7,6 +7,7 @@
// Abstract base for syntax tree nodes; each concrete node type supplies
// its own printing and type checking.
struct ast {
virtual ~ast() = default;
// Writes a textual rendering of this node to `to`. `indent` is the depth
// at which the node itself is printed; children go one level deeper.
virtual void print(int indent, std::ostream& to) const = 0;
// Computes and returns this node's type, given the type manager and a
// read-only type environment.
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
};
@ -15,6 +16,7 @@ using ast_ptr = std::unique_ptr<ast>;
// Abstract base for the patterns that appear in case branches.
struct pattern {
virtual ~pattern() = default;
// Writes the pattern's text to `to` (no indentation parameter, unlike
// ast::print).
virtual void print(std::ostream& to) const = 0;
// Matches this pattern against type `t`; the environment is taken by
// mutable reference so implementations can add bindings to it.
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
};
@ -62,6 +64,7 @@ struct ast_int : public ast {
explicit ast_int(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
@ -71,6 +74,7 @@ struct ast_lid : public ast {
explicit ast_lid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
@ -80,6 +84,7 @@ struct ast_uid : public ast {
explicit ast_uid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
@ -91,6 +96,7 @@ struct ast_binop : public ast {
ast_binop(binop o, ast_ptr l, ast_ptr r)
: op(o), left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
@ -101,6 +107,7 @@ struct ast_app : public ast {
ast_app(ast_ptr l, ast_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
@ -111,6 +118,7 @@ struct ast_case : public ast {
ast_case(ast_ptr o, std::vector<branch_ptr> b)
: of(std::move(o)), branches(std::move(b)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
@ -120,6 +128,7 @@ struct pattern_var : public pattern {
pattern_var(std::string v)
: var(std::move(v)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
};
@ -130,6 +139,7 @@ struct pattern_constr : public pattern {
pattern_constr(std::string c, std::vector<std::string> p)
: constr(std::move(c)), params(std::move(p)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr&, type_env& env) const;
};

4
code/compiler/04/definition.cpp

@ -34,8 +34,8 @@ void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
for(auto& constructor : constructors) {
type_ptr full_type = return_type;
for(auto& type_name : constructor->types) {
type_ptr type = type_ptr(new type_base(type_name));
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
type_ptr type = type_ptr(new type_base(*it));
full_type = type_ptr(new type_arr(type, full_type));
}

17
code/compiler/04/main.cpp

@ -1,6 +1,7 @@
#include "ast.hpp"
#include "parser.hpp"
#include "type.hpp"
#include <iostream>
void yy::parser::error(const std::string& msg) {
std::cout << "An error occured: " << msg << std::endl;
@ -29,11 +30,27 @@ void typecheck_program(const std::vector<definition_ptr>& prog) {
for(auto& def : prog) {
def->typecheck_second(mgr, env);
}
for(auto& pair : env.names) {
std::cout << pair.first << ": ";
pair.second->print(mgr, std::cout);
std::cout << std::endl;
}
}
// Entry point: runs the parser, prints the name, parameters and body tree
// of every function definition in `program`, then type checks the program
// and finally prints the number of top-level definitions.
int main() {
    yy::parser parser;
    parser.parse();

    for(auto& entry : program) {
        // Only function definitions carry a body to print; anything else
        // fails the cast and is passed over.
        auto* fn = dynamic_cast<definition_defn*>(entry.get());
        if(fn) {
            std::cout << fn->name;
            for(auto& param_name : fn->params) {
                std::cout << " " << param_name;
            }
            std::cout << ":" << std::endl;
            fn->body->print(1, std::cout);
        }
    }

    typecheck_program(program);
    std::cout << program.size() << std::endl;
}

2
code/compiler/04/parser.y

@ -107,7 +107,7 @@ case
;
branches
: branches branch { $$ = std::move($1); $1.push_back(std::move($2)); }
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
;

20
code/compiler/04/type.cpp

@ -2,6 +2,26 @@
#include <sstream>
#include <algorithm>
// Prints a type variable. If the manager holds an entry for this
// variable's name, that entry is printed in its place; otherwise the bare
// variable name is written.
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
    auto bound = mgr.types.find(name);
    if(bound == mgr.types.end()) {
        to << name;
        return;
    }
    bound->second->print(mgr, to);
}
// A base type prints as its name; the type manager is not consulted.
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
    const auto& type_name = name;
    to << type_name;
}
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
left->print(mgr, to);
to << " -> (";
right->print(mgr, to);
to << ")";
}
std::string type_mgr::new_type_name() {
int temp = last_id++;
std::string str = "";

10
code/compiler/04/type.hpp

@ -2,8 +2,12 @@
#include <memory>
#include <map>
struct type_mgr;
// Abstract base for the type representations used by the type checker.
struct type {
virtual ~type() = default;
// Writes a textual rendering of this type to `to`; the type manager is
// passed along so implementations can consult it while printing.
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
};
using type_ptr = std::shared_ptr<type>;
@ -13,6 +17,8 @@ struct type_var : public type {
type_var(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
struct type_base : public type {
@ -20,6 +26,8 @@ struct type_base : public type {
type_base(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
struct type_arr : public type {
@ -28,6 +36,8 @@ struct type_arr : public type {
type_arr(type_ptr l, type_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
struct type_mgr {

97
content/blog/04_compiler_improvements.md

@ -29,6 +29,103 @@ to only compile source files that have changed,
and we want to have a standard definition of how to
build our program.
### Printing Syntax Trees
Let's start by printing the trees we get from our parser.
This is long overdue - we had no way to verify the structure
of what our parser returned to us since Part 2. We'll print
the trees top-down, with the children of a node
indented one level further than the node itself. For this,
we'll make a new virtual function with the signature:
```
virtual void print(int indent, std::ostream& to) const;
```
We'll add a similar printing function to our
`pattern` struct, too:
```
virtual void print(std::ostream& to) const;
```
Let's take a look at the implementation. For `ast_int`,
`ast_lid`, and `ast_uid`:
{{< codelines "C++" "compiler/04/ast.cpp" 18 21 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 27 30 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 36 39 >}}
With `ast_binop` things get a bit more interesting.
We call `print` recursively on the children of the
`binop` node:
{{< codelines "C++" "compiler/04/ast.cpp" 45 50 >}}
The same idea for `ast_app`:
{{< codelines "C++" "compiler/04/ast.cpp" 66 71 >}}
Finally, just like `ast_case::typecheck` called
`pattern::match`, `ast_case::print` calls `pattern::print`:
{{< codelines "C++" "compiler/04/ast.cpp" 83 92 >}}
We follow the same implementation strategy for patterns,
but we don't need indentation, or recursion:
{{< codelines "C++" "compiler/04/ast.cpp" 108 110 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 116 121 >}}
Let's print the bodies of each function we receive from the parser:
{{< codelines "C++" "compiler/04/main.cpp" 35 50 >}}
### Printing Types
Types are another thing that we want to be able to inspect, so let's
add a similar print method to them:
```
virtual void print(const type_mgr& mgr, std::ostream& to) const;
```
We need the type manager so we can follow substitutions.
The implementation is simple enough:
{{< codelines "C++" "compiler/04/type.cpp" 5 24 >}}
Let's also print out the types we infer. We'll make it a separate loop
in the `typecheck_program` function, because it's mostly just
for debugging purposes.
### Fixing Bugs
We actually discover not one, but two bugs in our implementation thanks
to this output. Observe the output for `works3.txt`:
```
length l:
CASE:
Nil
INT: 0
*: Int -> (Int -> (Int))
+: Int -> (Int -> (Int))
-: Int -> (Int -> (Int))
/: Int -> (Int -> (Int))
Cons: List -> (Int -> (List))
Nil: List
length: List -> (Int)
2
```
First, we're missing the `Cons` branch. The culprit is `parser.y`, specifically
this line:
```C++
: branches branch { $$ = std::move($1); $1.push_back(std::move($2)); }
```
Notice that we move our list of branches out of `$1`. However, when we
`push_back`, we use `$1` again. That's wrong! We need to `push_back`
to `$$` instead:
{{< codelines "C++" "compiler/04/parser.y" 110 110 >}}
Next, observe that `Cons` has type `List -> Int -> List`. That's not right,
since `Int` comes first in our definition. The culprit is this fragment of code:
```C++
for(auto& type_name : constructor->types) {
type_ptr type = type_ptr(new type_base(type_name));
full_type = type_ptr(new type_arr(type, full_type));
}
```
Remember how we built the function type backwards in Part 3? We have to do the same here.
We replace the fragment with the proper reverse iteration:
{{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}}
### Setting up CMake
This would be extremely easy if not for Flex and Bison. We start with the usual:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 1 2 >}}

Loading…
Cancel
Save