Add errors section to Part 4 of compiler posts

This commit is contained in:
Danila Fedorin 2019-08-28 15:34:13 -07:00
parent df1101a14c
commit 05af1350c8
7 changed files with 148 additions and 32 deletions

View File

@ -16,6 +16,7 @@ add_executable(compiler
ast.cpp ast.hpp definition.cpp
env.cpp env.hpp
type.cpp type.hpp
error.cpp error.hpp
${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS}
main.cpp

View File

@ -1,5 +1,6 @@
#include "ast.hpp"
#include <ostream>
#include "error.hpp"
std::string op_name(binop op) {
switch(op) {
@ -8,7 +9,7 @@ std::string op_name(binop op) {
case TIMES: return "*";
case DIVIDE: return "/";
}
throw 0;
return "??";
}
void print_indent(int n, std::ostream& to) {
@ -53,7 +54,7 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr ftype = env.lookup(op_name(op));
if(!ftype) throw 0;
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
type_ptr return_type = mgr.new_type();
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
@ -92,9 +93,14 @@ void ast_case::print(int indent, std::ostream& to) const {
}
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr case_type = of->typecheck(mgr, env);
type_var* var;
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
type_ptr branch_type = mgr.new_type();
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
for(auto& branch : branches) {
type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env);
@ -122,17 +128,17 @@ void pattern_constr::print(std::ostream& to) const {
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
type_ptr constructor_type = env.lookup(constr);
if(!constructor_type) throw 0;
if(!constructor_type) {
throw type_error(std::string("pattern using unknown constructor ") + constr);
}
for(int i = 0; i < params.size(); i++) {
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
if(!arr) throw 0;
if(!arr) throw type_error("too many parameters in constructor pattern");
env.bind(params[i], arr->left);
constructor_type = arr->right;
}
mgr.unify(t, constructor_type);
type_base* result_type = dynamic_cast<type_base*>(constructor_type.get());
if(!result_type) throw 0;
}

View File

@ -0,0 +1,5 @@
#include "error.hpp"
const char* type_error::what() const noexcept {
return "an error occured while checking the types of the program";
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <exception>
#include "type.hpp"
/// Exception type for problems found while checking the types of a program.
/// It derives from `std::exception` and carries a textual description.
struct type_error : public std::exception {
    /// Message supplied by whoever constructed the error.
    std::string description;

    /// Takes the message by value and moves it into `description`.
    type_error(std::string d)
        : description{std::move(d)} {}

    /// Overrides `std::exception::what`; the definition lives out of line.
    const char* what() const noexcept override;
};
/// A `type_error` that also records the two types that could not be unified,
/// so the handler can print both of them.
struct unification_error : public type_error {
    /// First of the two types passed to the constructor.
    type_ptr left;
    /// Second of the two types passed to the constructor.
    type_ptr right;

    /// Stores both types and sets the base description to
    /// "failed to unify types".
    ///
    /// The base class is named first in the initializer list because C++
    /// always initializes base classes before members, whatever order is
    /// written; listing it first matches the real order and avoids the
    /// compiler's reorder warning.
    unification_error(type_ptr l, type_ptr r)
        : type_error("failed to unify types"),
          left(std::move(l)), right(std::move(r)) {}
};

View File

@ -1,7 +1,8 @@
#include "ast.hpp"
#include "parser.hpp"
#include "type.hpp"
#include <iostream>
#include "parser.hpp"
#include "error.hpp"
#include "type.hpp"
void yy::parser::error(const std::string& msg) {
std::cout << "An error occured: " << msg << std::endl;
@ -9,10 +10,9 @@ void yy::parser::error(const std::string& msg) {
extern std::vector<definition_ptr> program;
void typecheck_program(const std::vector<definition_ptr>& prog) {
type_mgr mgr;
type_env env;
void typecheck_program(
const std::vector<definition_ptr>& prog,
type_mgr& mgr, type_env& env) {
type_ptr int_type = type_ptr(new type_base("Int"));
type_ptr binop_type = type_ptr(new type_arr(
int_type,
@ -40,6 +40,9 @@ void typecheck_program(const std::vector<definition_ptr>& prog) {
int main() {
yy::parser parser;
type_mgr mgr;
type_env env;
parser.parse();
for(auto& definition : program) {
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
@ -51,6 +54,17 @@ int main() {
def->body->print(1, std::cout);
}
typecheck_program(program);
std::cout << program.size() << std::endl;
try {
typecheck_program(program, mgr, env);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";
err.left->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
std::cout << " (2) \033[32m";
err.right->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
} catch(type_error& err) {
std::cout << "failed to type check program: " << err.description << std::endl;
}
}

View File

@ -1,6 +1,7 @@
#include "type.hpp"
#include <sstream>
#include <algorithm>
#include "error.hpp"
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
auto it = mgr.types.find(name);
@ -87,7 +88,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
if(lid->name == rid->name) return;
}
throw 0;
throw unification_error(l, r);
}
void type_mgr::bind(const std::string& s, type_ptr t) {

View File

@ -47,29 +47,29 @@ virtual void print(std::ostream& to) const;
Let's take a look at the implementation. For `ast_int`,
`ast_lid`, and `ast_uid`:
{{< codelines "C++" "compiler/04/ast.cpp" 18 21 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 27 30 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 36 39 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 19 22 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 28 31 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 37 40 >}}
With `ast_binop` things get a bit more interesting.
We call `print` recursively on the children of the
`binop` node:
{{< codelines "C++" "compiler/04/ast.cpp" 45 50 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 46 51 >}}
The same idea for `ast_app`:
{{< codelines "C++" "compiler/04/ast.cpp" 66 71 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 67 72 >}}
Finally, just like `ast_case::typecheck` called
`pattern::match`, `ast_case::print` calls `pattern::print`:
{{< codelines "C++" "compiler/04/ast.cpp" 83 92 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 84 93 >}}
We follow the same implementation strategy for patterns,
but we don't need indentation, or recursion:
{{< codelines "C++" "compiler/04/ast.cpp" 108 110 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 116 121 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}}
Let's print the bodies of each function we receive from the parser:
{{< codelines "C++" "compiler/04/main.cpp" 41 56 >}}
In `main`, let's print the bodies of each function we receive from the parser:
{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
### Printing Types
Types are another thing that we want to be able to inspect, so let's
@ -79,7 +79,7 @@ virtual void print(const type_mgr& mgr, std::ostream& to) const;
```
We need the type manager so we can follow substitutions.
The implementation is simple enough:
{{< codelines "C++" "compiler/04/type.cpp" 5 24 >}}
{{< codelines "C++" "compiler/04/type.cpp" 6 24 >}}
Let's also print out the types we infer. We'll make it a separate loop
at the bottom of the `typecheck_program` function, because it's mostly just
@ -127,6 +127,72 @@ Remember how we build the function type backwards in Part 3? We have to do the s
We replace the fragment with the proper reverse iteration:
{{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}}
### Throwing Exceptions
Throwing 0 is never a good idea. Such an exception doesn't contain any information
that we may find useful in debugging, nor any information that would benefit
the users of the compiler. Instead, let's define our own exception classes,
and throw them instead. We'll make two:
{{< codeblock "C++" "compiler/04/error.hpp" >}}
Only one function needs to be implemented, and it's pretty boring:
{{< codeblock "C++" "compiler/04/error.cpp" >}}
It's time to throw these instead of 0. Let's take a look at the places
we do so.
First, we throw 0 in `type.cpp`, in the `type_mgr::unify` method. This is
where our `unification_error` comes in. The error will
contain the two types that we failed to unify, which we will
later report to the user:
{{< codelines "C++" "compiler/04/type.cpp" 91 91 >}}
Next up, we have a few throws in `ast.cpp`. The first is in `op_name`, but
we will simply replace it with `return "??"`, which will be caught later on
(either way, the `switch` statement falling through would be a compiler bug,
since the user has no way of providing an invalid binary operator). The
first throw we need to address is in `ast_binop::typecheck`, in the case
that we don't find a type for a binary operator. We report this
directly:
{{< codelines "C++" "compiler/04/ast.cpp" 57 57 >}}
We will introduce a new exception into `ast_case::typecheck`. Previously,
we simply passed the type of the expression to be case analyzed into
the pattern matching method. However, since we don't want
case analysis on functions, we ensure that the type of the expression
is `type_base`. If not, we report this:
{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}}
The next exception is in `pattern_constr::match`. It occurs
when the pattern has a constructor we don't recognize, and
that's exactly what we report:
{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}}
The next exception occurs in a loop, when we bind
types for each of the constructor pattern's variables.
We throw when we are unable to cast the remaining
constructor type to a `type_arr`. Conceptually,
this means that the pattern wants to apply the
constructor to more parameters than it actually
takes:
{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}}
We remove the last throw at the bottom of `pattern_constr::match`.
This is because once unification succeeds, we know
that the return type of the pattern is a base type since
we know the type of the case expression is a base type
(we know this because we added that check to `ast_case::typecheck`).
Finally, let's catch and report these exceptions. We could do it
in `typecheck_program`, but I think doing so in `main` is neater.
Since printing types requires a `type_mgr`, we'll move the
declarations of both `type_mgr` and `type_env` to the top of
main, and pass them to `typecheck_program` as parameters. Then,
we can surround the call to `typecheck_program` with
try/catch:
{{< codelines "C++" "compiler/04/main.cpp" 57 69 >}}
We use some [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code)
to color the types in the case of a unification error.
### Setting up CMake
We will set up CMake as our build system. This would be extremely easy
@ -147,16 +213,16 @@ in order to compile. We add this dependency:
Finally, we add our source code to a CMake target. We use
the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to
pass in the source files generated by Flex and Bison.
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 22 >}}
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 23 >}}
Almost there! `parser.cpp` will be generated in the `build` directory
during an out-of-source build, and so will `parser.hpp`. When building,
`parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for
`parser.hpp`. We want them to be able to find each other, so we
add both the source directory and the build (binary) directory to
the list of includes directories:
the list of include directories:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 23 24 >}}
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 24 25 >}}
That's it for CMake! Let's try our build:
```
@ -164,5 +230,7 @@ cmake -S . -B build
cd build && make -j8
```
We get an executable called `compiler`. Excellent! Here's the whole file:
{{< codeblock "CMake" "compiler/04/CMakeLists.txt" >}}
### Updated Code
We've made a lot of changes to the codebase, and I've only shown snippets of the code
so far. If you'd like to see the whole codebase, you can go to my site's git repository
and check out [the code so far](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/04).