Add errors section to Part 4 of compiler posts

This commit is contained in:
Danila Fedorin 2019-08-28 15:34:13 -07:00
parent df1101a14c
commit 05af1350c8
7 changed files with 148 additions and 32 deletions

View File

@ -16,6 +16,7 @@ add_executable(compiler
ast.cpp ast.hpp definition.cpp ast.cpp ast.hpp definition.cpp
env.cpp env.hpp env.cpp env.hpp
type.cpp type.hpp type.cpp type.hpp
error.cpp error.hpp
${BISON_parser_OUTPUTS} ${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS} ${FLEX_scanner_OUTPUTS}
main.cpp main.cpp

View File

@ -1,5 +1,6 @@
#include "ast.hpp" #include "ast.hpp"
#include <ostream> #include <ostream>
#include "error.hpp"
std::string op_name(binop op) { std::string op_name(binop op) {
switch(op) { switch(op) {
@ -8,7 +9,7 @@ std::string op_name(binop op) {
case TIMES: return "*"; case TIMES: return "*";
case DIVIDE: return "/"; case DIVIDE: return "/";
} }
throw 0; return "??";
} }
void print_indent(int n, std::ostream& to) { void print_indent(int n, std::ostream& to) {
@ -53,7 +54,7 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env); type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env); type_ptr rtype = right->typecheck(mgr, env);
type_ptr ftype = env.lookup(op_name(op)); type_ptr ftype = env.lookup(op_name(op));
if(!ftype) throw 0; if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
type_ptr return_type = mgr.new_type(); type_ptr return_type = mgr.new_type();
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type)); type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
@ -92,9 +93,14 @@ void ast_case::print(int indent, std::ostream& to) const {
} }
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr case_type = of->typecheck(mgr, env); type_var* var;
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
type_ptr branch_type = mgr.new_type(); type_ptr branch_type = mgr.new_type();
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
for(auto& branch : branches) { for(auto& branch : branches) {
type_env new_env = env.scope(); type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env); branch->pat->match(case_type, mgr, new_env);
@ -122,17 +128,17 @@ void pattern_constr::print(std::ostream& to) const {
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const { void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
type_ptr constructor_type = env.lookup(constr); type_ptr constructor_type = env.lookup(constr);
if(!constructor_type) throw 0; if(!constructor_type) {
throw type_error(std::string("pattern using unknown constructor ") + constr);
}
for(int i = 0; i < params.size(); i++) { for(int i = 0; i < params.size(); i++) {
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get()); type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
if(!arr) throw 0; if(!arr) throw type_error("too many parameters in constructor pattern");
env.bind(params[i], arr->left); env.bind(params[i], arr->left);
constructor_type = arr->right; constructor_type = arr->right;
} }
mgr.unify(t, constructor_type); mgr.unify(t, constructor_type);
type_base* result_type = dynamic_cast<type_base*>(constructor_type.get());
if(!result_type) throw 0;
} }

View File

@ -0,0 +1,5 @@
#include "error.hpp"
const char* type_error::what() const noexcept {
return "an error occured while checking the types of the program";
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <exception>
#include "type.hpp"
/**
 * Base class for errors raised while type checking a program.
 *
 * Carries a human-readable `description` of what went wrong.
 */
struct type_error : std::exception {
    /// Human-readable explanation of the failure.
    std::string description;

    /// Builds an error carrying `d` as its description.
    /// Marked `explicit` so a plain string never converts to an error implicitly.
    explicit type_error(std::string d)
        : description(std::move(d)) {}

    /// Declared here only; the definition lives outside this struct.
    const char* what() const noexcept override;
};
/**
 * Error raised when two types cannot be unified.
 *
 * Stores both offending types so that whoever catches the error can
 * report them.
 */
struct unification_error : public type_error {
    /// First of the two types that failed to unify.
    type_ptr left;
    /// Second of the two types that failed to unify.
    type_ptr right;

    /// Takes ownership of both type pointers.
    /// The base class is listed first because C++ always initializes bases
    /// before members, regardless of the order written in the list.
    unification_error(type_ptr l, type_ptr r)
        : type_error("failed to unify types"),
          left(std::move(l)), right(std::move(r)) {}
};

View File

@ -1,7 +1,8 @@
#include "ast.hpp" #include "ast.hpp"
#include "parser.hpp"
#include "type.hpp"
#include <iostream> #include <iostream>
#include "parser.hpp"
#include "error.hpp"
#include "type.hpp"
void yy::parser::error(const std::string& msg) { void yy::parser::error(const std::string& msg) {
std::cout << "An error occured: " << msg << std::endl; std::cout << "An error occured: " << msg << std::endl;
@ -9,10 +10,9 @@ void yy::parser::error(const std::string& msg) {
extern std::vector<definition_ptr> program; extern std::vector<definition_ptr> program;
void typecheck_program(const std::vector<definition_ptr>& prog) { void typecheck_program(
type_mgr mgr; const std::vector<definition_ptr>& prog,
type_env env; type_mgr& mgr, type_env& env) {
type_ptr int_type = type_ptr(new type_base("Int")); type_ptr int_type = type_ptr(new type_base("Int"));
type_ptr binop_type = type_ptr(new type_arr( type_ptr binop_type = type_ptr(new type_arr(
int_type, int_type,
@ -40,6 +40,9 @@ void typecheck_program(const std::vector<definition_ptr>& prog) {
int main() { int main() {
yy::parser parser; yy::parser parser;
type_mgr mgr;
type_env env;
parser.parse(); parser.parse();
for(auto& definition : program) { for(auto& definition : program) {
definition_defn* def = dynamic_cast<definition_defn*>(definition.get()); definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
@ -51,6 +54,17 @@ int main() {
def->body->print(1, std::cout); def->body->print(1, std::cout);
} }
typecheck_program(program); try {
std::cout << program.size() << std::endl; typecheck_program(program, mgr, env);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";
err.left->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
std::cout << " (2) \033[32m";
err.right->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
} catch(type_error& err) {
std::cout << "failed to type check program: " << err.description << std::endl;
}
} }

View File

@ -1,6 +1,7 @@
#include "type.hpp" #include "type.hpp"
#include <sstream> #include <sstream>
#include <algorithm> #include <algorithm>
#include "error.hpp"
void type_var::print(const type_mgr& mgr, std::ostream& to) const { void type_var::print(const type_mgr& mgr, std::ostream& to) const {
auto it = mgr.types.find(name); auto it = mgr.types.find(name);
@ -87,7 +88,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
if(lid->name == rid->name) return; if(lid->name == rid->name) return;
} }
throw 0; throw unification_error(l, r);
} }
void type_mgr::bind(const std::string& s, type_ptr t) { void type_mgr::bind(const std::string& s, type_ptr t) {

View File

@ -47,29 +47,29 @@ virtual void print(std::ostream& to) const;
Let's take a look at the implementation. For `ast_int`, Let's take a look at the implementation. For `ast_int`,
`ast_lid`, and `ast_uid`: `ast_lid`, and `ast_uid`:
{{< codelines "C++" "compiler/04/ast.cpp" 18 21 >}} {{< codelines "C++" "compiler/04/ast.cpp" 19 22 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 27 30 >}} {{< codelines "C++" "compiler/04/ast.cpp" 28 31 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 36 39 >}} {{< codelines "C++" "compiler/04/ast.cpp" 37 40 >}}
With `ast_binop` things get a bit more interesting. With `ast_binop` things get a bit more interesting.
We call `print` recursively on the children of the We call `print` recursively on the children of the
`binop` node: `binop` node:
{{< codelines "C++" "compiler/04/ast.cpp" 45 50 >}} {{< codelines "C++" "compiler/04/ast.cpp" 46 51 >}}
The same idea for `ast_app`: The same idea for `ast_app`:
{{< codelines "C++" "compiler/04/ast.cpp" 66 71 >}} {{< codelines "C++" "compiler/04/ast.cpp" 67 72 >}}
Finally, just like `ast_case::typecheck` called Finally, just like `ast_case::typecheck` called
`pattern::match`, `ast_case::print` calls `pattern::print`: `pattern::match`, `ast_case::print` calls `pattern::print`:
{{< codelines "C++" "compiler/04/ast.cpp" 83 92 >}} {{< codelines "C++" "compiler/04/ast.cpp" 84 93 >}}
We follow the same implementation strategy for patterns, We follow the same implementation strategy for patterns,
but we don't need indentation, or recursion: but we don't need indentation, or recursion:
{{< codelines "C++" "compiler/04/ast.cpp" 108 110 >}} {{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 116 121 >}} {{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}}
Let's print the bodies of each function we receive from the parser: In `main`, let's print the bodies of each function we receive from the parser:
{{< codelines "C++" "compiler/04/main.cpp" 41 56 >}} {{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
### Printing Types ### Printing Types
Types are another thing that we want to be able to inspect, so let's Types are another thing that we want to be able to inspect, so let's
@ -79,7 +79,7 @@ virtual void print(const type_mgr& mgr, std::ostream& to) const;
``` ```
We need the type manager so we can follow substitutions. We need the type manager so we can follow substitutions.
The implementation is simple enough: The implementation is simple enough:
{{< codelines "C++" "compiler/04/type.cpp" 5 24 >}} {{< codelines "C++" "compiler/04/type.cpp" 6 24 >}}
Let's also print out the types we infer. We'll make it a separate loop Let's also print out the types we infer. We'll make it a separate loop
at the bottom of the `typecheck_program` function, because it's mostly just at the bottom of the `typecheck_program` function, because it's mostly just
@ -127,6 +127,72 @@ Remember how we build the function type backwards in Part 3? We have to do the s
We replace the fragment with the proper reverse iteration: We replace the fragment with the proper reverse iteration:
{{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}} {{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}}
### Throwing Exceptions
Throwing 0 is never a good idea. Such an exception doesn't contain any information
that we may find useful in debugging, nor any information that would benefit
the users of the compiler. Instead, let's define our own exception classes,
and throw them instead. We'll make two:
{{< codeblock "C++" "compiler/04/error.hpp" >}}
Only one function needs to be implemented, and it's pretty boring:
{{< codeblock "C++" "compiler/04/error.cpp" >}}
It's time to throw these instead of 0. Let's take a look at the places
we do so.
First, we throw 0 in `type.cpp`, in the `type_mgr::unify` method. This is
where our `unification_error` comes in. The error will
contain the two types that we failed to unify, which we will
later report to the user:
{{< codelines "C++" "compiler/04/type.cpp" 91 91 >}}
Next up, we have a few throws in `ast.cpp`. The first is in `op_name`, but
we will simply replace it with `return "??"`, which will be caught later on
(either way, the switch statement falling through would be a compiler bug,
since the user has no way of providing an invalid binary operator). The
first throw we need to address is in `ast_binop::typecheck`, in the case
that we don't find a type for a binary operator. We report this
directly:
{{< codelines "C++" "compiler/04/ast.cpp" 57 57 >}}
We will introduce a new exception into `ast_case::typecheck`. Previously,
we simply passed the type of the expression to be case analyzed into
the pattern matching method. However, since we don't want
case analysis on functions, we ensure that the type of the expression
is `type_base`. If not, we report this:
{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}}
The next exception is in `pattern_constr::match`. It occurs
when the pattern has a constructor we don't recognize, and
that's exactly what we report:
{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}}
The next exception occurs in a loop, when we bind
types for each of the constructor pattern's variables.
We throw when we are unable to cast the remaining
constructor type to a `type_arr`. Conceptually,
this means that the pattern wants to apply the
constructor to more parameters than it actually
takes:
{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}}
We remove the last throw at the bottom of `pattern_constr::match`.
This is because once unification succeeds, we know
that the return type of the pattern is a base type since
we know the type of the case expression is a base type
(we know this because we added that check to `ast_case::typecheck`).
Finally, let's catch and report these exceptions. We could do it
in `typecheck_program`, but I think doing so in `main` is neater.
Since printing types requires a `type_mgr`, we'll move the
declarations of both `type_mgr` and `type_env` to the top of
main, and pass them to `typecheck_program` as parameters. Then,
we can surround the call to `typecheck_program` with
try/catch:
{{< codelines "C++" "compiler/04/main.cpp" 57 69 >}}
We use some [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code)
to color the types in the case of a unification error.
### Setting up CMake ### Setting up CMake
We will set up CMake as our build system. This would be extremely easy We will set up CMake as our build system. This would be extremely easy
@ -147,16 +213,16 @@ in order to compile. We add this dependency:
Finally, we add our source code to a CMake target. We use Finally, we add our source code to a CMake target. We use
the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to
pass in the source files generated by Flex and Bison. pass in the source files generated by Flex and Bison.
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 22 >}} {{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 23 >}}
Almost there! `parser.cpp` will be generated in the `build` directory Almost there! `parser.cpp` will be generated in the `build` directory
during an out-of-source build, and so will `parser.hpp`. When building, during an out-of-source build, and so will `parser.hpp`. When building,
`parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for `parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for
`parser.hpp`. We want them to be able to find each other, so we `parser.hpp`. We want them to be able to find each other, so we
add both the source directory and the build (binary) directory to add both the source directory and the build (binary) directory to
the list of includes directories: the list of include directories:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 23 24 >}} {{< codelines "CMake" "compiler/04/CMakeLists.txt" 24 25 >}}
That's it for CMake! Let's try our build: That's it for CMake! Let's try our build:
``` ```
@ -164,5 +230,7 @@ cmake -S . -B build
cd build && make -j8 cd build && make -j8
``` ```
We get an executable called `compiler`. Excellent! Here's the whole file: ### Updated Code
{{< codeblock "CMake" "compiler/04/CMakeLists.txt" >}} We've made a lot of changes to the codebase, and I've only shown snippets of the code
so far. If you'd like to see the whole codebase, you can go to my site's git repository
and check out [the code so far](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/04).