Add errors section to Part 4 of compiler posts
This commit is contained in:
parent
df1101a14c
commit
05af1350c8
|
@ -16,6 +16,7 @@ add_executable(compiler
|
|||
ast.cpp ast.hpp definition.cpp
|
||||
env.cpp env.hpp
|
||||
type.cpp type.hpp
|
||||
error.cpp error.hpp
|
||||
${BISON_parser_OUTPUTS}
|
||||
${FLEX_scanner_OUTPUTS}
|
||||
main.cpp
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "ast.hpp"
|
||||
#include <ostream>
|
||||
#include "error.hpp"
|
||||
|
||||
std::string op_name(binop op) {
|
||||
switch(op) {
|
||||
|
@ -8,7 +9,7 @@ std::string op_name(binop op) {
|
|||
case TIMES: return "*";
|
||||
case DIVIDE: return "/";
|
||||
}
|
||||
throw 0;
|
||||
return "??";
|
||||
}
|
||||
|
||||
void print_indent(int n, std::ostream& to) {
|
||||
|
@ -53,7 +54,7 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
type_ptr ftype = env.lookup(op_name(op));
|
||||
if(!ftype) throw 0;
|
||||
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
|
||||
|
@ -92,9 +93,14 @@ void ast_case::print(int indent, std::ostream& to) const {
|
|||
}
|
||||
|
||||
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||
type_ptr case_type = of->typecheck(mgr, env);
|
||||
type_var* var;
|
||||
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||
type_ptr branch_type = mgr.new_type();
|
||||
|
||||
if(!dynamic_cast<type_base*>(case_type.get())) {
|
||||
throw type_error("attempting case analysis of non-data type");
|
||||
}
|
||||
|
||||
for(auto& branch : branches) {
|
||||
type_env new_env = env.scope();
|
||||
branch->pat->match(case_type, mgr, new_env);
|
||||
|
@ -122,17 +128,17 @@ void pattern_constr::print(std::ostream& to) const {
|
|||
|
||||
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
|
||||
type_ptr constructor_type = env.lookup(constr);
|
||||
if(!constructor_type) throw 0;
|
||||
if(!constructor_type) {
|
||||
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
||||
}
|
||||
|
||||
for(int i = 0; i < params.size(); i++) {
|
||||
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||
if(!arr) throw 0;
|
||||
if(!arr) throw type_error("too many parameters in constructor pattern");
|
||||
|
||||
env.bind(params[i], arr->left);
|
||||
constructor_type = arr->right;
|
||||
}
|
||||
|
||||
mgr.unify(t, constructor_type);
|
||||
type_base* result_type = dynamic_cast<type_base*>(constructor_type.get());
|
||||
if(!result_type) throw 0;
|
||||
}
|
||||
|
|
5
code/compiler/04/error.cpp
Normal file
5
code/compiler/04/error.cpp
Normal file
|
@ -0,0 +1,5 @@
|
|||
#include "error.hpp"
|
||||
|
||||
const char* type_error::what() const noexcept {
|
||||
return "an error occured while checking the types of the program";
|
||||
}
|
21
code/compiler/04/error.hpp
Normal file
21
code/compiler/04/error.hpp
Normal file
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
#include <exception>
|
||||
#include "type.hpp"
|
||||
|
||||
/// Exception describing a failure encountered while type checking.
struct type_error : std::exception {
    /// Human-readable explanation of what went wrong.
    std::string description;

    /// Stores `d` as the description of this error.
    ///
    /// Marked `explicit` so that a bare string is never silently converted
    /// into an exception object; `type_error("...")` and
    /// `type_error(std::string(...))` continue to work as before.
    explicit type_error(std::string d)
        : description(std::move(d)) {}

    /// Declared here, defined out of line in error.cpp.
    const char* what() const noexcept override;
};
|
||||
|
||||
struct unification_error : public type_error {
|
||||
type_ptr left;
|
||||
type_ptr right;
|
||||
|
||||
unification_error(type_ptr l, type_ptr r)
|
||||
: left(std::move(l)), right(std::move(r)),
|
||||
type_error("failed to unify types") {}
|
||||
};
|
|
@ -1,7 +1,8 @@
|
|||
#include "ast.hpp"
|
||||
#include "parser.hpp"
|
||||
#include "type.hpp"
|
||||
#include <iostream>
|
||||
#include "parser.hpp"
|
||||
#include "error.hpp"
|
||||
#include "type.hpp"
|
||||
|
||||
void yy::parser::error(const std::string& msg) {
|
||||
std::cout << "An error occured: " << msg << std::endl;
|
||||
|
@ -9,10 +10,9 @@ void yy::parser::error(const std::string& msg) {
|
|||
|
||||
extern std::vector<definition_ptr> program;
|
||||
|
||||
void typecheck_program(const std::vector<definition_ptr>& prog) {
|
||||
type_mgr mgr;
|
||||
type_env env;
|
||||
|
||||
void typecheck_program(
|
||||
const std::vector<definition_ptr>& prog,
|
||||
type_mgr& mgr, type_env& env) {
|
||||
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||
type_ptr binop_type = type_ptr(new type_arr(
|
||||
int_type,
|
||||
|
@ -40,6 +40,9 @@ void typecheck_program(const std::vector<definition_ptr>& prog) {
|
|||
|
||||
int main() {
|
||||
yy::parser parser;
|
||||
type_mgr mgr;
|
||||
type_env env;
|
||||
|
||||
parser.parse();
|
||||
for(auto& definition : program) {
|
||||
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||
|
@ -51,6 +54,17 @@ int main() {
|
|||
|
||||
def->body->print(1, std::cout);
|
||||
}
|
||||
typecheck_program(program);
|
||||
std::cout << program.size() << std::endl;
|
||||
try {
|
||||
typecheck_program(program, mgr, env);
|
||||
} catch(unification_error& err) {
|
||||
std::cout << "failed to unify types: " << std::endl;
|
||||
std::cout << " (1) \033[34m";
|
||||
err.left->print(mgr, std::cout);
|
||||
std::cout << "\033[0m" << std::endl;
|
||||
std::cout << " (2) \033[32m";
|
||||
err.right->print(mgr, std::cout);
|
||||
std::cout << "\033[0m" << std::endl;
|
||||
} catch(type_error& err) {
|
||||
std::cout << "failed to type check program: " << err.description << std::endl;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "type.hpp"
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "error.hpp"
|
||||
|
||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
auto it = mgr.types.find(name);
|
||||
|
@ -87,7 +88,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
|
|||
if(lid->name == rid->name) return;
|
||||
}
|
||||
|
||||
throw 0;
|
||||
throw unification_error(l, r);
|
||||
}
|
||||
|
||||
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||
|
|
|
@ -47,29 +47,29 @@ virtual void print(std::ostream& to) const;
|
|||
|
||||
Let's take a look at the implementation. For `ast_int`,
|
||||
`ast_lid`, and `ast_uid`:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 18 21 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 27 30 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 36 39 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 19 22 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 28 31 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 37 40 >}}
|
||||
|
||||
With `ast_binop` things get a bit more interesting.
|
||||
We call `print` recursively on the children of the
|
||||
`binop` node:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 45 50 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 46 51 >}}
|
||||
|
||||
The same idea for `ast_app`:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 66 71 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 67 72 >}}
|
||||
|
||||
Finally, just like `ast_case::typecheck` called
|
||||
`pattern::match`, `ast_case::print` calls `pattern::print`:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 83 92 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 84 93 >}}
|
||||
|
||||
We follow the same implementation strategy for patterns,
|
||||
but we don't need indentation, or recursion:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 108 110 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 116 121 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}}
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}}
|
||||
|
||||
Let's print the bodies of each function we receive from the parser:
|
||||
{{< codelines "C++" "compiler/04/main.cpp" 41 56 >}}
|
||||
In `main`, let's print the bodies of each function we receive from the parser:
|
||||
{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
|
||||
|
||||
### Printing Types
|
||||
Types are another thing that we want to be able to inspect, so let's
|
||||
|
@ -79,7 +79,7 @@ virtual void print(const type_mgr& mgr, std::ostream& to) const;
|
|||
```
|
||||
We need the type manager so we can follow substitutions.
|
||||
The implementation is simple enough:
|
||||
{{< codelines "C++" "compiler/04/type.cpp" 5 24 >}}
|
||||
{{< codelines "C++" "compiler/04/type.cpp" 6 24 >}}
|
||||
|
||||
Let's also print out the types we infer. We'll make it a separate loop
|
||||
at the bottom of the `typecheck_program` function, because it's mostly just
|
||||
|
@ -127,6 +127,72 @@ Remember how we build the function type backwards in Part 3? We have to do the s
|
|||
We replace the fragment with the proper reverse iteration:
|
||||
{{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}}
|
||||
|
||||
### Throwing Exceptions
|
||||
Throwing 0 is never a good idea. Such an exception doesn't contain any information
|
||||
that we may find useful in debugging, nor any information that would benefit
|
||||
the users of the compiler. Instead, let's define our own exception classes,
|
||||
and throw them instead. We'll make two:
|
||||
{{< codeblock "C++" "compiler/04/error.hpp" >}}
|
||||
|
||||
Only one function needs to be implemented, and it's pretty boring:
|
||||
{{< codeblock "C++" "compiler/04/error.cpp" >}}
|
||||
|
||||
It's time to throw these instead of 0. Let's take a look at the places
|
||||
we do so.
|
||||
|
||||
First, we throw 0 in `type.cpp`, in the `type_mgr::unify` method. This is
|
||||
where our `unification_error` comes in. The error will
|
||||
contain the two types that we failed to unify, which we will
|
||||
later report to the user:
|
||||
{{< codelines "C++" "compiler/04/type.cpp" 91 91 >}}
|
||||
|
||||
Next up, we have a few throws in `ast.cpp`. The first is in `op_name`, but
|
||||
we will simply replace it with `return "??"`, which will be caught later on
|
||||
(either way, the case expression falling through would be a compiler bug,
|
||||
since the user has no way of providing an invalid binary operator). The
|
||||
first throw we need to address is in `ast_binop::typecheck`, in the case
|
||||
that we don't find a type for a binary operator. We report this
|
||||
directly:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 57 57 >}}
|
||||
|
||||
We will introduce a new exception into `ast_case::typecheck`. Previously,
|
||||
we simply passed the type of the expression to be case analyzed into
|
||||
the pattern matching method. However, since we don't want
|
||||
case analysis on functions, we ensure that the type of the expression
|
||||
is `type_base`. If not, we report this:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}}
|
||||
|
||||
The next exception is in `pattern_constr::match`. It occurs
|
||||
when the pattern has a constructor we don't recognize, and
|
||||
that's exactly what we report:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}}
|
||||
|
||||
The next exception occurs in a loop, when we bind
|
||||
types for each of the constructor pattern's variables.
|
||||
We throw when we are unable to cast the remaining
|
||||
constructor type to a `type_arr`. Conceptually,
|
||||
this means that the pattern wants to apply the
|
||||
constructor to more parameters than it actually
|
||||
takes:
|
||||
{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}}
|
||||
|
||||
We remove the last throw at the bottom of `pattern_constr::match`.
|
||||
This is because once unification succeeds, we know
|
||||
that the return type of the pattern is a base type since
|
||||
we know the type of the case expression is a base type
|
||||
(we know this because we added that check to `ast_case::typecheck`).
|
||||
|
||||
Finally, let's catch and report these exceptions. We could do it
|
||||
in `typecheck_program`, but I think doing so in `main` is neater.
|
||||
Since printing types requires a `type_mgr`, we'll move the
|
||||
declarations of both `type_mgr` and `type_env` to the top of
|
||||
main, and pass them to `typecheck_program` as parameters. Then,
|
||||
we can surround the call to `typecheck_program` with
|
||||
try/catch:
|
||||
{{< codelines "C++" "compiler/04/main.cpp" 57 69 >}}
|
||||
|
||||
We use some [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code)
|
||||
to color the types in the case of a unification error.
|
||||
|
||||
### Setting up CMake
|
||||
We will set up CMake as our build system. This would be extremely easy
|
||||
|
@ -147,16 +213,16 @@ in order to compile. We add this dependency:
|
|||
Finally, we add our source code to a CMake target. We use
|
||||
the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to
|
||||
pass in the source files generated by Flex and Bison.
|
||||
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 22 >}}
|
||||
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 23 >}}
|
||||
|
||||
Almost there! `parser.cpp` will be generated in the `build` directory
|
||||
during an out-of-source build, and so will `parser.hpp`. When building,
|
||||
`parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for
|
||||
`parser.hpp`. We want them to be able to find each other, so we
|
||||
add both the source directory and the build (binary) directory to
|
||||
the list of includes directories:
|
||||
the list of include directories:
|
||||
|
||||
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 23 24 >}}
|
||||
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 24 25 >}}
|
||||
|
||||
That's it for CMake! Let's try our build:
|
||||
```
|
||||
|
@ -164,5 +230,7 @@ cmake -S . -B build
|
|||
cd build && make -j8
|
||||
```
|
||||
|
||||
We get an executable called `compiler`. Excellent! Here's the whole file:
|
||||
{{< codeblock "CMake" "compiler/04/CMakeLists.txt" >}}
|
||||
### Updated Code
|
||||
We've made a lot of changes to the codebase, and I've only shown snippets of the code
|
||||
so far. If you'd like to see the whole codebase, you can go to my site's git repository
|
||||
and check out [the code so far](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/04).
|
||||
|
|
Loading…
Reference in New Issue
Block a user