diff --git a/code/compiler/04/CMakeLists.txt b/code/compiler/04/CMakeLists.txt index cfee544..9d2d571 100644 --- a/code/compiler/04/CMakeLists.txt +++ b/code/compiler/04/CMakeLists.txt @@ -16,6 +16,7 @@ add_executable(compiler ast.cpp ast.hpp definition.cpp env.cpp env.hpp type.cpp type.hpp + error.cpp error.hpp ${BISON_parser_OUTPUTS} ${FLEX_scanner_OUTPUTS} main.cpp diff --git a/code/compiler/04/ast.cpp b/code/compiler/04/ast.cpp index b9f920f..f8a88df 100644 --- a/code/compiler/04/ast.cpp +++ b/code/compiler/04/ast.cpp @@ -1,5 +1,6 @@ #include "ast.hpp" #include +#include "error.hpp" std::string op_name(binop op) { switch(op) { @@ -8,7 +9,7 @@ std::string op_name(binop op) { case TIMES: return "*"; case DIVIDE: return "/"; } - throw 0; + return "??"; } void print_indent(int n, std::ostream& to) { @@ -53,7 +54,7 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr ltype = left->typecheck(mgr, env); type_ptr rtype = right->typecheck(mgr, env); type_ptr ftype = env.lookup(op_name(op)); - if(!ftype) throw 0; + if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op)); type_ptr return_type = mgr.new_type(); type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type)); @@ -92,9 +93,14 @@ void ast_case::print(int indent, std::ostream& to) const { } type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const { - type_ptr case_type = of->typecheck(mgr, env); + type_var* var; + type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var); type_ptr branch_type = mgr.new_type(); + if(!dynamic_cast(case_type.get())) { + throw type_error("attempting case analysis of non-data type"); + } + for(auto& branch : branches) { type_env new_env = env.scope(); branch->pat->match(case_type, mgr, new_env); @@ -122,17 +128,17 @@ void pattern_constr::print(std::ostream& to) const { void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const { type_ptr constructor_type = env.lookup(constr); - 
if(!constructor_type) throw 0; + if(!constructor_type) { + throw type_error(std::string("pattern using unknown constructor ") + constr); + } for(int i = 0; i < params.size(); i++) { type_arr* arr = dynamic_cast(constructor_type.get()); - if(!arr) throw 0; + if(!arr) throw type_error("too many parameters in constructor pattern"); env.bind(params[i], arr->left); constructor_type = arr->right; } mgr.unify(t, constructor_type); - type_base* result_type = dynamic_cast(constructor_type.get()); - if(!result_type) throw 0; } diff --git a/code/compiler/04/error.cpp b/code/compiler/04/error.cpp new file mode 100644 index 0000000..f5125e3 --- /dev/null +++ b/code/compiler/04/error.cpp @@ -0,0 +1,5 @@ +#include "error.hpp" + +const char* type_error::what() const noexcept { + return "an error occured while checking the types of the program"; +} diff --git a/code/compiler/04/error.hpp b/code/compiler/04/error.hpp new file mode 100644 index 0000000..5bfbc7e --- /dev/null +++ b/code/compiler/04/error.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include "type.hpp" + +struct type_error : std::exception { + std::string description; + + type_error(std::string d) + : description(std::move(d)) {} + + const char* what() const noexcept override; +}; + +struct unification_error : public type_error { + type_ptr left; + type_ptr right; + + unification_error(type_ptr l, type_ptr r) + : left(std::move(l)), right(std::move(r)), + type_error("failed to unify types") {} +}; diff --git a/code/compiler/04/main.cpp b/code/compiler/04/main.cpp index 337bc7c..60dd9c9 100644 --- a/code/compiler/04/main.cpp +++ b/code/compiler/04/main.cpp @@ -1,7 +1,8 @@ #include "ast.hpp" -#include "parser.hpp" -#include "type.hpp" #include +#include "parser.hpp" +#include "error.hpp" +#include "type.hpp" void yy::parser::error(const std::string& msg) { std::cout << "An error occured: " << msg << std::endl; @@ -9,10 +10,9 @@ void yy::parser::error(const std::string& msg) { extern std::vector program; -void 
typecheck_program(const std::vector& prog) { - type_mgr mgr; - type_env env; - +void typecheck_program( + const std::vector& prog, + type_mgr& mgr, type_env& env) { type_ptr int_type = type_ptr(new type_base("Int")); type_ptr binop_type = type_ptr(new type_arr( int_type, @@ -40,6 +40,9 @@ void typecheck_program(const std::vector& prog) { int main() { yy::parser parser; + type_mgr mgr; + type_env env; + parser.parse(); for(auto& definition : program) { definition_defn* def = dynamic_cast(definition.get()); @@ -51,6 +54,17 @@ int main() { def->body->print(1, std::cout); } - typecheck_program(program); - std::cout << program.size() << std::endl; + try { + typecheck_program(program, mgr, env); + } catch(unification_error& err) { + std::cout << "failed to unify types: " << std::endl; + std::cout << " (1) \033[34m"; + err.left->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + std::cout << " (2) \033[32m"; + err.right->print(mgr, std::cout); + std::cout << "\033[0m" << std::endl; + } catch(type_error& err) { + std::cout << "failed to type check program: " << err.description << std::endl; + } } diff --git a/code/compiler/04/type.cpp b/code/compiler/04/type.cpp index 8a8e22d..0fc7364 100644 --- a/code/compiler/04/type.cpp +++ b/code/compiler/04/type.cpp @@ -1,6 +1,7 @@ #include "type.hpp" #include #include +#include "error.hpp" void type_var::print(const type_mgr& mgr, std::ostream& to) const { auto it = mgr.types.find(name); @@ -87,7 +88,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) { if(lid->name == rid->name) return; } - throw 0; + throw unification_error(l, r); } void type_mgr::bind(const std::string& s, type_ptr t) { diff --git a/content/blog/04_compiler_improvements.md b/content/blog/04_compiler_improvements.md index 436dd79..c9e7b4a 100644 --- a/content/blog/04_compiler_improvements.md +++ b/content/blog/04_compiler_improvements.md @@ -47,29 +47,29 @@ virtual void print(std::ostream& to) const; Let's take a look at the implementation. 
For `ast_int`, `ast_lid`, and `ast_uid`: -{{< codelines "C++" "compiler/04/ast.cpp" 18 21 >}} -{{< codelines "C++" "compiler/04/ast.cpp" 27 30 >}} -{{< codelines "C++" "compiler/04/ast.cpp" 36 39 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 19 22 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 28 31 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 37 40 >}} With `ast_binop` things get a bit more interesting. We call `print` recursively on the children of the `binop` node: -{{< codelines "C++" "compiler/04/ast.cpp" 45 50 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 46 51 >}} The same idea for `ast_app`: -{{< codelines "C++" "compiler/04/ast.cpp" 66 71 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 67 72 >}} Finally, just like `ast_case::typecheck` called `pattern::match`, `ast_case::print` calls `pattern::print`: -{{< codelines "C++" "compiler/04/ast.cpp" 83 92 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 84 93 >}} We follow the same implementation strategy for patterns, but we don't need indentation, or recursion: -{{< codelines "C++" "compiler/04/ast.cpp" 108 110 >}} -{{< codelines "C++" "compiler/04/ast.cpp" 116 121 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}} +{{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}} -Let's print the bodies of each function we receive from the parser: -{{< codelines "C++" "compiler/04/main.cpp" 41 56 >}} +In `main`, let's print the bodies of each function we receive from the parser: +{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}} ### Printing Types Types are another thing that we want to be able to inspect, so let's @@ -79,7 +79,7 @@ virtual void print(const type_mgr& mgr, std::ostream& to) const; ``` We need the type manager so we can follow substitutions. The implementation is simple enough: -{{< codelines "C++" "compiler/04/type.cpp" 5 24 >}} +{{< codelines "C++" "compiler/04/type.cpp" 6 24 >}} Let's also print out the types we infer. 
We'll make it a separate loop at the bottom of the `typecheck_program` function, because it's mostly just @@ -127,6 +127,72 @@ Remember how we build the function type backwards in Part 3? We have to do the s We replace the fragment with the proper reverse iteration: {{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}} +### Throwing Exceptions +Throwing 0 is never a good idea. Such an exception doesn't contain any information +that we may find useful in debugging, nor any information that would benefit +the users of the compiler. Instead, let's define our own exception classes, +and throw them instead. We'll make two: +{{< codeblock "C++" "compiler/04/error.hpp" >}} + +Only one function needs to be implemented, and it's pretty boring: +{{< codeblock "C++" "compiler/04/error.cpp" >}} + +It's time to throw these instead of 0. Let's take a look at the places +we do so. + +First, we throw 0 in `type.cpp`, in the `type_mgr::unify` method. This is +where our `unification_error` comes in. The error will +contain the two types that we failed to unify, which we will +later report to the user: +{{< codelines "C++" "compiler/04/type.cpp" 91 91 >}} + +Next up, we have a few throws in `ast.cpp`. The first is in `op_name`, but +we will simply replace it with `return "??"`, which will be caught later on +(either way, the case expression falling through would be a compiler bug, +since the user has no way of providing an invalid binary operator). The +first throw we need to address is in `ast_binop::typecheck`, in the case +that we don't find a type for a binary operator. We report this +directly: +{{< codelines "C++" "compiler/04/ast.cpp" 57 57 >}} + +We will introduce a new exception into `ast_case::typecheck`. Previously, +we simply passed the type of the expression to be case analyzed into +the pattern matching method. However, since we don't want +case analysis on functions, we ensure that the type of the expression +is `type_base`.
If not, we report this: +{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}} + +The next exception is in `pattern_constr::match`. It occurs +when the pattern has a constructor we don't recognize, and +that's exactly what we report: +{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}} + +The next exception occurs in a loop, when we bind +types for each of the constructor pattern's variables. +We throw when we are unable to cast the remaining +constructor type to a `type_arr`. Conceptually, +this means that the pattern wants to apply the +constructor to more parameters than it actually +takes: +{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}} + +We remove the last throw at the bottom of `pattern_constr::match`. +This is because once unification succeeds, we know +that the return type of the pattern is a base type since +we know the type of the case expression is a base type +(we know this because we added that check to `ast_case::typecheck`). + +Finally, let's catch and report these exceptions. We could do it +in `typecheck_program`, but I think doing so in `main` is neater. +Since printing types requires a `type_mgr`, we'll move the +declarations of both `type_mgr` and `type_env` to the top of +main, and pass them to `typecheck_program` as parameters. Then, +we can surround the call to `typecheck_program` with +try/catch: +{{< codelines "C++" "compiler/04/main.cpp" 57 69 >}} + +We use some [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code) +to color the types in the case of a unification error. ### Setting up CMake We will set up CMake as our build system. This would be extremely easy @@ -147,16 +213,16 @@ in order to compile. We add this dependency: Finally, we add our source code to a CMake target. We use the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to pass in the source files generated by Flex and Bison. 
-{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 22 >}} +{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 23 >}} Almost there! `parser.cpp` will be generated in the `build` directory during an out-of-source build, and so will `parser.hpp`. When building, `parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for `parser.hpp`. We want them to be able to find each other, so we add both the source directory and the build (binary) directory to -the list of includes directories: +the list of include directories: -{{< codelines "CMake" "compiler/04/CMakeLists.txt" 23 24 >}} +{{< codelines "CMake" "compiler/04/CMakeLists.txt" 24 25 >}} That's it for CMake! Let's try our build: ``` @@ -164,5 +230,7 @@ cmake -S . -B build cd build && make -j8 ``` -We get an executable called `compiler`. Excellent! Here's the whole file: -{{< codeblock "CMake" "compiler/04/CMakeLists.txt" >}} +### Updated Code +We've made a lot of changes to the codebase, and I've only shown snippets of the code +so far. If you'd like to see the whole codebase, you can go to my site's git repository +and check out [the code so far](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/04).