Add errors section to Part 4 of compiler posts
This commit is contained in:
parent
df1101a14c
commit
05af1350c8
|
@ -16,6 +16,7 @@ add_executable(compiler
|
||||||
ast.cpp ast.hpp definition.cpp
|
ast.cpp ast.hpp definition.cpp
|
||||||
env.cpp env.hpp
|
env.cpp env.hpp
|
||||||
type.cpp type.hpp
|
type.cpp type.hpp
|
||||||
|
error.cpp error.hpp
|
||||||
${BISON_parser_OUTPUTS}
|
${BISON_parser_OUTPUTS}
|
||||||
${FLEX_scanner_OUTPUTS}
|
${FLEX_scanner_OUTPUTS}
|
||||||
main.cpp
|
main.cpp
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#include "ast.hpp"
|
#include "ast.hpp"
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
std::string op_name(binop op) {
|
std::string op_name(binop op) {
|
||||||
switch(op) {
|
switch(op) {
|
||||||
|
@ -8,7 +9,7 @@ std::string op_name(binop op) {
|
||||||
case TIMES: return "*";
|
case TIMES: return "*";
|
||||||
case DIVIDE: return "/";
|
case DIVIDE: return "/";
|
||||||
}
|
}
|
||||||
throw 0;
|
return "??";
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_indent(int n, std::ostream& to) {
|
void print_indent(int n, std::ostream& to) {
|
||||||
|
@ -53,7 +54,7 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
type_ptr ltype = left->typecheck(mgr, env);
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
type_ptr rtype = right->typecheck(mgr, env);
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
type_ptr ftype = env.lookup(op_name(op));
|
type_ptr ftype = env.lookup(op_name(op));
|
||||||
if(!ftype) throw 0;
|
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||||
|
|
||||||
type_ptr return_type = mgr.new_type();
|
type_ptr return_type = mgr.new_type();
|
||||||
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
|
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
|
||||||
|
@ -92,9 +93,14 @@ void ast_case::print(int indent, std::ostream& to) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
|
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
type_ptr case_type = of->typecheck(mgr, env);
|
type_var* var;
|
||||||
|
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||||
type_ptr branch_type = mgr.new_type();
|
type_ptr branch_type = mgr.new_type();
|
||||||
|
|
||||||
|
if(!dynamic_cast<type_base*>(case_type.get())) {
|
||||||
|
throw type_error("attempting case analysis of non-data type");
|
||||||
|
}
|
||||||
|
|
||||||
for(auto& branch : branches) {
|
for(auto& branch : branches) {
|
||||||
type_env new_env = env.scope();
|
type_env new_env = env.scope();
|
||||||
branch->pat->match(case_type, mgr, new_env);
|
branch->pat->match(case_type, mgr, new_env);
|
||||||
|
@ -122,17 +128,17 @@ void pattern_constr::print(std::ostream& to) const {
|
||||||
|
|
||||||
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
|
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
|
||||||
type_ptr constructor_type = env.lookup(constr);
|
type_ptr constructor_type = env.lookup(constr);
|
||||||
if(!constructor_type) throw 0;
|
if(!constructor_type) {
|
||||||
|
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
||||||
|
}
|
||||||
|
|
||||||
for(int i = 0; i < params.size(); i++) {
|
for(int i = 0; i < params.size(); i++) {
|
||||||
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||||
if(!arr) throw 0;
|
if(!arr) throw type_error("too many parameters in constructor pattern");
|
||||||
|
|
||||||
env.bind(params[i], arr->left);
|
env.bind(params[i], arr->left);
|
||||||
constructor_type = arr->right;
|
constructor_type = arr->right;
|
||||||
}
|
}
|
||||||
|
|
||||||
mgr.unify(t, constructor_type);
|
mgr.unify(t, constructor_type);
|
||||||
type_base* result_type = dynamic_cast<type_base*>(constructor_type.get());
|
|
||||||
if(!result_type) throw 0;
|
|
||||||
}
|
}
|
||||||
|
|
5
code/compiler/04/error.cpp
Normal file
5
code/compiler/04/error.cpp
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
const char* type_error::what() const noexcept {
|
||||||
|
return "an error occured while checking the types of the program";
|
||||||
|
}
|
21
code/compiler/04/error.hpp
Normal file
21
code/compiler/04/error.hpp
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
#pragma once
|
||||||
|
#include <exception>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
struct type_error : std::exception {
|
||||||
|
std::string description;
|
||||||
|
|
||||||
|
type_error(std::string d)
|
||||||
|
: description(std::move(d)) {}
|
||||||
|
|
||||||
|
const char* what() const noexcept override;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct unification_error : public type_error {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
unification_error(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)),
|
||||||
|
type_error("failed to unify types") {}
|
||||||
|
};
|
|
@ -1,7 +1,8 @@
|
||||||
#include "ast.hpp"
|
#include "ast.hpp"
|
||||||
#include "parser.hpp"
|
|
||||||
#include "type.hpp"
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include "parser.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
void yy::parser::error(const std::string& msg) {
|
void yy::parser::error(const std::string& msg) {
|
||||||
std::cout << "An error occured: " << msg << std::endl;
|
std::cout << "An error occured: " << msg << std::endl;
|
||||||
|
@ -9,10 +10,9 @@ void yy::parser::error(const std::string& msg) {
|
||||||
|
|
||||||
extern std::vector<definition_ptr> program;
|
extern std::vector<definition_ptr> program;
|
||||||
|
|
||||||
void typecheck_program(const std::vector<definition_ptr>& prog) {
|
void typecheck_program(
|
||||||
type_mgr mgr;
|
const std::vector<definition_ptr>& prog,
|
||||||
type_env env;
|
type_mgr& mgr, type_env& env) {
|
||||||
|
|
||||||
type_ptr int_type = type_ptr(new type_base("Int"));
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
type_ptr binop_type = type_ptr(new type_arr(
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
int_type,
|
int_type,
|
||||||
|
@ -40,6 +40,9 @@ void typecheck_program(const std::vector<definition_ptr>& prog) {
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
yy::parser parser;
|
yy::parser parser;
|
||||||
|
type_mgr mgr;
|
||||||
|
type_env env;
|
||||||
|
|
||||||
parser.parse();
|
parser.parse();
|
||||||
for(auto& definition : program) {
|
for(auto& definition : program) {
|
||||||
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||||
|
@ -51,6 +54,17 @@ int main() {
|
||||||
|
|
||||||
def->body->print(1, std::cout);
|
def->body->print(1, std::cout);
|
||||||
}
|
}
|
||||||
typecheck_program(program);
|
try {
|
||||||
std::cout << program.size() << std::endl;
|
typecheck_program(program, mgr, env);
|
||||||
|
} catch(unification_error& err) {
|
||||||
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
|
std::cout << " (1) \033[34m";
|
||||||
|
err.left->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
std::cout << " (2) \033[32m";
|
||||||
|
err.right->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
} catch(type_error& err) {
|
||||||
|
std::cout << "failed to type check program: " << err.description << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
auto it = mgr.types.find(name);
|
auto it = mgr.types.find(name);
|
||||||
|
@ -87,7 +88,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||||
if(lid->name == rid->name) return;
|
if(lid->name == rid->name) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw 0;
|
throw unification_error(l, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_mgr::bind(const std::string& s, type_ptr t) {
|
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||||
|
|
|
@ -47,29 +47,29 @@ virtual void print(std::ostream& to) const;
|
||||||
|
|
||||||
Let's take a look at the implementation. For `ast_int`,
|
Let's take a look at the implementation. For `ast_int`,
|
||||||
`ast_lid`, and `ast_uid`:
|
`ast_lid`, and `ast_uid`:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 18 21 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 19 22 >}}
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 27 30 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 28 31 >}}
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 36 39 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 37 40 >}}
|
||||||
|
|
||||||
With `ast_binop` things get a bit more interesting.
|
With `ast_binop` things get a bit more interesting.
|
||||||
We call `print` recursively on the children of the
|
We call `print` recursively on the children of the
|
||||||
`binop` node:
|
`binop` node:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 45 50 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 46 51 >}}
|
||||||
|
|
||||||
The same idea for `ast_app`:
|
The same idea for `ast_app`:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 66 71 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 67 72 >}}
|
||||||
|
|
||||||
Finally, just like `ast_case::typecheck` called
|
Finally, just like `ast_case::typecheck` called
|
||||||
`pattern::match`, `ast_case::print` calls `pattern::print`:
|
`pattern::match`, `ast_case::print` calls `pattern::print`:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 83 92 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 84 93 >}}
|
||||||
|
|
||||||
We follow the same implementation strategy for patterns,
|
We follow the same implementation strategy for patterns,
|
||||||
but we don't need indentation, or recursion:
|
but we don't need indentation, or recursion:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 108 110 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}}
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 116 121 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}}
|
||||||
|
|
||||||
Let's print the bodies of each function we receive from the parser:
|
In `main`, let's print the bodies of each function we receive from the parser:
|
||||||
{{< codelines "C++" "compiler/04/main.cpp" 41 56 >}}
|
{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
|
||||||
|
|
||||||
### Printing Types
|
### Printing Types
|
||||||
Types are another thing that we want to be able to inspect, so let's
|
Types are another thing that we want to be able to inspect, so let's
|
||||||
|
@ -79,7 +79,7 @@ virtual void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
```
|
```
|
||||||
We need the type manager so we can follow substitutions.
|
We need the type manager so we can follow substitutions.
|
||||||
The implementation is simple enough:
|
The implementation is simple enough:
|
||||||
{{< codelines "C++" "compiler/04/type.cpp" 5 24 >}}
|
{{< codelines "C++" "compiler/04/type.cpp" 6 24 >}}
|
||||||
|
|
||||||
Let's also print out the types we infer. We'll make it a separate loop
|
Let's also print out the types we infer. We'll make it a separate loop
|
||||||
at the bottom of the `typecheck_program` function, because it's mostly just
|
at the bottom of the `typecheck_program` function, because it's mostly just
|
||||||
|
@ -127,6 +127,72 @@ Remember how we build the function type backwards in Part 3? We have to do the s
|
||||||
We replace the fragment with the proper reverse iteration:
|
We replace the fragment with the proper reverse iteration:
|
||||||
{{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}}
|
{{< codelines "C++" "compiler/04/definition.cpp" 37 40 >}}
|
||||||
|
|
||||||
|
### Throwing Exceptions
|
||||||
|
Throwing 0 is never a good idea. Such an exception doesn't contain any information
|
||||||
|
that we may find useful in debugging, nor any information that would benefit
|
||||||
|
the users of the compiler. Instead, let's define our own exception classes,
|
||||||
|
and throw them instead. We'll make two:
|
||||||
|
{{< codeblock "C++" "compiler/04/error.hpp" >}}
|
||||||
|
|
||||||
|
Only one function needs to be implemented, and it's pretty boring:
|
||||||
|
{{< codeblock "C++" "compiler/04/error.cpp" >}}
|
||||||
|
|
||||||
|
It's time to throw these instead of 0. Let's take a look at the places
|
||||||
|
we do so.
|
||||||
|
|
||||||
|
First, we throw 0 in `type.cpp`, in the `type_mgr::unify` method. This is
|
||||||
|
where our `unification_error` comes in. The error will
|
||||||
|
contain the two types that we failed to unify, which we will
|
||||||
|
later report to the user:
|
||||||
|
{{< codelines "C++" "compiler/04/type.cpp" 91 91 >}}
|
||||||
|
|
||||||
|
Next up, we have a few throws in `ast.cpp`. The first is in `op_name`, but
|
||||||
|
we will simply replace it with `return "??"`, which will be caught later on
|
||||||
|
(either way, the `switch` statement falling through would be a compiler bug,
|
||||||
|
since the user has no way of providing an invalid binary operator). The
|
||||||
|
first throw we need to address is in `ast_binop::typecheck`, in the case
|
||||||
|
that we don't find a type for a binary operator. We report this
|
||||||
|
directly:
|
||||||
|
{{< codelines "C++" "compiler/04/ast.cpp" 57 57 >}}
|
||||||
|
|
||||||
|
We will introduce a new exception into `ast_case::typecheck`. Previously,
|
||||||
|
we simply passed the type of the expression to be case analyzed into
|
||||||
|
the pattern matching method. However, since we don't want
|
||||||
|
case analysis on functions, we ensure that the type of the expression
|
||||||
|
is `type_base`. If not, we report this:
|
||||||
|
{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}}
|
||||||
|
|
||||||
|
The next exception is in `pattern_constr::match`. It occurs
|
||||||
|
when the pattern has a constructor we don't recognize, and
|
||||||
|
that's exactly what we report:
|
||||||
|
{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}}
|
||||||
|
|
||||||
|
The next exception occurs in a loop, when we bind
|
||||||
|
types for each of the constructor pattern's variables.
|
||||||
|
We throw when we are unable to cast the remaining
|
||||||
|
constructor type to a `type_arr`. Conceptually,
|
||||||
|
this means that the pattern wants to apply the
|
||||||
|
constructor to more parameters than it actually
|
||||||
|
takes:
|
||||||
|
{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}}
|
||||||
|
|
||||||
|
We remove the last throw at the bottom of `pattern_constr::match`.
|
||||||
|
This is because once unification succeeds, we know
|
||||||
|
that the return type of the pattern is a base type since
|
||||||
|
we know the type of the case expression is a base type
|
||||||
|
(we know this because we added that check to `ast_case::typecheck`).
|
||||||
|
|
||||||
|
Finally, let's catch and report these exceptions. We could do it
|
||||||
|
in `typecheck_program`, but I think doing so in `main` is neater.
|
||||||
|
Since printing types requires a `type_mgr`, we'll move the
|
||||||
|
declarations of both `type_mgr` and `type_env` to the top of
|
||||||
|
main, and pass them to `typecheck_program` as parameters. Then,
|
||||||
|
we can surround the call to `typecheck_program` with
|
||||||
|
try/catch:
|
||||||
|
{{< codelines "C++" "compiler/04/main.cpp" 57 69 >}}
|
||||||
|
|
||||||
|
We use some [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code)
|
||||||
|
to color the types in the case of a unification error.
|
||||||
|
|
||||||
### Setting up CMake
|
### Setting up CMake
|
||||||
We will set up CMake as our build system. This would be extremely easy
|
We will set up CMake as our build system. This would be extremely easy
|
||||||
|
@ -147,16 +213,16 @@ in order to compile. We add this dependency:
|
||||||
Finally, we add our source code to a CMake target. We use
|
Finally, we add our source code to a CMake target. We use
|
||||||
the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to
|
the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to
|
||||||
pass in the source files generated by Flex and Bison.
|
pass in the source files generated by Flex and Bison.
|
||||||
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 22 >}}
|
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 23 >}}
|
||||||
|
|
||||||
Almost there! `parser.cpp` will be generated in the `build` directory
|
Almost there! `parser.cpp` will be generated in the `build` directory
|
||||||
during an out-of-source build, and so will `parser.hpp`. When building,
|
during an out-of-source build, and so will `parser.hpp`. When building,
|
||||||
`parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for
|
`parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for
|
||||||
`parser.hpp`. We want them to be able to find each other, so we
|
`parser.hpp`. We want them to be able to find each other, so we
|
||||||
add both the source directory and the build (binary) directory to
|
add both the source directory and the build (binary) directory to
|
||||||
the list of includes directories:
|
the list of include directories:
|
||||||
|
|
||||||
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 23 24 >}}
|
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 24 25 >}}
|
||||||
|
|
||||||
That's it for CMake! Let's try our build:
|
That's it for CMake! Let's try our build:
|
||||||
```
|
```
|
||||||
|
@ -164,5 +230,7 @@ cmake -S . -B build
|
||||||
cd build && make -j8
|
cd build && make -j8
|
||||||
```
|
```
|
||||||
|
|
||||||
We get an executable called `compiler`. Excellent! Here's the whole file:
|
### Updated Code
|
||||||
{{< codeblock "CMake" "compiler/04/CMakeLists.txt" >}}
|
We've made a lot of changes to the codebase, and I've only shown snippets of the code
|
||||||
|
so far. If you'd like to see the whole codebase, you can go to my site's git repository
|
||||||
|
and check out [the code so far](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/04).
|
||||||
|
|
Loading…
Reference in New Issue
Block a user