Compare commits
2 Commits
1f6b4bef74
...
6b8d3b0f8a
Author | SHA1 | Date | |
---|---|---|---|
6b8d3b0f8a | |||
725958137a |
|
@ -3,6 +3,7 @@
|
|||
#include <type_traits>
|
||||
#include "binop.hpp"
|
||||
#include "error.hpp"
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "env.hpp"
|
||||
|
||||
|
@ -234,13 +235,13 @@ struct case_mappings {
|
|||
|
||||
std::vector<instruction_ptr>& make_case_for(tag_type tag) {
|
||||
if(default_case)
|
||||
throw type_error("attempted pattern match after catch-all");
|
||||
throw compiler_error("attempted pattern match after catch-all");
|
||||
return defined_cases[tag];
|
||||
}
|
||||
|
||||
std::vector<instruction_ptr>& make_default_case() {
|
||||
if(default_case)
|
||||
throw type_error("attempted repeated use of catch-all");
|
||||
throw compiler_error("attempted repeated use of catch-all");
|
||||
default_case.emplace(std::vector<instruction_ptr>());
|
||||
return *default_case;
|
||||
}
|
||||
|
@ -275,14 +276,16 @@ struct case_strategy_bool {
|
|||
using tag_type = bool;
|
||||
using repr_type = bool;
|
||||
|
||||
case_strategy_bool(const type* type) {}
|
||||
|
||||
tag_type tag_from_repr(repr_type b) { return b; }
|
||||
|
||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
||||
repr_type repr_from_pattern(const pattern_ptr& pt) {
|
||||
pattern_constr* cpat;
|
||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())) ||
|
||||
(cpat->constr != "True" && cpat->constr != "False") ||
|
||||
cpat->params.size() != 0)
|
||||
throw type_error(
|
||||
throw compiler_error(
|
||||
"pattern cannot be converted to a boolean",
|
||||
pt->loc);
|
||||
return cpat->constr == "True";
|
||||
|
@ -297,12 +300,11 @@ struct case_strategy_bool {
|
|||
into.push_back(instruction_ptr(new instruction_slide(1)));
|
||||
}
|
||||
|
||||
size_t case_count(const type* type) {
|
||||
size_t case_count() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
void into_instructions(
|
||||
const type* type,
|
||||
case_mappings<case_strategy_bool>& ms,
|
||||
std::vector<instruction_ptr>& into) {
|
||||
if(ms.defined_cases_count() == 0) {
|
||||
|
@ -321,16 +323,23 @@ struct case_strategy_data {
|
|||
using tag_type = int;
|
||||
using repr_type = std::pair<const type_data::constructor*, const std::vector<std::string>*>;
|
||||
|
||||
const type_data* arg_type;
|
||||
|
||||
case_strategy_data(const type* t) {
|
||||
arg_type = dynamic_cast<const type_data*>(t);
|
||||
assert(arg_type);
|
||||
}
|
||||
|
||||
tag_type tag_from_repr(const repr_type& repr) { return repr.first->tag; }
|
||||
|
||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
||||
repr_type repr_from_pattern(const pattern_ptr& pt) {
|
||||
pattern_constr* cpat;
|
||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())))
|
||||
throw type_error(
|
||||
throw compiler_error(
|
||||
"pattern cannot be interpreted as constructor.",
|
||||
pt->loc);
|
||||
return std::make_pair(
|
||||
&static_cast<const type_data*>(type)->constructors.find(cpat->constr)->second,
|
||||
&arg_type->constructors.find(cpat->constr)->second,
|
||||
&cpat->params);
|
||||
}
|
||||
|
||||
|
@ -349,19 +358,17 @@ struct case_strategy_data {
|
|||
into.push_back(instruction_ptr(new instruction_slide(repr.second->size())));
|
||||
}
|
||||
|
||||
size_t case_count(const type* type) {
|
||||
return static_cast<const type_data*>(type)->constructors.size();
|
||||
size_t case_count() {
|
||||
return arg_type->constructors.size();
|
||||
}
|
||||
|
||||
void into_instructions(
|
||||
const type* type,
|
||||
case_mappings<case_strategy_data>& ms,
|
||||
std::vector<instruction_ptr>& into) {
|
||||
instruction_jump* jump_instruction = new instruction_jump();
|
||||
instruction_ptr inst(jump_instruction);
|
||||
|
||||
auto data_type = static_cast<const type_data*>(type);
|
||||
for(auto& constr : data_type->constructors) {
|
||||
for(auto& constr : arg_type->constructors) {
|
||||
if(!ms.case_defined_for(constr.second.tag)) continue;
|
||||
jump_instruction->branches.push_back(
|
||||
std::move(ms.get_specific_case_for(constr.second.tag)));
|
||||
|
@ -372,7 +379,7 @@ struct case_strategy_data {
|
|||
if(ms.default_case_defined()) {
|
||||
jump_instruction->branches.push_back(
|
||||
std::move(ms.get_default_case()));
|
||||
for(auto& constr : data_type->constructors) {
|
||||
for(auto& constr : arg_type->constructors) {
|
||||
if(ms.case_defined_for(constr.second.tag)) continue;
|
||||
jump_instruction->tag_mappings[constr.second.tag] =
|
||||
jump_instruction->branches.size();
|
||||
|
@ -385,29 +392,29 @@ struct case_strategy_data {
|
|||
|
||||
template <typename T>
|
||||
void compile_case(const ast_case& node, const env_ptr& env, const type* type, std::vector<instruction_ptr>& into) {
|
||||
T strategy;
|
||||
T strategy(type);
|
||||
case_mappings<T> cases;
|
||||
for(auto& branch : node.branches) {
|
||||
pattern_var* vpat;
|
||||
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||
if(cases.defined_cases_count() == strategy.case_count(type))
|
||||
throw type_error("redundant catch-all pattern", branch->pat->loc);
|
||||
if(cases.defined_cases_count() == strategy.case_count())
|
||||
throw compiler_error("redundant catch-all pattern", branch->pat->loc);
|
||||
auto& branch_into = cases.make_default_case();
|
||||
env_ptr new_env(new env_var(vpat->var, env));
|
||||
branch->expr->compile(new_env, branch_into);
|
||||
branch_into.push_back(instruction_ptr(new instruction_slide(1)));
|
||||
} else {
|
||||
auto repr = strategy.from_typed_pattern(branch->pat, type);
|
||||
auto repr = strategy.repr_from_pattern(branch->pat);
|
||||
auto& branch_into = cases.make_case_for(strategy.tag_from_repr(repr));
|
||||
strategy.compile_branch(branch, env, repr, branch_into);
|
||||
}
|
||||
}
|
||||
|
||||
if(!(cases.defined_cases_count() == strategy.case_count(type) ||
|
||||
if(!(cases.defined_cases_count() == strategy.case_count() ||
|
||||
cases.default_case_defined()))
|
||||
throw type_error("incomplete patterns", node.loc);
|
||||
throw compiler_error("incomplete patterns", node.loc);
|
||||
|
||||
strategy.into_instructions(type, cases, into);
|
||||
strategy.into_instructions(cases, into);
|
||||
}
|
||||
|
||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
|
|
|
@ -64,9 +64,9 @@ void definition_data::insert_constructors() const {
|
|||
type_ptr return_type(return_app);
|
||||
for(auto& var : vars) {
|
||||
if(var_set.find(var) != var_set.end())
|
||||
throw std::runtime_error(
|
||||
throw compiler_error(
|
||||
std::string("type variable ") +
|
||||
var + std::string(" used twice in data type definition."));
|
||||
var + std::string(" used twice in data type definition."), loc);
|
||||
var_set.insert(var);
|
||||
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||
}
|
||||
|
|
|
@ -1,19 +1,32 @@
|
|||
#include "error.hpp"
|
||||
|
||||
const char* compiler_error::what() const noexcept {
|
||||
return "an error occured while compiling the program";
|
||||
}
|
||||
|
||||
void compiler_error::print_about(std::ostream& to) {
|
||||
to << what() << ": ";
|
||||
to << description << std::endl;
|
||||
}
|
||||
|
||||
void compiler_error::print_location(std::ostream& to, parse_driver& drv, bool highlight) {
|
||||
if(!loc) return;
|
||||
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
||||
drv.print_location(to, *loc, highlight);
|
||||
}
|
||||
|
||||
void compiler_error::pretty_print(std::ostream& to, parse_driver& drv) {
|
||||
print_about(to);
|
||||
print_location(to, drv);
|
||||
}
|
||||
|
||||
const char* type_error::what() const noexcept {
|
||||
return "an error occured while checking the types of the program";
|
||||
}
|
||||
|
||||
void type_error::pretty_print(std::ostream& to, parse_driver& drv) {
|
||||
to << "encountered error while typechecking program: ";
|
||||
to << description << std::endl;
|
||||
|
||||
if(loc) {
|
||||
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
||||
to << std::endl << "```" << std::endl;
|
||||
drv.print_highlighted_location(to, *loc);
|
||||
to << "```" << std::endl << std::endl;
|
||||
}
|
||||
print_about(to);
|
||||
print_location(to, drv, true);
|
||||
}
|
||||
|
||||
void unification_error::pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr) {
|
||||
|
|
|
@ -7,12 +7,26 @@
|
|||
|
||||
using maybe_location = std::optional<yy::location>;
|
||||
|
||||
struct type_error : std::exception {
|
||||
struct compiler_error : std::exception {
|
||||
std::string description;
|
||||
maybe_location loc;
|
||||
|
||||
compiler_error(std::string d, maybe_location l = std::nullopt)
|
||||
: description(std::move(d)), loc(std::move(l)) {}
|
||||
|
||||
const char* what() const noexcept override;
|
||||
|
||||
void print_about(std::ostream& to);
|
||||
void print_location(std::ostream& to, parse_driver& drv, bool highlight = false);
|
||||
|
||||
void pretty_print(std::ostream& to, parse_driver& drv);
|
||||
};
|
||||
|
||||
struct type_error : compiler_error {
|
||||
std::optional<yy::location> loc;
|
||||
|
||||
type_error(std::string d, maybe_location l = std::nullopt)
|
||||
: description(std::move(d)), loc(std::move(l)) {}
|
||||
: compiler_error(std::move(d), std::move(l)) {}
|
||||
|
||||
const char* what() const noexcept override;
|
||||
void pretty_print(std::ostream& to, parse_driver& drv);
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
||||
std::cout << "An error occured: " << msg << std::endl;
|
||||
std::cerr << "An error occured: " << msg << std::endl;
|
||||
}
|
||||
|
||||
void prelude_types(definition_group& defs, type_env_ptr env) {
|
||||
|
@ -110,12 +110,12 @@ void output_llvm(llvm_context& ctx, const std::string& filename) {
|
|||
std::error_code ec;
|
||||
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
|
||||
if (ec) {
|
||||
throw std::runtime_error("failed to open object file for writing");
|
||||
throw compiler_error("failed to open object file for writing");
|
||||
} else {
|
||||
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
||||
llvm::legacy::PassManager pm;
|
||||
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
||||
throw std::runtime_error("failed to add passes to pass manager");
|
||||
throw compiler_error("failed to add passes to pass manager");
|
||||
} else {
|
||||
pm.run(ctx.module);
|
||||
file.close();
|
||||
|
@ -177,10 +177,11 @@ void gen_llvm(global_scope& scope) {
|
|||
int main(int argc, char** argv) {
|
||||
if(argc != 2) {
|
||||
std::cerr << "please enter a file to compile." << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
parse_driver driver(argv[1]);
|
||||
if(!driver.run_parse()) {
|
||||
std::cerr << "failed to open file " << argv[1] << std::endl;
|
||||
std::cerr << "failed to parse file " << argv[1] << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
@ -207,7 +208,7 @@ int main(int argc, char** argv) {
|
|||
err.pretty_print(std::cerr, driver, mgr);
|
||||
} catch(type_error& err) {
|
||||
err.pretty_print(std::cerr, driver);
|
||||
} catch(std::runtime_error& err) {
|
||||
std::cerr << err.what() << std::endl;
|
||||
} catch (compiler_error& err) {
|
||||
err.pretty_print(std::cerr, driver);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,8 +5,6 @@
|
|||
bool parse_driver::run_parse() {
|
||||
FILE* stream = fopen(file_name.c_str(), "r");
|
||||
if(!stream) return false;
|
||||
string_stream = std::ostringstream();
|
||||
file_offset = 0;
|
||||
line_offsets.push_back(0);
|
||||
yyscan_t scanner;
|
||||
yylex_init(&scanner);
|
||||
|
@ -29,27 +27,27 @@ void parse_driver::mark_line() {
|
|||
}
|
||||
|
||||
size_t parse_driver::get_index(int line, int column) {
|
||||
assert(line > 0);
|
||||
assert(line <= line_offsets.size());
|
||||
size_t file_offset = line_offsets[line-1];
|
||||
file_offset += column - 1;
|
||||
return file_offset;
|
||||
assert(line > 0 && line <= line_offsets.size());
|
||||
return line_offsets[line-1] + column - 1;
|
||||
}
|
||||
|
||||
size_t parse_driver::get_line_end(int line) {
|
||||
if(line > line_offsets.size()) return file_contents.size();
|
||||
if(line == line_offsets.size()) return file_contents.size();
|
||||
return get_index(line+1, 1);
|
||||
}
|
||||
|
||||
void parse_driver::print_highlighted_location(std::ostream& stream, const yy::location& loc) {
|
||||
void parse_driver::print_location(
|
||||
std::ostream& stream,
|
||||
const yy::location& loc,
|
||||
bool highlight) {
|
||||
size_t print_start = get_index(loc.begin.line, 1);
|
||||
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
||||
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
||||
size_t print_end = get_line_end(loc.end.line);
|
||||
const char* content = file_contents.c_str();
|
||||
stream.write(content + print_start, highlight_start - print_start);
|
||||
stream << "\033[4;31m";
|
||||
if(highlight) stream << "\033[4;31m";
|
||||
stream.write(content + highlight_start, highlight_end - highlight_start);
|
||||
stream << "\033[0m";
|
||||
if(highlight) stream << "\033[0m";
|
||||
stream.write(content + highlight_end, print_end - highlight_end);
|
||||
}
|
||||
|
|
|
@ -14,13 +14,13 @@ void scanner_destroy(yyscan_t* scanner);
|
|||
struct parse_driver {
|
||||
std::string file_name;
|
||||
std::ostringstream string_stream;
|
||||
std::string file_contents;
|
||||
|
||||
yy::location location;
|
||||
size_t file_offset;
|
||||
|
||||
std::vector<size_t> line_offsets;
|
||||
|
||||
definition_group global_defs;
|
||||
std::string file_contents;
|
||||
|
||||
parse_driver(const std::string& file)
|
||||
: file_name(file), file_offset(0) {}
|
||||
|
@ -30,7 +30,10 @@ struct parse_driver {
|
|||
void mark_line();
|
||||
size_t get_index(int line, int column);
|
||||
size_t get_line_end(int line);
|
||||
void print_highlighted_location(std::ostream& stream, const yy::location& loc);
|
||||
void print_location(
|
||||
std::ostream& stream,
|
||||
const yy::location& loc,
|
||||
bool highlight = true);
|
||||
};
|
||||
|
||||
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
||||
|
|
|
@ -62,7 +62,7 @@ automatically assemble the "from" and "to" locations of a nonterminal
|
|||
from the locations of children, which would be very tedious to write
|
||||
by hand. We enable this feature using the following option:
|
||||
|
||||
{{< codelines "text" "compiler/13/parser.y" 50 50 >}}
|
||||
{{< codelines "C++" "compiler/13/parser.y" 50 50 >}}
|
||||
|
||||
There's just one hitch, though. Sure, Bison can compute bigger
|
||||
locations from smaller ones, but it must get the smaller ones
|
||||
|
@ -143,6 +143,17 @@ from `ast_binop`:
|
|||
|
||||
{{< codelines "C++" "compiler/13/ast.hpp" 98 99 >}}
|
||||
|
||||
Finally, we tell Bison to pass the computed location
|
||||
data as an argument when constructing our data structures.
|
||||
This too is a mechanical change, and I think the following
|
||||
couple of lines demonstrate the general idea in sufficient
|
||||
detail:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 107 110 >}}
|
||||
|
||||
Here, the `@$` symbol is used to reference the current
|
||||
nonterminal's location data.
|
||||
|
||||
#### Line Offsets, File Input, and the Parse Driver
|
||||
There are three more challenges with printing out the line
|
||||
of code where an error occurred. First of all, to
|
||||
|
@ -202,7 +213,8 @@ will also need some way of accessing the `yy::location` instance, and
|
|||
a way of storing our file input in memory. Fortunately, we're not
|
||||
the only ones to have ever come across the issue of creating non-global
|
||||
state: the Bison documentation has a
|
||||
[section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html) that describes a technique for manipulating
|
||||
[section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html)
|
||||
that describes a technique for manipulating
|
||||
state -- "parsing context", in their words. This technique involves the
|
||||
creation of a _parsing driver_.
|
||||
|
||||
|
@ -211,4 +223,352 @@ state. We can arrange for this class to be available to our tokenizing
|
|||
and parsing functions, which will allow us to use it pretty much like we'd
|
||||
use a global variable. We can define it as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 14 34 >}}
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 14 37 >}}
|
||||
|
||||
There are quite a few fields here. The `file_name` string represents
|
||||
the file that we'll be reading code from. The `string_stream` will
|
||||
be used to back up the contents of the source file as Flex reads them;
|
||||
once Flex is done, the content of the `string_stream` will be
|
||||
saved into the `file_contents` string.
|
||||
|
||||
The next three fields deal with tracking source code
|
||||
locations. The `location` field will be accessed by Flex
|
||||
via `drv.location` (where `drv` is a reference to our driver class).
|
||||
The `file_offset` and `line_offsets` fields will be used to
|
||||
keep track of where each line begins, as we have discussed above.
|
||||
Finally, `global_defs` will be the new home of our top-level
|
||||
definitions.
|
||||
|
||||
The methods on `parse_driver` are rather simple, too:
|
||||
|
||||
* `run_parse` handles the initialization of the tokenizer
|
||||
and parser, which includes obtaining the `FILE*` and configuring
|
||||
Flex to use it. It also handles invoking the parsing code.
|
||||
We'll make this method return `true` if parsing succeeded,
|
||||
and `false` otherwise (if, say, the file we tried to read doesn't exist).
|
||||
* `write` will be called from Flex, and will allow us to
|
||||
record the content of the file we're processing to the `string_stream`.
|
||||
We've already seen it used in the `YY_USER_ACTION` macro.
|
||||
* `mark_line` will also be called from Flex, and will mark the current
|
||||
`file_offset` as the beginning of a line by pushing it into `line_offsets`.
|
||||
* `get_index` and `get_line_end` will be used for converting
|
||||
`yy::location` instances to offsets within the source code buffer.
|
||||
* `print_location` will be used for printing errors.
|
||||
It will print the lines spanned by the given location, with the
|
||||
location itself colored and underlined if the last argument is `true`.
|
||||
This will make our errors easier on the eyes.
|
||||
|
||||
Let's take a look at their implementations. First, `run_parse`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 5 18 >}}
|
||||
|
||||
We try to open the user-specified file, and return `false` if we can't.
|
||||
We then initialize `line_offsets` as we discussed above. After
|
||||
this, we start doing the setup specific to a reentrant
|
||||
Flex scanner. We declare a `yyscan_t` variable, which
|
||||
will contain all of Flex's state. Then, we initialize
|
||||
it using `yylex_init`. Finally, since we can no longer
|
||||
touch the `yyin` global variable (it doesn't exist),
|
||||
we have to resort to using a setter function provided by Flex
|
||||
to configure the tokenizer's input stream.
|
||||
|
||||
Next, we construct our Bison-generated parser. Note that
|
||||
unlike before, we have to pass in two arguments:
|
||||
`scanner` and `*this`, the latter being of type `parse_driver&`.
|
||||
We'll come back to how this works in a moment. With
|
||||
the scanner and parser initialized, we invoke `parser::operator()`,
|
||||
which actually runs the Flex- and Bison-generated code.
|
||||
To clean up, we run `yylex_destroy` and `fclose`. Finally,
|
||||
we extract the contents of our file into the `file_contents`
|
||||
string, and return.
|
||||
|
||||
Next, the `write` method. For the most part, this method
|
||||
is a proxy for the `write` method of our `string_stream`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 20 23 >}}
|
||||
|
||||
We do, however, also keep track of the `file_offset` variable
|
||||
here, which ensures we have up-to-date information
|
||||
regarding our position in the source file. The implementation
|
||||
of `mark_line` uses this information:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 25 27 >}}
|
||||
|
||||
Once we have the line offsets, `get_index` becomes very simple:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 29 32 >}}
|
||||
|
||||
Here, we use an assertion for the first time. Calling
|
||||
`get_index` with a negative or zero line doesn't make
|
||||
any sense, since Bison starts tracking line numbers
|
||||
at 1. Similarly, asking for a line for which we don't
|
||||
have a recorded offset is invalid. Both
|
||||
of these nonsensical calls to `get_index` cannot
|
||||
be caused by the user under normal circumstances,
|
||||
and indicate the method's misuse by the author of
|
||||
the compiler (us!). Thus, we terminate the program.
|
||||
|
||||
Finally, the implementation of `get_line_end` just finds the
|
||||
beginning of the next line. We stick to the C convention
|
||||
of marking 'end' indices exclusive (pointing just past
|
||||
the end of the array):
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 34 37 >}}
|
||||
|
||||
Since `line_offsets` has as many elements as there are lines,
|
||||
the last line number would be equal to the vector's size.
|
||||
When looking up the end of the last line, we can't look for
|
||||
the beginning of the next line, so instead we return the end of the file.
|
||||
|
||||
Next, the `print_location` method prints three sections
|
||||
of the source file. These are the text "before" the error,
|
||||
the error itself, and, finally, the text "after" the error.
|
||||
For example, if an error began on the fifth column of the third
|
||||
line, and ended on the eighth column of the fourth line, the
|
||||
"before" section would include the first four columns of the third
|
||||
line, and the "after" section would be the ninth column onward
|
||||
on the fourth line. Before and after the error itself,
|
||||
if the `highlight` argument is true,
|
||||
we sprinkle the ANSI escape codes to enable and disable
|
||||
special formatting, respectively. For now, the special
|
||||
formatting involves underlining the text and making it red.
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 39 53 >}}
|
||||
|
||||
Finally, to get the forward declarations for the `yy*` functions
|
||||
and types, we set the `header-file` option in Flex:
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 3 3 >}}
|
||||
|
||||
We also include this `scanner.hpp` file in our `parse_driver.cpp`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 2 2 >}}
|
||||
|
||||
#### Adding the Driver to Flex and Bison
|
||||
Bison's C++ language template generates a class called
|
||||
`yy::parser`. We don't really want to modify this class
|
||||
in any way: not only is it generated code, but it's
|
||||
also rather complex. Instead, Bison provides us
|
||||
with a mechanism to pass more data in to the parser.
|
||||
This data is made available to all the actions
|
||||
that the parser runs. Better yet, Bison also attempts
|
||||
to pass this data on to the tokenizer, which in our
|
||||
case would mean that whatever data we provide Bison
|
||||
will also be available to Flex. This is how we'll
|
||||
allow the two components to access our new `parse_driver`
|
||||
class. This is also how we'll pass in the `yyscan_t`
|
||||
that Flex now needs to run its tokenizing code. To
|
||||
do all this, we use Bison's `%param` option. I'm
|
||||
going to include a few more lines from `parser.y`,
|
||||
since they contain the necessary `#include` directives
|
||||
and a required type definition:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 1 18 >}}
|
||||
|
||||
The `%param` option effectively adds the parameter listed
|
||||
between the curly braces to the constructor of the generated
|
||||
`yy::parser`. We've already seen this in the implementation
|
||||
of our driver, where we passed `scanner` and `*this` as
|
||||
arguments when creating the parser. The parameters we declare are also passed to the
|
||||
`yylex` function, which is expected to accept them in the same order.
|
||||
|
||||
Since we're adding `parse_driver` as an argument we have to
|
||||
declare it. However, we can't include the `parse_driver` header
|
||||
right away because `parse_driver` itself includes the `parser` header:
|
||||
we'd end up with a circular dependency. Instead, we resort to
|
||||
forward-declaring the driver class, as well as the `yyscan_t`
|
||||
structure containing Flex's state.
|
||||
|
||||
Adding a parameter to Bison doesn't automatically affect
|
||||
Flex. To let Flex know that its `yylex` function must now accept
|
||||
the state and the parse driver, we have to define the
|
||||
`YY_DECL` macro. We do this in `parse_driver.hpp`, since
|
||||
this forward declaration will be used by both Flex
|
||||
and Bison:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 39 41 >}}
|
||||
|
||||
Finally, we can change our `main.cpp` file to use the
|
||||
`parse_driver`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/main.cpp" 178 186 >}}
|
||||
|
||||
#### Improving Exceptions
|
||||
Now, it's time to add location data (and a little bit more) to our
|
||||
exceptions. We want to make it possible for exceptions to include
|
||||
data about where the error occurred, and to print this data to the user.
|
||||
However, it's also possible for us to have exceptions that simply
|
||||
do not have that location data. Furthermore, we want to know
|
||||
whether or not an exception has an associated location; we'd
|
||||
rather not print an invalid or "default" location when an error
|
||||
occurs.
|
||||
|
||||
In the old days of programming, we could represent the absence
|
||||
of location data with a `nullptr`, or `NULL`. But not only
|
||||
does this approach expose us to all kinds of `NULL`-safety
|
||||
bugs, but it also requires heap allocation! This doesn't
|
||||
make it sound all that appealing; instead, I think we should
|
||||
opt for using `std::optional`.
|
||||
|
||||
Though `std::optional` is standard (as may be obvious from its
|
||||
namespace), it's a rather recent addition to the C++ STL.
|
||||
In order to gain access to it, we need to ensure that our
|
||||
project is compiled using C++17. To this end, we add
|
||||
the following two lines to our CMakeLists.txt:
|
||||
|
||||
{{< codelines "CMake" "compiler/13/CMakeLists.txt" 5 6 >}}
|
||||
|
||||
Now, let's add a new base class for all of our compiler errors,
|
||||
unsurprisingly called `compiler_error`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.hpp" 8 23 >}}
|
||||
|
||||
We'll put some 'common' exception functionality
|
||||
into the `print_location` and `print_about` methods. If the error
|
||||
has an associated location, the former method will print that
|
||||
location to the screen. We don't always want to highlight
|
||||
the part of the code that caused the error: for instance,
|
||||
an invalid data type definition may span several lines,
|
||||
and coloring that whole section of text red would be
|
||||
too much. To address this, we add the `highlight`
|
||||
boolean argument, which can be used to switch the
|
||||
colors on and off. The `print_about` method
|
||||
will simply print the `what()` message of the exception,
|
||||
in addition to the "specific" error that occurred (stored
|
||||
in `description`). Here are the implementations of the
|
||||
functions:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 3 16 >}}
|
||||
|
||||
We will also add a `pretty_print` method to all of
|
||||
our exceptions. This method will handle
|
||||
all the exception-specific printing logic.
|
||||
For the generic compiler error, this means
|
||||
simply printing out the error text and the location:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 18 21 >}}
|
||||
|
||||
For `type_error`, this logic slightly changes,
|
||||
enabling colors when printing the location:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 27 30 >}}
|
||||
|
||||
Finally, for `unification_error`, we also include
|
||||
the code to print out the two types that our
|
||||
compiler could not unify:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 32 41 >}}
|
||||
|
||||
There's a subtle change here. Compared to the previous
|
||||
type-printing code (which we had in `main`), what
|
||||
we wrote here deals with "expected" and "actual" types.
|
||||
The `left` type passed to the exception is printed
|
||||
first, and is treated like the "correct" type. The
|
||||
`right` type, on the other hand, is treated
|
||||
like the "wrong" type that should have been
|
||||
unifiable with `left`. This will affect the
|
||||
calling conventions of our unification code. In
|
||||
`main`, we remove all our old exception printing code
|
||||
in favor of calls to `pretty_print`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/main.cpp" 207 213 >}}
|
||||
|
||||
Now, we can go through and find all the places where
|
||||
we `throw 0`. One such place was in the data type
|
||||
definition code, where declaring the same type parameter
|
||||
twice is invalid. We replace the `0` with a
|
||||
`compiler_error`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 66 69 >}}
|
||||
|
||||
Not all `throw 0` statements should become exceptions.
|
||||
For example, here's code from the previous version of
|
||||
the compiler:
|
||||
|
||||
{{< codelines "C++" "compiler/12/definition.cpp" 123 127 >}}
|
||||
|
||||
If a definition `def_defn` has a dependency on a "nearby" (declared
|
||||
in the same group) definition called `dependency`, and if
|
||||
`dependency` does not exist within the same definition group,
|
||||
we throw an exception. But this error is impossible
|
||||
for a user to trigger: the only reason for a variable to appear
|
||||
in the `nearby_variables` vector is that it was previously
|
||||
found in the definition group. Here's the code that proves this
|
||||
(from the current version of the compiler):
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 102 106 >}}
|
||||
|
||||
Not being able to find the variable in the definition group
|
||||
is a compiler bug, and should never occur. So, instead
|
||||
of throwing an exception, we'll use an assertion:
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 128 128 >}}
|
||||
|
||||
For more complicated error messages, we can use a `stringstream`.
|
||||
Here's an example from `parsed_type`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parsed_type.cpp" 16 23 >}}
|
||||
|
||||
In general, this change is also rather mechanical, but, to
|
||||
maintain a balance between exceptions and assertions, here
|
||||
are a couple more assertions from `type_env`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.cpp" 77 78 >}}
|
||||
|
||||
Once again, it should not be possible for the compiler
|
||||
to try to generalize the type of a variable that doesn't
|
||||
exist, nor should generalization occur twice.
|
||||
|
||||
While we're on the topic of types, let's talk about
|
||||
`type_mgr::unify`. In practice, I suspect that a lot of
|
||||
errors in our compiler will originate from this method.
|
||||
However, at present, this method does not in any way
|
||||
track the locations of where a unification error occurred.
|
||||
To fix this, we add a new `loc` parameter to `unify`,
|
||||
which we make optional to allow for unification without
|
||||
a known location. Here's the declaration:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.hpp" 101 101 >}}
|
||||
|
||||
The change to the implementation is mechanical and repetitive,
|
||||
so instead of showing you the whole method, I'll settle for
|
||||
a couple of lines:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.cpp" 119 121 >}}
|
||||
|
||||
We want to make sure that a location provided to the
|
||||
top-level call to `unify` is also forwarded to the
|
||||
recursive calls, so we have to explicitly add it
|
||||
to the call.
|
||||
|
||||
With all of that done, we can finally stand back and
|
||||
marvel at the results of our hard work. Here is what a
|
||||
basic unification error looks like now:
|
||||
|
||||
{{< figure src="unification_error.png" caption="The result of a unification error." >}}
|
||||
|
||||
I used an image to show colors, but here is the content of the error in textual form:
|
||||
|
||||
```
|
||||
an error occured while checking the types of the program: failed to unify types
|
||||
occuring on line 2:
|
||||
3 + False
|
||||
the expected type was:
|
||||
!Int
|
||||
while the actual type was:
|
||||
!Bool
|
||||
```
|
||||
|
||||
The exclamation marks in front of the two types are due to some
|
||||
changes from section 2. Here's an error that was previously
|
||||
a `throw 0` statement in our code:
|
||||
|
||||
```
|
||||
an error occured while compiling the program: type variable a used twice in data type definition.
|
||||
occuring on line 1:
|
||||
data Pair a a = { MkPair a a }
|
||||
```
|
||||
|
||||
Now, not only have we eliminated the lazy uses of `throw 0` in our
|
||||
code, but we've also improved the presentation of the errors
|
||||
to the user!
|
||||
|
|
Loading…
Reference in New Issue
Block a user