Refactor errors and update post draft.
This commit is contained in:
		
							parent
							
								
									725958137a
								
							
						
					
					
						commit
						6b8d3b0f8a
					
				| @ -235,13 +235,13 @@ struct case_mappings { | ||||
| 
 | ||||
|     std::vector<instruction_ptr>& make_case_for(tag_type tag) { | ||||
|         if(default_case) | ||||
|             throw type_error("attempted pattern match after catch-all"); | ||||
|             throw compiler_error("attempted pattern match after catch-all"); | ||||
|         return defined_cases[tag]; | ||||
|     } | ||||
| 
 | ||||
|     std::vector<instruction_ptr>& make_default_case() { | ||||
|         if(default_case) | ||||
|             throw type_error("attempted repeated use of catch-all"); | ||||
|             throw compiler_error("attempted repeated use of catch-all"); | ||||
|         default_case.emplace(std::vector<instruction_ptr>()); | ||||
|         return *default_case; | ||||
|     } | ||||
| @ -285,7 +285,7 @@ struct case_strategy_bool { | ||||
|         if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())) || | ||||
|                 (cpat->constr != "True" && cpat->constr != "False") || | ||||
|                 cpat->params.size() != 0) | ||||
|             throw type_error( | ||||
|             throw compiler_error( | ||||
|                     "pattern cannot be converted to a boolean", | ||||
|                     pt->loc); | ||||
|         return cpat->constr == "True"; | ||||
| @ -335,7 +335,7 @@ struct case_strategy_data { | ||||
|     repr_type repr_from_pattern(const pattern_ptr& pt) { | ||||
|         pattern_constr* cpat; | ||||
|         if(!(cpat = dynamic_cast<pattern_constr*>(pt.get()))) | ||||
|             throw type_error( | ||||
|             throw compiler_error( | ||||
|                     "pattern cannot be interpreted as constructor.", | ||||
|                     pt->loc); | ||||
|         return std::make_pair( | ||||
| @ -398,7 +398,7 @@ void compile_case(const ast_case& node, const env_ptr& env, const type* type, st | ||||
|         pattern_var* vpat; | ||||
|         if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) { | ||||
|             if(cases.defined_cases_count() == strategy.case_count()) | ||||
|                 throw type_error("redundant catch-all pattern", branch->pat->loc); | ||||
|                 throw compiler_error("redundant catch-all pattern", branch->pat->loc); | ||||
|             auto& branch_into = cases.make_default_case(); | ||||
|             env_ptr new_env(new env_var(vpat->var, env)); | ||||
|             branch->expr->compile(new_env, branch_into); | ||||
| @ -412,7 +412,7 @@ void compile_case(const ast_case& node, const env_ptr& env, const type* type, st | ||||
| 
 | ||||
|     if(!(cases.defined_cases_count() == strategy.case_count() || | ||||
|                 cases.default_case_defined())) | ||||
|         throw type_error("incomplete patterns", node.loc); | ||||
|         throw compiler_error("incomplete patterns", node.loc); | ||||
| 
 | ||||
|     strategy.into_instructions(cases, into); | ||||
| } | ||||
|  | ||||
| @ -64,9 +64,9 @@ void definition_data::insert_constructors() const { | ||||
|     type_ptr return_type(return_app); | ||||
|     for(auto& var : vars) { | ||||
|         if(var_set.find(var) != var_set.end()) | ||||
|             throw std::runtime_error( | ||||
|             throw compiler_error( | ||||
|                     std::string("type variable ") + | ||||
|                     var + std::string(" used twice in data type definition.")); | ||||
|                     var + std::string(" used twice in data type definition."), loc); | ||||
|         var_set.insert(var); | ||||
|         return_app->arguments.push_back(type_ptr(new type_var(var))); | ||||
|     } | ||||
|  | ||||
| @ -1,19 +1,32 @@ | ||||
| #include "error.hpp" | ||||
| 
 | ||||
| const char* compiler_error::what() const noexcept { | ||||
|     return "an error occured while compiling the program"; | ||||
| } | ||||
| 
 | ||||
| void compiler_error::print_about(std::ostream& to) { | ||||
|     to << what() << ": "; | ||||
|     to << description << std::endl; | ||||
| } | ||||
| 
 | ||||
| void compiler_error::print_location(std::ostream& to, parse_driver& drv, bool highlight) { | ||||
|     if(!loc) return; | ||||
|     to << "occuring on line " << loc->begin.line << ":" << std::endl; | ||||
|     drv.print_location(to, *loc, highlight); | ||||
| } | ||||
| 
 | ||||
| void compiler_error::pretty_print(std::ostream& to, parse_driver& drv) { | ||||
|     print_about(to); | ||||
|     print_location(to, drv); | ||||
| } | ||||
| 
 | ||||
| const char* type_error::what() const noexcept { | ||||
|     return "an error occured while checking the types of the program"; | ||||
| } | ||||
| 
 | ||||
| void type_error::pretty_print(std::ostream& to, parse_driver& drv) { | ||||
|     to << "encountered error while typechecking program: "; | ||||
|     to << description << std::endl; | ||||
| 
 | ||||
|     if(loc) { | ||||
|         to << "occuring on line " << loc->begin.line << ":" << std::endl; | ||||
|         to << std::endl << "```" << std::endl; | ||||
|         drv.print_highlighted_location(to, *loc); | ||||
|         to << "```" << std::endl << std::endl; | ||||
|     } | ||||
|     print_about(to); | ||||
|     print_location(to, drv, true); | ||||
| } | ||||
| 
 | ||||
| void unification_error::pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr) { | ||||
|  | ||||
| @ -7,12 +7,26 @@ | ||||
| 
 | ||||
| using maybe_location = std::optional<yy::location>; | ||||
| 
 | ||||
| struct type_error : std::exception { | ||||
| struct compiler_error : std::exception { | ||||
|     std::string description; | ||||
|     maybe_location loc; | ||||
| 
 | ||||
|     compiler_error(std::string d, maybe_location l = std::nullopt) | ||||
|         : description(std::move(d)), loc(std::move(l)) {} | ||||
| 
 | ||||
|     const char* what() const noexcept override; | ||||
| 
 | ||||
|     void print_about(std::ostream& to); | ||||
|     void print_location(std::ostream& to, parse_driver& drv, bool highlight = false); | ||||
| 
 | ||||
|     void pretty_print(std::ostream& to, parse_driver& drv); | ||||
| }; | ||||
| 
 | ||||
| struct type_error : compiler_error { | ||||
|     std::optional<yy::location> loc; | ||||
| 
 | ||||
|     type_error(std::string d, maybe_location l = std::nullopt) | ||||
|         : description(std::move(d)), loc(std::move(l)) {} | ||||
|         : compiler_error(std::move(d), std::move(l)) {} | ||||
| 
 | ||||
|     const char* what() const noexcept override; | ||||
|     void pretty_print(std::ostream& to, parse_driver& drv); | ||||
|  | ||||
| @ -20,7 +20,7 @@ | ||||
| #include "llvm/Target/TargetMachine.h" | ||||
| 
 | ||||
| void yy::parser::error(const yy::location& loc, const std::string& msg) { | ||||
|     std::cout << "An error occured: " << msg << std::endl; | ||||
|     std::cerr << "An error occured: " << msg << std::endl; | ||||
| } | ||||
| 
 | ||||
| void prelude_types(definition_group& defs, type_env_ptr env) { | ||||
| @ -110,12 +110,12 @@ void output_llvm(llvm_context& ctx, const std::string& filename) { | ||||
|         std::error_code ec; | ||||
|         llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None); | ||||
|         if (ec) { | ||||
|             throw std::runtime_error("failed to open object file for writing"); | ||||
|             throw compiler_error("failed to open object file for writing"); | ||||
|         } else { | ||||
|             llvm::CodeGenFileType type = llvm::CGFT_ObjectFile; | ||||
|             llvm::legacy::PassManager pm; | ||||
|             if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) { | ||||
|                 throw std::runtime_error("failed to add passes to pass manager"); | ||||
|                 throw compiler_error("failed to add passes to pass manager"); | ||||
|             } else { | ||||
|                 pm.run(ctx.module); | ||||
|                 file.close(); | ||||
| @ -177,10 +177,11 @@ void gen_llvm(global_scope& scope) { | ||||
| int main(int argc, char** argv) { | ||||
|     if(argc != 2) { | ||||
|         std::cerr << "please enter a file to compile." << std::endl; | ||||
|         exit(1); | ||||
|     } | ||||
|     parse_driver driver(argv[1]); | ||||
|     if(!driver.run_parse()) { | ||||
|         std::cerr << "failed to open file " << argv[1] << std::endl; | ||||
|         std::cerr << "failed to parse file " << argv[1] << std::endl; | ||||
|         exit(1); | ||||
|     } | ||||
| 
 | ||||
| @ -207,7 +208,7 @@ int main(int argc, char** argv) { | ||||
|         err.pretty_print(std::cerr, driver, mgr); | ||||
|     } catch(type_error& err) { | ||||
|         err.pretty_print(std::cerr, driver); | ||||
|     } catch(std::runtime_error& err) { | ||||
|         std::cerr << err.what() << std::endl; | ||||
|     } catch (compiler_error& err) { | ||||
|         err.pretty_print(std::cerr, driver); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -5,8 +5,6 @@ | ||||
| bool parse_driver::run_parse() { | ||||
|     FILE* stream = fopen(file_name.c_str(), "r"); | ||||
|     if(!stream) return false; | ||||
|     string_stream = std::ostringstream(); | ||||
|     file_offset = 0; | ||||
|     line_offsets.push_back(0); | ||||
|     yyscan_t scanner; | ||||
|     yylex_init(&scanner); | ||||
| @ -29,27 +27,27 @@ void parse_driver::mark_line() { | ||||
| } | ||||
| 
 | ||||
| size_t parse_driver::get_index(int line, int column) { | ||||
|     assert(line > 0); | ||||
|     assert(line <= line_offsets.size()); | ||||
|     size_t file_offset = line_offsets[line-1]; | ||||
|     file_offset += column - 1; | ||||
|     return file_offset; | ||||
|     assert(line > 0 && line <= line_offsets.size()); | ||||
|     return line_offsets[line-1] + column - 1; | ||||
| } | ||||
| 
 | ||||
| size_t parse_driver::get_line_end(int line) { | ||||
|     if(line > line_offsets.size()) return file_contents.size(); | ||||
|     if(line == line_offsets.size()) return file_contents.size(); | ||||
|     return get_index(line+1, 1); | ||||
| } | ||||
| 
 | ||||
| void parse_driver::print_highlighted_location(std::ostream& stream, const yy::location& loc) { | ||||
| void parse_driver::print_location( | ||||
|         std::ostream& stream, | ||||
|         const yy::location& loc, | ||||
|         bool highlight) { | ||||
|     size_t print_start = get_index(loc.begin.line, 1); | ||||
|     size_t highlight_start = get_index(loc.begin.line, loc.begin.column); | ||||
|     size_t highlight_end = get_index(loc.end.line, loc.end.column); | ||||
|     size_t print_end = get_line_end(loc.end.line); | ||||
|     const char* content = file_contents.c_str(); | ||||
|     stream.write(content + print_start, highlight_start - print_start); | ||||
|     stream << "\033[4;31m"; | ||||
|     if(highlight) stream << "\033[4;31m"; | ||||
|     stream.write(content + highlight_start, highlight_end - highlight_start); | ||||
|     stream << "\033[0m"; | ||||
|     if(highlight) stream << "\033[0m"; | ||||
|     stream.write(content + highlight_end, print_end - highlight_end); | ||||
| } | ||||
|  | ||||
| @ -14,13 +14,13 @@ void scanner_destroy(yyscan_t* scanner); | ||||
| struct parse_driver { | ||||
|     std::string file_name; | ||||
|     std::ostringstream string_stream; | ||||
|     std::string file_contents; | ||||
| 
 | ||||
|     yy::location location; | ||||
|     size_t file_offset; | ||||
| 
 | ||||
|     std::vector<size_t> line_offsets; | ||||
| 
 | ||||
|     definition_group global_defs; | ||||
|     std::string file_contents; | ||||
| 
 | ||||
|     parse_driver(const std::string& file) | ||||
|         : file_name(file), file_offset(0) {} | ||||
| @ -30,7 +30,10 @@ struct parse_driver { | ||||
|     void mark_line(); | ||||
|     size_t get_index(int line, int column); | ||||
|     size_t get_line_end(int line); | ||||
|     void print_highlighted_location(std::ostream& stream, const yy::location& loc); | ||||
|     void print_location( | ||||
|             std::ostream& stream, | ||||
|             const yy::location& loc, | ||||
|             bool highlight = true); | ||||
| }; | ||||
| 
 | ||||
| #define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv) | ||||
|  | ||||
| @ -62,7 +62,7 @@ automatically assemble the "from" and "to" locations of a nonterminal | ||||
| from the locations of children, which would be very tedious to write | ||||
| by hand. We enable this feature using the following option: | ||||
| 
 | ||||
| {{< codelines "text" "compiler/13/parser.y" 50 50 >}} | ||||
| {{< codelines "C++" "compiler/13/parser.y" 50 50 >}} | ||||
| 
 | ||||
| There's just one hitch, though. Sure, Bison can compute bigger | ||||
| locations from smaller ones, but it must get the smaller ones | ||||
| @ -143,6 +143,17 @@ from `ast_binop`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/ast.hpp" 98 99 >}} | ||||
| 
 | ||||
| Finally, we tell Bison to pass the computed location | ||||
| data as an argument when constructing our data structures. | ||||
| This too is a mechanical change, and I think the following | ||||
| couple of lines demonstrate the general idea in sufficient | ||||
| detail: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parser.y" 107 110 >}} | ||||
| 
 | ||||
| Here, the `@$` symbol is used to reference the current | ||||
| nonterminal's location data. | ||||
| 
 | ||||
| #### Line Offsets, File Input, and the Parse Driver | ||||
| There are three more challenges with printing out the line | ||||
| of code where an error occurred. First of all, to | ||||
| @ -202,7 +213,8 @@ will also need some way of accessing the `yy::location` instance, and | ||||
| a way of storing our file input in memory. Fortunately, we're not | ||||
| the only ones to have ever come across the issue of creating non-global | ||||
| state: the Bison documentation has a | ||||
| [section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html) that describes a technique for manipulating | ||||
| [section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html) | ||||
| that describes a technique for manipulating | ||||
| state -- "parsing context", in their words. This technique involves the | ||||
| creation of a _parsing driver_. | ||||
| 
 | ||||
| @ -211,4 +223,352 @@ state. We can arrange for this class to be available to our tokenizing | ||||
| and parsing functions, which will allow us to use it pretty much like we'd | ||||
| use a global variable. We can define it as follows: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.hpp" 14 34 >}} | ||||
| {{< codelines "C++" "compiler/13/parse_driver.hpp" 14 37 >}} | ||||
| 
 | ||||
| There are quite a few fields here. The `file_name` string represents | ||||
| the file that we'll be reading code from. The `string_stream` will | ||||
| be used to back up the contents of the source file as Flex reads them; | ||||
| once Flex is done, the content of the `string_stream` will be | ||||
| saved into the `file_contents` string. | ||||
| 
 | ||||
| The next three fields deal with tracking source code | ||||
| locations. The `location` field will be accessed by Flex | ||||
| via `drv.location` (where `drv` is a reference to our driver class). | ||||
| The `file_offset` and `line_offsets` fields will be used to | ||||
| keep track of where each line begins, as we have discussed above. | ||||
| Finally, `global_defs` will be the new home of our top-level | ||||
| definitions. | ||||
| 
 | ||||
| The methods on `parse_driver` are rather simple, too: | ||||
| 
 | ||||
| * `run_parse` handles the initialization of the tokenizer | ||||
| and parser, which includes obtaining the `FILE*` and configuring | ||||
| Flex to use it. It also handles invoking the parsing code. | ||||
| We'll make this method return `true` if parsing succeeded, | ||||
| and `false` otherwise (if, say, the file we tried to read doesn't exist). | ||||
| * `write` will be called from Flex, and will allow us to | ||||
| record the content of the file we're processing to the `string_stream`. | ||||
| We've already seen it used in the `YY_USER_ACTION` macro. | ||||
| * `mark_line` will also be called from Flex, and will mark the current | ||||
| `file_offset` as the beginning of a line by pushing it into `line_offsets`. | ||||
| * `get_index` and `get_line_end` will be used for converting | ||||
| `yy::location` instances to offsets within the source code buffer. | ||||
| * `print_location` will be used for printing errors. | ||||
| It will print the lines spanned by the given location, with the | ||||
| location itself colored and underlined if the last argument is `true`. | ||||
| This will make our errors easier on the eyes. | ||||
| 
 | ||||
| Let's take a look at their implementations. First, `run_parse`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.cpp" 5 18 >}} | ||||
| 
 | ||||
| We try to open the user-specified file, and return `false` if we can't. | ||||
| We then initialize `line_offsets` as we discussed above. After | ||||
| this, we start doing the setup specific to a reentrant | ||||
| Flex scanner. We declare a `yyscan_t` variable, which | ||||
| will contain all of Flex's state. Then, we initialize | ||||
| it using `yylex_init`. Finally, since we can no longer | ||||
| touch the `yyin` global variable (it doesn't exist), | ||||
| we have to resort to using a setter function provided by Flex | ||||
| to configure the tokenizer's input stream. | ||||
| 
 | ||||
| Next, we construct our Bison-generated parser. Note that | ||||
| unlike before, we have to pass in two arguments: | ||||
| `scanner` and `*this`, the latter being of type `parse_driver&`. | ||||
| We'll come back to how this works in a moment. With | ||||
| the scanner and parser initialized, we invoke `parser::operator()`, | ||||
| which actually runs the Flex- and Bison-generated code. | ||||
| To clean up, we run `yylex_destroy` and `fclose`. Finally, | ||||
| we extract the contents of our file into the `file_contents` | ||||
| string, and return. | ||||
| 
 | ||||
| Next, the `write` method. For the most part, this method | ||||
| is a proxy for the `write` method of our `string_stream`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.cpp" 20 23 >}} | ||||
| 
 | ||||
| We do, however, also keep track of the `file_offset` variable | ||||
| here, which ensures we have up-to-date information | ||||
| regarding our position in the source file. The implementation | ||||
| of `mark_line` uses this information: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.cpp" 25 27 >}} | ||||
| 
 | ||||
| Once we have the line offsets, `get_index` becomes very simple: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.cpp" 29 32 >}} | ||||
| 
 | ||||
| Here, we use an assertion for the first time. Calling | ||||
| `get_index` with a negative or zero line doesn't make | ||||
| any sense, since Bison starts tracking line numbers | ||||
| at 1. Similarly, asking for a line for which we don't | ||||
| have a recorded offset is invalid. Both | ||||
| of these nonsensical calls to `get_index` cannot | ||||
| be caused by the user under normal circumstances, | ||||
| and indicate the method's misuse by the author of | ||||
| the compiler (us!). Thus, we terminate the program. | ||||
| 
 | ||||
| Finally, the implementation of `get_line_end` just finds the | ||||
| beginning of the next line. We stick to the C convention | ||||
| of marking 'end' indices exclusive (pointing just past | ||||
| the end of the array): | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.cpp" 34 37 >}} | ||||
| 
 | ||||
| Since `line_offsets` has as many elements as there are lines, | ||||
| the last line number would be equal to the vector's size. | ||||
| When looking up the end of the last line, we can't look for | ||||
| the beginning of the next line, so instead we return the end of the file. | ||||
| 
 | ||||
| Next, the `print_location` method prints three sections | ||||
| of the source file. These are the text "before" the error, | ||||
| the error itself, and, finally, the text "after" the error. | ||||
| For example, if an error began on the fifth column of the third | ||||
| line, and ended on the eighth column of the fourth line, the | ||||
| "before" section would include the first four columns of the third | ||||
| line, and the "after" section would be the ninth column onward | ||||
| on the fourth line. Before and after the error itself, | ||||
| if the `highlight` argument is true, | ||||
| we sprinkle the ANSI escape codes to enable and disable | ||||
| special formatting, respectively. For now, the special | ||||
| formatting involves underlining the text and making it red. | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.cpp" 39 53 >}} | ||||
| 
 | ||||
| Finally, to get the forward declarations for the `yy*` functions | ||||
| and types, we set the `header-file` option in Flex: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/scanner.l" 3 3 >}} | ||||
| 
 | ||||
| We also include this `scanner.hpp` file in our `parse_driver.cpp`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.cpp" 2 2 >}} | ||||
| 
 | ||||
| #### Adding the Driver to Flex and Bison | ||||
| Bison's C++ language template generates a class called | ||||
| `yy::parser`. We don't really want to modify this class | ||||
| in any way: not only is it generated code, but it's | ||||
| also rather complex. Instead, Bison provides us | ||||
| with a mechanism to pass more data in to the parser. | ||||
| This data is made available to all the actions | ||||
| that the parser runs. Better yet, Bison also attempts | ||||
| to pass this data on to the tokenizer, which in our | ||||
| case would mean that whatever data we provide Bison | ||||
| will also be available to Flex. This is how we'll | ||||
| allow the two components to access our new `parse_driver` | ||||
| class. This is also how we'll pass in the `yyscan_t` | ||||
| that Flex now needs to run its tokenizing code. To | ||||
| do all this, we use Bison's `%param` option. I'm | ||||
| going to include a few more lines from `parser.y`, | ||||
| since they contain the necessary `#include` directives | ||||
| and a required type definition: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parser.y" 1 18 >}} | ||||
| 
 | ||||
| The `%param` option effectively adds the parameter listed | ||||
| between the curly braces to the constructor of the generated | ||||
| `yy::parser`. We've already seen this in the implementation | ||||
| of our driver, where we passed `scanner` and `*this` as | ||||
| arguments when creating the parser. The parameters we declare are also passed to the | ||||
| `yylex` function, which is expected to accept them in the same order. | ||||
| 
 | ||||
| Since we're adding `parse_driver` as an argument we have to | ||||
| declare it. However, we can't include the `parse_driver` header | ||||
| right away because `parse_driver` itself includes the `parser` header: | ||||
| we'd end up with a circular dependency. Instead, we resort to | ||||
| forward-declaring the driver class, as well as the `yyscan_t` | ||||
| structure containing Flex's state. | ||||
| 
 | ||||
| Adding a parameter to Bison doesn't automatically affect | ||||
| Flex. To let Flex know that its `yylex` function must now accept | ||||
| the state and the parse driver, we have to define the | ||||
| `YY_DECL` macro. We do this in `parse_driver.hpp`, since | ||||
| this forward declaration will be used by both Flex | ||||
| and Bison: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parse_driver.hpp" 39 41 >}} | ||||
| 
 | ||||
| Finally, we can change our `main.cpp` file to use the | ||||
| `parse_driver`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/main.cpp" 178 186 >}} | ||||
| 
 | ||||
| #### Improving Exceptions | ||||
| Now, it's time to add location data (and a little bit more) to our | ||||
| exceptions. We want to make it possible for exceptions to include | ||||
| data about where the error occurred, and to print this data to the user. | ||||
| However, it's also possible for us to have exceptions that simply | ||||
| do not have that location data. Furthermore, we want to know | ||||
| whether or not an exception has an associated location; we'd | ||||
| rather not print an invalid or "default" location when an error | ||||
| occurs. | ||||
| 
 | ||||
| In the old days of programming, we could represent the absence | ||||
| of location data with a `nullptr`, or `NULL`. But not only | ||||
| does this approach expose us to all kinds of `NULL`-safety | ||||
| bugs, but it also requires heap allocation! This doesn't | ||||
| make it sound all that appealing; instead, I think we should | ||||
| opt for using `std::optional`. | ||||
| 
 | ||||
| Though `std::optional` is standard (as may be obvious from its | ||||
| namespace), it's a rather recent addition to the C++ STL. | ||||
| In order to gain access to it, we need to ensure that our | ||||
| project is compiled using C++17. To this end, we add | ||||
| the following two lines to our CMakeLists.txt: | ||||
| 
 | ||||
| {{< codelines "CMake" "compiler/13/CMakeLists.txt" 5 6 >}} | ||||
| 
 | ||||
| Now, let's add a new base class for all of our compiler errors, | ||||
| unsurprisingly called `compiler_error`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/error.hpp" 8 23 >}} | ||||
| 
 | ||||
| We'll put some 'common' exception functionality | ||||
| into the `print_location` and `print_about` methods. If the error | ||||
| has an associated location, the former method will print that | ||||
| location to the screen. We don't always want to highlight | ||||
| the part of the code that caused the error: for instance, | ||||
| an invalid data type definition may span several lines, | ||||
| and coloring that whole section of text red would be | ||||
| too much. To address this, we add the `highlight` | ||||
| boolean argument, which can be used to switch the | ||||
| colors on and off. The `print_about` method | ||||
| will simply print the `what()` message of the exception, | ||||
| in addition to the "specific" error that occurred (stored | ||||
| in `description`). Here are the implementations of the | ||||
| functions: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/error.cpp" 3 16 >}} | ||||
| 
 | ||||
| We will also add a `pretty_print` method to all of | ||||
| our exceptions. This method will handle | ||||
| all the exception-specific printing logic. | ||||
| For the generic compiler error, this means | ||||
| simply printing out the error text and the location: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/error.cpp" 18 21 >}} | ||||
| 
 | ||||
| For `type_error`, this logic slightly changes, | ||||
| enabling colors when printing the location: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/error.cpp" 27 30 >}} | ||||
| 
 | ||||
| Finally, for `unification_error`, we also include | ||||
| the code to print out the two types that our | ||||
| compiler could not unify: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/error.cpp" 32 41 >}} | ||||
| 
 | ||||
| There's a subtle change here. Compared to the previous | ||||
| type-printing code (which we had in `main`), what | ||||
| we wrote here deals with "expected" and "actual" types. | ||||
| The `left` type passed to the exception is printed | ||||
| first, and is treated like the "correct" type. The | ||||
| `right` type, on the other hand, is treated | ||||
| like the "wrong" type that should have been | ||||
| unifiable with `left`. This will affect the | ||||
| calling conventions of our unification code. In | ||||
| `main`, we remove all our old exception printing code | ||||
| in favor of calls to `pretty_print`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/main.cpp" 207 213 >}} | ||||
| 
 | ||||
| Now, we can go through and find all the places where | ||||
| we `throw 0`. One such place was in the data type | ||||
| definition code, where declaring the same type parameter | ||||
| twice is invalid. We replace the `0` with a  | ||||
| `compiler_error`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/definition.cpp" 66 69 >}} | ||||
| 
 | ||||
| Not all `throw 0` statements should become exceptions. | ||||
| For example, here's code from the previous version of | ||||
| the compiler: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/12/definition.cpp" 123 127 >}} | ||||
| 
 | ||||
| If a definition `def_defn` has a dependency on a "nearby" (declared | ||||
| in the same group) definition called `dependency`, and if | ||||
| `dependency` does not exist within the same definition group, | ||||
| we throw an exception. But this error is impossible | ||||
| for a user to trigger: the only reason for a variable to appear | ||||
| in the `nearby_variables` vector is that it was previously | ||||
| found in the definition group. Here's the code that proves this | ||||
| (from the current version of the compiler): | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/definition.cpp" 102 106 >}} | ||||
| 
 | ||||
| Not being able to find the variable in the definition group | ||||
| is a compiler bug, and should never occur. So, instead | ||||
| of throwing an exception, we'll use an assertion: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/definition.cpp" 128 128 >}} | ||||
| 
 | ||||
| For more complicated error messages, we can use a `stringstream`. | ||||
| Here's an example from `parsed_type`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/parsed_type.cpp" 16 23 >}} | ||||
| 
 | ||||
| In general, this change is also rather mechanical, but, to | ||||
| maintain a balance between exceptions and assertions, here | ||||
| are a couple more assertions from `type_env`: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/type_env.cpp" 77 78 >}} | ||||
| 
 | ||||
| Once again, it should not be possible for the compiler | ||||
| to try to generalize the type of a variable that doesn't | ||||
| exist, nor should generalization occur twice. | ||||
| 
 | ||||
| While we're on the topic of types, let's talk about | ||||
| `type_mgr::unify`. In practice, I suspect that a lot of | ||||
| errors in our compiler will originate from this method. | ||||
| However, at present, this method does not in any way | ||||
| track the locations of where a unification error occurred. | ||||
| To fix this, we add a new `loc` parameter to `unify`, | ||||
| which we make optional to allow for unification without | ||||
| a known location. Here's the declaration: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/type.hpp" 101 101 >}} | ||||
| 
 | ||||
| The change to the implementation is mechanical and repetitive, | ||||
| so instead of showing you the whole method, I'll settle for | ||||
| a couple of lines: | ||||
| 
 | ||||
| {{< codelines "C++" "compiler/13/type.cpp" 119 121 >}} | ||||
| 
 | ||||
| We want to make sure that a location provided to the | ||||
| top-level call to `unify` is also forwarded to the | ||||
| recursive calls, so we have to explicitly add it | ||||
| to the call. | ||||
| 
 | ||||
| With all of that done, we can finally stand back and | ||||
| marvel at the results of our hard work. Here is what a | ||||
| basic unification error looks like now: | ||||
| 
 | ||||
| {{< figure src="unification_error.png" caption="The result of a unification error." >}} | ||||
| 
 | ||||
| I used an image to show colors, but here is the content of the error in textual form: | ||||
| 
 | ||||
| ``` | ||||
| an error occured while checking the types of the program: failed to unify types | ||||
| occuring on line 2: | ||||
|     3 + False | ||||
| the expected type was: | ||||
|   !Int | ||||
| while the actual type was: | ||||
|   !Bool | ||||
| ``` | ||||
| 
 | ||||
| The exclamation marks in front of the two types are due to some | ||||
| changes from section 2. Here's an error that was previously | ||||
| a `throw 0` statement in our code: | ||||
| 
 | ||||
| ``` | ||||
| an error occured while compiling the program: type variable a used twice in data type definition. | ||||
| occuring on line 1: | ||||
| data Pair a a = { MkPair a a } | ||||
| ``` | ||||
| 
 | ||||
| Now, not only have we eliminated the lazy uses of `throw 0` in our | ||||
| code, but we've also improved the presentation of the errors | ||||
| to the user! | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user