Compare commits
2 Commits
1f6b4bef74
...
6b8d3b0f8a
Author | SHA1 | Date | |
---|---|---|---|
6b8d3b0f8a | |||
725958137a |
|
@ -3,6 +3,7 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
#include "env.hpp"
|
#include "env.hpp"
|
||||||
|
|
||||||
|
@ -234,13 +235,13 @@ struct case_mappings {
|
||||||
|
|
||||||
std::vector<instruction_ptr>& make_case_for(tag_type tag) {
|
std::vector<instruction_ptr>& make_case_for(tag_type tag) {
|
||||||
if(default_case)
|
if(default_case)
|
||||||
throw type_error("attempted pattern match after catch-all");
|
throw compiler_error("attempted pattern match after catch-all");
|
||||||
return defined_cases[tag];
|
return defined_cases[tag];
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<instruction_ptr>& make_default_case() {
|
std::vector<instruction_ptr>& make_default_case() {
|
||||||
if(default_case)
|
if(default_case)
|
||||||
throw type_error("attempted repeated use of catch-all");
|
throw compiler_error("attempted repeated use of catch-all");
|
||||||
default_case.emplace(std::vector<instruction_ptr>());
|
default_case.emplace(std::vector<instruction_ptr>());
|
||||||
return *default_case;
|
return *default_case;
|
||||||
}
|
}
|
||||||
|
@ -275,14 +276,16 @@ struct case_strategy_bool {
|
||||||
using tag_type = bool;
|
using tag_type = bool;
|
||||||
using repr_type = bool;
|
using repr_type = bool;
|
||||||
|
|
||||||
|
case_strategy_bool(const type* type) {}
|
||||||
|
|
||||||
tag_type tag_from_repr(repr_type b) { return b; }
|
tag_type tag_from_repr(repr_type b) { return b; }
|
||||||
|
|
||||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
repr_type repr_from_pattern(const pattern_ptr& pt) {
|
||||||
pattern_constr* cpat;
|
pattern_constr* cpat;
|
||||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())) ||
|
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())) ||
|
||||||
(cpat->constr != "True" && cpat->constr != "False") ||
|
(cpat->constr != "True" && cpat->constr != "False") ||
|
||||||
cpat->params.size() != 0)
|
cpat->params.size() != 0)
|
||||||
throw type_error(
|
throw compiler_error(
|
||||||
"pattern cannot be converted to a boolean",
|
"pattern cannot be converted to a boolean",
|
||||||
pt->loc);
|
pt->loc);
|
||||||
return cpat->constr == "True";
|
return cpat->constr == "True";
|
||||||
|
@ -297,12 +300,11 @@ struct case_strategy_bool {
|
||||||
into.push_back(instruction_ptr(new instruction_slide(1)));
|
into.push_back(instruction_ptr(new instruction_slide(1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t case_count(const type* type) {
|
size_t case_count() {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
void into_instructions(
|
void into_instructions(
|
||||||
const type* type,
|
|
||||||
case_mappings<case_strategy_bool>& ms,
|
case_mappings<case_strategy_bool>& ms,
|
||||||
std::vector<instruction_ptr>& into) {
|
std::vector<instruction_ptr>& into) {
|
||||||
if(ms.defined_cases_count() == 0) {
|
if(ms.defined_cases_count() == 0) {
|
||||||
|
@ -321,16 +323,23 @@ struct case_strategy_data {
|
||||||
using tag_type = int;
|
using tag_type = int;
|
||||||
using repr_type = std::pair<const type_data::constructor*, const std::vector<std::string>*>;
|
using repr_type = std::pair<const type_data::constructor*, const std::vector<std::string>*>;
|
||||||
|
|
||||||
|
const type_data* arg_type;
|
||||||
|
|
||||||
|
case_strategy_data(const type* t) {
|
||||||
|
arg_type = dynamic_cast<const type_data*>(t);
|
||||||
|
assert(arg_type);
|
||||||
|
}
|
||||||
|
|
||||||
tag_type tag_from_repr(const repr_type& repr) { return repr.first->tag; }
|
tag_type tag_from_repr(const repr_type& repr) { return repr.first->tag; }
|
||||||
|
|
||||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
repr_type repr_from_pattern(const pattern_ptr& pt) {
|
||||||
pattern_constr* cpat;
|
pattern_constr* cpat;
|
||||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())))
|
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())))
|
||||||
throw type_error(
|
throw compiler_error(
|
||||||
"pattern cannot be interpreted as constructor.",
|
"pattern cannot be interpreted as constructor.",
|
||||||
pt->loc);
|
pt->loc);
|
||||||
return std::make_pair(
|
return std::make_pair(
|
||||||
&static_cast<const type_data*>(type)->constructors.find(cpat->constr)->second,
|
&arg_type->constructors.find(cpat->constr)->second,
|
||||||
&cpat->params);
|
&cpat->params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -349,19 +358,17 @@ struct case_strategy_data {
|
||||||
into.push_back(instruction_ptr(new instruction_slide(repr.second->size())));
|
into.push_back(instruction_ptr(new instruction_slide(repr.second->size())));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t case_count(const type* type) {
|
size_t case_count() {
|
||||||
return static_cast<const type_data*>(type)->constructors.size();
|
return arg_type->constructors.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
void into_instructions(
|
void into_instructions(
|
||||||
const type* type,
|
|
||||||
case_mappings<case_strategy_data>& ms,
|
case_mappings<case_strategy_data>& ms,
|
||||||
std::vector<instruction_ptr>& into) {
|
std::vector<instruction_ptr>& into) {
|
||||||
instruction_jump* jump_instruction = new instruction_jump();
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
instruction_ptr inst(jump_instruction);
|
instruction_ptr inst(jump_instruction);
|
||||||
|
|
||||||
auto data_type = static_cast<const type_data*>(type);
|
for(auto& constr : arg_type->constructors) {
|
||||||
for(auto& constr : data_type->constructors) {
|
|
||||||
if(!ms.case_defined_for(constr.second.tag)) continue;
|
if(!ms.case_defined_for(constr.second.tag)) continue;
|
||||||
jump_instruction->branches.push_back(
|
jump_instruction->branches.push_back(
|
||||||
std::move(ms.get_specific_case_for(constr.second.tag)));
|
std::move(ms.get_specific_case_for(constr.second.tag)));
|
||||||
|
@ -372,7 +379,7 @@ struct case_strategy_data {
|
||||||
if(ms.default_case_defined()) {
|
if(ms.default_case_defined()) {
|
||||||
jump_instruction->branches.push_back(
|
jump_instruction->branches.push_back(
|
||||||
std::move(ms.get_default_case()));
|
std::move(ms.get_default_case()));
|
||||||
for(auto& constr : data_type->constructors) {
|
for(auto& constr : arg_type->constructors) {
|
||||||
if(ms.case_defined_for(constr.second.tag)) continue;
|
if(ms.case_defined_for(constr.second.tag)) continue;
|
||||||
jump_instruction->tag_mappings[constr.second.tag] =
|
jump_instruction->tag_mappings[constr.second.tag] =
|
||||||
jump_instruction->branches.size();
|
jump_instruction->branches.size();
|
||||||
|
@ -385,29 +392,29 @@ struct case_strategy_data {
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void compile_case(const ast_case& node, const env_ptr& env, const type* type, std::vector<instruction_ptr>& into) {
|
void compile_case(const ast_case& node, const env_ptr& env, const type* type, std::vector<instruction_ptr>& into) {
|
||||||
T strategy;
|
T strategy(type);
|
||||||
case_mappings<T> cases;
|
case_mappings<T> cases;
|
||||||
for(auto& branch : node.branches) {
|
for(auto& branch : node.branches) {
|
||||||
pattern_var* vpat;
|
pattern_var* vpat;
|
||||||
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
if(cases.defined_cases_count() == strategy.case_count(type))
|
if(cases.defined_cases_count() == strategy.case_count())
|
||||||
throw type_error("redundant catch-all pattern", branch->pat->loc);
|
throw compiler_error("redundant catch-all pattern", branch->pat->loc);
|
||||||
auto& branch_into = cases.make_default_case();
|
auto& branch_into = cases.make_default_case();
|
||||||
env_ptr new_env(new env_var(vpat->var, env));
|
env_ptr new_env(new env_var(vpat->var, env));
|
||||||
branch->expr->compile(new_env, branch_into);
|
branch->expr->compile(new_env, branch_into);
|
||||||
branch_into.push_back(instruction_ptr(new instruction_slide(1)));
|
branch_into.push_back(instruction_ptr(new instruction_slide(1)));
|
||||||
} else {
|
} else {
|
||||||
auto repr = strategy.from_typed_pattern(branch->pat, type);
|
auto repr = strategy.repr_from_pattern(branch->pat);
|
||||||
auto& branch_into = cases.make_case_for(strategy.tag_from_repr(repr));
|
auto& branch_into = cases.make_case_for(strategy.tag_from_repr(repr));
|
||||||
strategy.compile_branch(branch, env, repr, branch_into);
|
strategy.compile_branch(branch, env, repr, branch_into);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!(cases.defined_cases_count() == strategy.case_count(type) ||
|
if(!(cases.defined_cases_count() == strategy.case_count() ||
|
||||||
cases.default_case_defined()))
|
cases.default_case_defined()))
|
||||||
throw type_error("incomplete patterns", node.loc);
|
throw compiler_error("incomplete patterns", node.loc);
|
||||||
|
|
||||||
strategy.into_instructions(type, cases, into);
|
strategy.into_instructions(cases, into);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
|
|
@ -64,9 +64,9 @@ void definition_data::insert_constructors() const {
|
||||||
type_ptr return_type(return_app);
|
type_ptr return_type(return_app);
|
||||||
for(auto& var : vars) {
|
for(auto& var : vars) {
|
||||||
if(var_set.find(var) != var_set.end())
|
if(var_set.find(var) != var_set.end())
|
||||||
throw std::runtime_error(
|
throw compiler_error(
|
||||||
std::string("type variable ") +
|
std::string("type variable ") +
|
||||||
var + std::string(" used twice in data type definition."));
|
var + std::string(" used twice in data type definition."), loc);
|
||||||
var_set.insert(var);
|
var_set.insert(var);
|
||||||
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,19 +1,32 @@
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
|
||||||
|
const char* compiler_error::what() const noexcept {
|
||||||
|
return "an error occured while compiling the program";
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler_error::print_about(std::ostream& to) {
|
||||||
|
to << what() << ": ";
|
||||||
|
to << description << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler_error::print_location(std::ostream& to, parse_driver& drv, bool highlight) {
|
||||||
|
if(!loc) return;
|
||||||
|
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
||||||
|
drv.print_location(to, *loc, highlight);
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler_error::pretty_print(std::ostream& to, parse_driver& drv) {
|
||||||
|
print_about(to);
|
||||||
|
print_location(to, drv);
|
||||||
|
}
|
||||||
|
|
||||||
const char* type_error::what() const noexcept {
|
const char* type_error::what() const noexcept {
|
||||||
return "an error occured while checking the types of the program";
|
return "an error occured while checking the types of the program";
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_error::pretty_print(std::ostream& to, parse_driver& drv) {
|
void type_error::pretty_print(std::ostream& to, parse_driver& drv) {
|
||||||
to << "encountered error while typechecking program: ";
|
print_about(to);
|
||||||
to << description << std::endl;
|
print_location(to, drv, true);
|
||||||
|
|
||||||
if(loc) {
|
|
||||||
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
|
||||||
to << std::endl << "```" << std::endl;
|
|
||||||
drv.print_highlighted_location(to, *loc);
|
|
||||||
to << "```" << std::endl << std::endl;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void unification_error::pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr) {
|
void unification_error::pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr) {
|
||||||
|
|
|
@ -7,12 +7,26 @@
|
||||||
|
|
||||||
using maybe_location = std::optional<yy::location>;
|
using maybe_location = std::optional<yy::location>;
|
||||||
|
|
||||||
struct type_error : std::exception {
|
struct compiler_error : std::exception {
|
||||||
std::string description;
|
std::string description;
|
||||||
|
maybe_location loc;
|
||||||
|
|
||||||
|
compiler_error(std::string d, maybe_location l = std::nullopt)
|
||||||
|
: description(std::move(d)), loc(std::move(l)) {}
|
||||||
|
|
||||||
|
const char* what() const noexcept override;
|
||||||
|
|
||||||
|
void print_about(std::ostream& to);
|
||||||
|
void print_location(std::ostream& to, parse_driver& drv, bool highlight = false);
|
||||||
|
|
||||||
|
void pretty_print(std::ostream& to, parse_driver& drv);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_error : compiler_error {
|
||||||
std::optional<yy::location> loc;
|
std::optional<yy::location> loc;
|
||||||
|
|
||||||
type_error(std::string d, maybe_location l = std::nullopt)
|
type_error(std::string d, maybe_location l = std::nullopt)
|
||||||
: description(std::move(d)), loc(std::move(l)) {}
|
: compiler_error(std::move(d), std::move(l)) {}
|
||||||
|
|
||||||
const char* what() const noexcept override;
|
const char* what() const noexcept override;
|
||||||
void pretty_print(std::ostream& to, parse_driver& drv);
|
void pretty_print(std::ostream& to, parse_driver& drv);
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
||||||
std::cout << "An error occured: " << msg << std::endl;
|
std::cerr << "An error occured: " << msg << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void prelude_types(definition_group& defs, type_env_ptr env) {
|
void prelude_types(definition_group& defs, type_env_ptr env) {
|
||||||
|
@ -110,12 +110,12 @@ void output_llvm(llvm_context& ctx, const std::string& filename) {
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
|
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
|
||||||
if (ec) {
|
if (ec) {
|
||||||
throw std::runtime_error("failed to open object file for writing");
|
throw compiler_error("failed to open object file for writing");
|
||||||
} else {
|
} else {
|
||||||
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
||||||
llvm::legacy::PassManager pm;
|
llvm::legacy::PassManager pm;
|
||||||
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
||||||
throw std::runtime_error("failed to add passes to pass manager");
|
throw compiler_error("failed to add passes to pass manager");
|
||||||
} else {
|
} else {
|
||||||
pm.run(ctx.module);
|
pm.run(ctx.module);
|
||||||
file.close();
|
file.close();
|
||||||
|
@ -177,10 +177,11 @@ void gen_llvm(global_scope& scope) {
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
if(argc != 2) {
|
if(argc != 2) {
|
||||||
std::cerr << "please enter a file to compile." << std::endl;
|
std::cerr << "please enter a file to compile." << std::endl;
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
parse_driver driver(argv[1]);
|
parse_driver driver(argv[1]);
|
||||||
if(!driver.run_parse()) {
|
if(!driver.run_parse()) {
|
||||||
std::cerr << "failed to open file " << argv[1] << std::endl;
|
std::cerr << "failed to parse file " << argv[1] << std::endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,7 +208,7 @@ int main(int argc, char** argv) {
|
||||||
err.pretty_print(std::cerr, driver, mgr);
|
err.pretty_print(std::cerr, driver, mgr);
|
||||||
} catch(type_error& err) {
|
} catch(type_error& err) {
|
||||||
err.pretty_print(std::cerr, driver);
|
err.pretty_print(std::cerr, driver);
|
||||||
} catch(std::runtime_error& err) {
|
} catch (compiler_error& err) {
|
||||||
std::cerr << err.what() << std::endl;
|
err.pretty_print(std::cerr, driver);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
bool parse_driver::run_parse() {
|
bool parse_driver::run_parse() {
|
||||||
FILE* stream = fopen(file_name.c_str(), "r");
|
FILE* stream = fopen(file_name.c_str(), "r");
|
||||||
if(!stream) return false;
|
if(!stream) return false;
|
||||||
string_stream = std::ostringstream();
|
|
||||||
file_offset = 0;
|
|
||||||
line_offsets.push_back(0);
|
line_offsets.push_back(0);
|
||||||
yyscan_t scanner;
|
yyscan_t scanner;
|
||||||
yylex_init(&scanner);
|
yylex_init(&scanner);
|
||||||
|
@ -29,27 +27,27 @@ void parse_driver::mark_line() {
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t parse_driver::get_index(int line, int column) {
|
size_t parse_driver::get_index(int line, int column) {
|
||||||
assert(line > 0);
|
assert(line > 0 && line <= line_offsets.size());
|
||||||
assert(line <= line_offsets.size());
|
return line_offsets[line-1] + column - 1;
|
||||||
size_t file_offset = line_offsets[line-1];
|
|
||||||
file_offset += column - 1;
|
|
||||||
return file_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t parse_driver::get_line_end(int line) {
|
size_t parse_driver::get_line_end(int line) {
|
||||||
if(line > line_offsets.size()) return file_contents.size();
|
if(line == line_offsets.size()) return file_contents.size();
|
||||||
return get_index(line+1, 1);
|
return get_index(line+1, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_driver::print_highlighted_location(std::ostream& stream, const yy::location& loc) {
|
void parse_driver::print_location(
|
||||||
|
std::ostream& stream,
|
||||||
|
const yy::location& loc,
|
||||||
|
bool highlight) {
|
||||||
size_t print_start = get_index(loc.begin.line, 1);
|
size_t print_start = get_index(loc.begin.line, 1);
|
||||||
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
||||||
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
||||||
size_t print_end = get_line_end(loc.end.line);
|
size_t print_end = get_line_end(loc.end.line);
|
||||||
const char* content = file_contents.c_str();
|
const char* content = file_contents.c_str();
|
||||||
stream.write(content + print_start, highlight_start - print_start);
|
stream.write(content + print_start, highlight_start - print_start);
|
||||||
stream << "\033[4;31m";
|
if(highlight) stream << "\033[4;31m";
|
||||||
stream.write(content + highlight_start, highlight_end - highlight_start);
|
stream.write(content + highlight_start, highlight_end - highlight_start);
|
||||||
stream << "\033[0m";
|
if(highlight) stream << "\033[0m";
|
||||||
stream.write(content + highlight_end, print_end - highlight_end);
|
stream.write(content + highlight_end, print_end - highlight_end);
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,13 +14,13 @@ void scanner_destroy(yyscan_t* scanner);
|
||||||
struct parse_driver {
|
struct parse_driver {
|
||||||
std::string file_name;
|
std::string file_name;
|
||||||
std::ostringstream string_stream;
|
std::ostringstream string_stream;
|
||||||
|
std::string file_contents;
|
||||||
|
|
||||||
yy::location location;
|
yy::location location;
|
||||||
size_t file_offset;
|
size_t file_offset;
|
||||||
|
|
||||||
std::vector<size_t> line_offsets;
|
std::vector<size_t> line_offsets;
|
||||||
|
|
||||||
definition_group global_defs;
|
definition_group global_defs;
|
||||||
std::string file_contents;
|
|
||||||
|
|
||||||
parse_driver(const std::string& file)
|
parse_driver(const std::string& file)
|
||||||
: file_name(file), file_offset(0) {}
|
: file_name(file), file_offset(0) {}
|
||||||
|
@ -30,7 +30,10 @@ struct parse_driver {
|
||||||
void mark_line();
|
void mark_line();
|
||||||
size_t get_index(int line, int column);
|
size_t get_index(int line, int column);
|
||||||
size_t get_line_end(int line);
|
size_t get_line_end(int line);
|
||||||
void print_highlighted_location(std::ostream& stream, const yy::location& loc);
|
void print_location(
|
||||||
|
std::ostream& stream,
|
||||||
|
const yy::location& loc,
|
||||||
|
bool highlight = true);
|
||||||
};
|
};
|
||||||
|
|
||||||
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
||||||
|
|
|
@ -62,7 +62,7 @@ automatically assemble the "from" and "to" locations of a nonterminal
|
||||||
from the locations of children, which would be very tedious to write
|
from the locations of children, which would be very tedious to write
|
||||||
by hand. We enable this feature using the following option:
|
by hand. We enable this feature using the following option:
|
||||||
|
|
||||||
{{< codelines "text" "compiler/13/parser.y" 50 50 >}}
|
{{< codelines "C++" "compiler/13/parser.y" 50 50 >}}
|
||||||
|
|
||||||
There's just one hitch, though. Sure, Bison can compute bigger
|
There's just one hitch, though. Sure, Bison can compute bigger
|
||||||
locations from smaller ones, but it must get the smaller ones
|
locations from smaller ones, but it must get the smaller ones
|
||||||
|
@ -143,6 +143,17 @@ from `ast_binop`:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/13/ast.hpp" 98 99 >}}
|
{{< codelines "C++" "compiler/13/ast.hpp" 98 99 >}}
|
||||||
|
|
||||||
|
Finally, we tell Bison to pass the computed location
|
||||||
|
data as an argument when constructing our data structures.
|
||||||
|
This too is a mechanical change, and I think the following
|
||||||
|
couple of lines demonstrate the general idea in sufficient
|
||||||
|
detail:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parser.y" 107 110 >}}
|
||||||
|
|
||||||
|
Here, the `@$` symbol is used to reference the current
|
||||||
|
nonterminal's location data.
|
||||||
|
|
||||||
#### Line Offsets, File Input, and the Parse Driver
|
#### Line Offsets, File Input, and the Parse Driver
|
||||||
There are three more challenges with printing out the line
|
There are three more challenges with printing out the line
|
||||||
of code where an error occurred. First of all, to
|
of code where an error occurred. First of all, to
|
||||||
|
@ -202,7 +213,8 @@ will also need some way of accessing the `yy::location` instance, and
|
||||||
a way of storing our file input in memory. Fortunately, we're not
|
a way of storing our file input in memory. Fortunately, we're not
|
||||||
the only ones to have ever come across the issue of creating non-global
|
the only ones to have ever come across the issue of creating non-global
|
||||||
state: the Bison documentation has a
|
state: the Bison documentation has a
|
||||||
[section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html) that describes a technique for manipulating
|
[section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html)
|
||||||
|
that describes a technique for manipulating
|
||||||
state -- "parsing context", in their words. This technique involves the
|
state -- "parsing context", in their words. This technique involves the
|
||||||
creation of a _parsing driver_.
|
creation of a _parsing driver_.
|
||||||
|
|
||||||
|
@ -211,4 +223,352 @@ state. We can arrange for this class to be available to our tokenizing
|
||||||
and parsing functions, which will allow us to use it pretty much like we'd
|
and parsing functions, which will allow us to use it pretty much like we'd
|
||||||
use a global variable. We can define it as follows:
|
use a global variable. We can define it as follows:
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 14 34 >}}
|
{{< codelines "C++" "compiler/13/parse_driver.hpp" 14 37 >}}
|
||||||
|
|
||||||
|
There are quite a few fields here. The `file_name` string represents
|
||||||
|
the file that we'll be reading code from. The `string_stream` will
|
||||||
|
be used to back up the contents of the source file as Flex reads them;
|
||||||
|
once Flex is done, the content of the `string_stream` will be
|
||||||
|
saved into the `file_contents` string.
|
||||||
|
|
||||||
|
The next three fields deal with tracking source code
|
||||||
|
locations. The `location` field will be accessed by Flex
|
||||||
|
via `drv.location` (where `drv` is a reference to our driver class).
|
||||||
|
The `file_offset` and `line_offsets` fields will be used to
|
||||||
|
keep track of where each line begins, as we have discussed above.
|
||||||
|
Finally, `global_defs` will be the new home of our top-level
|
||||||
|
definitions.
|
||||||
|
|
||||||
|
The methods on `parse_driver` are rather simple, too:
|
||||||
|
|
||||||
|
* `run_parse` handles the initialization of the tokenizer
|
||||||
|
and parser, which includes obtaining the `FILE*` and configuring
|
||||||
|
Flex to use it. It also handles invoking the parsing code.
|
||||||
|
We'll make this method return `true` if parsing succeeded,
|
||||||
|
and `false` otherwise (if, say, the file we tried to read doesn't exist).
|
||||||
|
* `write` will be called from Flex, and will allow us to
|
||||||
|
record the content of the file we're processing to the `string_stream`.
|
||||||
|
We've already seen it used in the `YY_USER_ACTION` macro.
|
||||||
|
* `mark_line` will also be called from Flex, and will mark the current
|
||||||
|
`file_offset` as the beginning of a line by pushing it into `line_offsets`.
|
||||||
|
* `get_index` and `get_line_end` will be used for converting
|
||||||
|
`yy::location` instances to offsets within the source code buffer.
|
||||||
|
* `print_location` will be used for printing errors.
|
||||||
|
It will print the lines spanned by the given location, with the
|
||||||
|
location itself colored and underlined if the last argument is `true`.
|
||||||
|
This will make our errors easier on the eyes.
|
||||||
|
|
||||||
|
Let's take a look at their implementations. First, `run_parse`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 5 18 >}}
|
||||||
|
|
||||||
|
We try to open the user-specified file, and return `false` if we can't.
|
||||||
|
We then initialize `line_offsets` as we discussed above. After
|
||||||
|
this, we start doing the setup specific to a reentrant
|
||||||
|
Flex scanner. We declare a `yyscan_t` variable, which
|
||||||
|
will contain all of Flex's state. Then, we initialize
|
||||||
|
it using `yylex_init`. Finally, since we can no longer
|
||||||
|
touch the `yyin` global variable (it doesn't exist),
|
||||||
|
we have to resort to using a setter function provided by Flex
|
||||||
|
to configure the tokenizer's input stream.
|
||||||
|
|
||||||
|
Next, we construct our Bison-generated parser. Note that
|
||||||
|
unlike before, we have to pass in two arguments:
|
||||||
|
`scanner` and `*this`, the latter being of type `parse_driver&`.
|
||||||
|
We'll come back to how this works in a moment. With
|
||||||
|
the scanner and parser initialized, we invoke `parser::operator()`,
|
||||||
|
which actually runs the Flex- and Bison-generated code.
|
||||||
|
To clean up, we run `yylex_destroy` and `fclose`. Finally,
|
||||||
|
we extract the contents of our file into the `file_contents`
|
||||||
|
string, and return.
|
||||||
|
|
||||||
|
Next, the `write` method. For the most part, this method
|
||||||
|
is a proxy for the `write` method of our `string_stream`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 20 23 >}}
|
||||||
|
|
||||||
|
We do, however, also keep track of the `file_offset` variable
|
||||||
|
here, which ensures we have up-to-date information
|
||||||
|
regarding our position in the source file. The implementation
|
||||||
|
of `mark_line` uses this information:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 25 27 >}}
|
||||||
|
|
||||||
|
Once we have the line offsets, `get_index` becomes very simple:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 29 32 >}}
|
||||||
|
|
||||||
|
Here, we use an assertion for the first time. Calling
|
||||||
|
`get_index` with a negative or zero line doesn't make
|
||||||
|
any sense, since Bison starts tracking line numbers
|
||||||
|
at 1. Similarly, asking for a line for which we don't
|
||||||
|
have a recorded offset is invalid. Both
|
||||||
|
of these nonsensical calls to `get_index` cannot
|
||||||
|
be caused by the user under normal circumstances,
|
||||||
|
and indicate the method's misuse by the author of
|
||||||
|
the compiler (us!). Thus, we terminate the program.
|
||||||
|
|
||||||
|
Finally, the implementation of `get_line_end` just finds the
|
||||||
|
beginning of the next line. We stick to the C convention
|
||||||
|
of marking 'end' indices exclusive (pointing just past
|
||||||
|
the end of the array):
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 34 37 >}}
|
||||||
|
|
||||||
|
Since `line_offsets` has as many elements as there are lines,
|
||||||
|
the last line number would be equal to the vector's size.
|
||||||
|
When looking up the end of the last line, we can't look for
|
||||||
|
the beginning of the next line, so instead we return the end of the file.
|
||||||
|
|
||||||
|
Next, the `print_location` method prints three sections
|
||||||
|
of the source file. These are the text "before" the error,
|
||||||
|
the error itself, and, finally, the text "after" the error.
|
||||||
|
For example, if an error began on the fifth column of the third
|
||||||
|
line, and ended on the eighth column of the fourth line, the
|
||||||
|
"before" section would include the first four columns of the third
|
||||||
|
line, and the "after" section would be the ninth column onward
|
||||||
|
on the fourth line. Before and after the error itself,
|
||||||
|
if the `highlight` argument is true,
|
||||||
|
we sprinkle the ANSI escape codes to enable and disable
|
||||||
|
special formatting, respectively. For now, the special
|
||||||
|
formatting involves underlining the text and making it red.
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 39 53 >}}
|
||||||
|
|
||||||
|
Finally, to get the forward declarations for the `yy*` functions
|
||||||
|
and types, we set the `header-file` option in Flex:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/scanner.l" 3 3 >}}
|
||||||
|
|
||||||
|
We also include this `scanner.hpp` file in our `parse_driver.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 2 2 >}}
|
||||||
|
|
||||||
|
#### Adding the Driver to Flex and Bison
|
||||||
|
Bison's C++ language template generates a class called
|
||||||
|
`yy::parser`. We don't really want to modify this class
|
||||||
|
in any way: not only is it generated code, but it's
|
||||||
|
also rather complex. Instead, Bison provides us
|
||||||
|
with a mechanism to pass more data in to the parser.
|
||||||
|
This data is made available to all the actions
|
||||||
|
that the parser runs. Better yet, Bison also attempts
|
||||||
|
to pass this data on to the tokenizer, which in our
|
||||||
|
case would mean that whatever data we provide Bison
|
||||||
|
will also be available to Flex. This is how we'll
|
||||||
|
allow the two components to access our new `parse_driver`
|
||||||
|
class. This is also how we'll pass in the `yyscan_t`
|
||||||
|
that Flex now needs to run its tokenizing code. To
|
||||||
|
do all this, we use Bison's `%param` option. I'm
|
||||||
|
going to include a few more lines from `parser.y`,
|
||||||
|
since they contain the necessary `#include` directives
|
||||||
|
and a required type definition:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parser.y" 1 18 >}}
|
||||||
|
|
||||||
|
The `%param` option effectively adds the parameter listed
|
||||||
|
between the curly braces to the constructor of the generated
|
||||||
|
`yy::parser`. We've already seen this in the implementation
|
||||||
|
of our driver, where we passed `scanner` and `*this` as
|
||||||
|
arguments when creating the parser. The parameters we declare are also passed to the
|
||||||
|
`yylex` function, which is expected to accept them in the same order.
|
||||||
|
|
||||||
|
Since we're adding `parse_driver` as an argument we have to
|
||||||
|
declare it. However, we can't include the `parse_driver` header
|
||||||
|
right away because `parse_driver` itself includes the `parser` header:
|
||||||
|
we'd end up with a circular dependency. Instead, we resort to
|
||||||
|
forward-declaring the driver class, as well as the `yyscan_t`
|
||||||
|
structure containing Flex's state.
|
||||||
|
|
||||||
|
Adding a parameter to Bison doesn't automatically affect
|
||||||
|
Flex. To let Flex know that its `yylex` function must now accept
|
||||||
|
the state and the parse driver, we have to define the
|
||||||
|
`YY_DECL` macro. We do this in `parse_driver.hpp`, since
|
||||||
|
this forward declaration will be used by both Flex
|
||||||
|
and Bison:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.hpp" 39 41 >}}
|
||||||
|
|
||||||
|
Finally, we can change our `main.cpp` file to use the
|
||||||
|
`parse_driver`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/main.cpp" 178 186 >}}
|
||||||
|
|
||||||
|
#### Improving Exceptions
|
||||||
|
Now, it's time to add location data (and a little bit more) to our
|
||||||
|
exceptions. We want to make it possible for exceptions to include
|
||||||
|
data about where the error occurred, and to print this data to the user.
|
||||||
|
However, it's also possible for us to have exceptions that simply
|
||||||
|
do not have that location data. Furthermore, we want to know
|
||||||
|
whether or not an exception has an associated location; we'd
|
||||||
|
rather not print an invalid or "default" location when an error
|
||||||
|
occurs.
|
||||||
|
|
||||||
|
In the old days of programming, we could represent the absence
|
||||||
|
of location data with a `nullptr`, or `NULL`. But not only
|
||||||
|
does this approach expose us to all kinds of `NULL`-safety
|
||||||
|
bugs, but it also requires heap allocation! This doesn't
|
||||||
|
make it sound all that appealing; instead, I think we should
|
||||||
|
opt for using `std::optional`.
|
||||||
|
|
||||||
|
Though `std::optional` is standard (as may be obvious from its
|
||||||
|
namespace), it's a rather recent addition to the C++ STL.
|
||||||
|
In order to gain access to it, we need to ensure that our
|
||||||
|
project is compiled using C++17. To this end, we add
|
||||||
|
the following two lines to our CMakeLists.txt:
|
||||||
|
|
||||||
|
{{< codelines "CMake" "compiler/13/CMakeLists.txt" 5 6 >}}
|
||||||
|
|
||||||
|
Now, let's add a new base class for all of our compiler errors,
|
||||||
|
unsurprisingly called `compiler_error`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.hpp" 8 23 >}}
|
||||||
|
|
||||||
|
We'll put some 'common' exception functionality
|
||||||
|
into the `print_location` and `print_about` methods. If the error
|
||||||
|
has an associated location, the former method will print that
|
||||||
|
location to the screen. We don't always want to highlight
|
||||||
|
the part of the code that caused the error: for instance,
|
||||||
|
an invalid data type definition may span several lines,
|
||||||
|
and coloring that whole section of text red would be
|
||||||
|
too much. To address this, we add the `highlight`
|
||||||
|
boolean argument, which can be used to switch the
|
||||||
|
colors on and off. The `print_about` method
|
||||||
|
will simply print the `what()` message of the exception,
|
||||||
|
in addition to the "specific" error that occurred (stored
|
||||||
|
in `description`). Here are the implementations of the
|
||||||
|
functions:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 3 16 >}}
|
||||||
|
|
||||||
|
We will also add a `pretty_print` method to all of
|
||||||
|
our exceptions. This method will handle
|
||||||
|
all the exception-specific printing logic.
|
||||||
|
For the generic compiler error, this means
|
||||||
|
simply printing out the error text and the location:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 18 21 >}}
|
||||||
|
|
||||||
|
For `type_error`, this logic slightly changes,
|
||||||
|
enabling colors when printing the location:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 27 30 >}}
|
||||||
|
|
||||||
|
Finally, for `unification_error`, we also include
|
||||||
|
the code to print out the two types that our
|
||||||
|
compiler could not unify:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 32 41 >}}
|
||||||
|
|
||||||
|
There's a subtle change here. Compared to the previous
|
||||||
|
type-printing code (which we had in `main`), what
|
||||||
|
we wrote here deals with "expected" and "actual" types.
|
||||||
|
The `left` type passed to the exception is printed
|
||||||
|
first, and is treated like the "correct" type. The
|
||||||
|
`right` type, on the other hand, is treated
|
||||||
|
like the "wrong" type that should have been
|
||||||
|
unifiable with `left`. This will affect the
|
||||||
|
calling conventions of our unification code. In
|
||||||
|
`main`, we remove all our old exception printing code
|
||||||
|
in favor of calls to `pretty_print`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/main.cpp" 207 213 >}}
|
||||||
|
|
||||||
|
Now, we can go through and find all the places where
|
||||||
|
we `throw 0`. One such place was in the data type
|
||||||
|
definition code, where declaring the same type parameter
|
||||||
|
twice is invalid. We replace the `0` with a
|
||||||
|
`compiler_error`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/definition.cpp" 66 69 >}}
|
||||||
|
|
||||||
|
Not all `throw 0` statements should become exceptions.
|
||||||
|
For example, here's code from the previous version of
|
||||||
|
the compiler:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 123 127 >}}
|
||||||
|
|
||||||
|
If a definition `def_defn` has a dependency on a "nearby" (declared
|
||||||
|
in the same group) definition called `dependency`, and if
|
||||||
|
`dependency` does not exist within the same definition group,
|
||||||
|
we throw an exception. But this error is impossible
|
||||||
|
for a user to trigger: the only reason for a variable to appear
|
||||||
|
in the `nearby_variables` vector is that it was previously
|
||||||
|
found in the definition group. Here's the code that proves this
|
||||||
|
(from the current version of the compiler):
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/definition.cpp" 102 106 >}}
|
||||||
|
|
||||||
|
Not being able to find the variable in the definition group
|
||||||
|
is a compiler bug, and should never occur. So, instead
|
||||||
|
of throwing an exception, we'll use an assertion:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/definition.cpp" 128 128 >}}
|
||||||
|
|
||||||
|
For more complicated error messages, we can use a `stringstream`.
|
||||||
|
Here's an example from `parsed_type`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parsed_type.cpp" 16 23 >}}
|
||||||
|
|
||||||
|
In general, this change is also rather mechanical, but, to
|
||||||
|
maintain a balance between exceptions and assertions, here
|
||||||
|
are a couple more assertions from `type_env`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type_env.cpp" 77 78 >}}
|
||||||
|
|
||||||
|
Once again, it should not be possible for the compiler
|
||||||
|
to try to generalize the type of a variable that doesn't
|
||||||
|
exist, nor should generalization occur twice.
|
||||||
|
|
||||||
|
While we're on the topic of types, let's talk about
|
||||||
|
`type_mgr::unify`. In practice, I suspect that a lot of
|
||||||
|
errors in our compiler will originate from this method.
|
||||||
|
However, at present, this method does not in any way
|
||||||
|
track the locations of where a unification error occurred.
|
||||||
|
To fix this, we add a new `loc` parameter to `unify`,
|
||||||
|
which we make optional to allow for unification without
|
||||||
|
a known location. Here's the declaration:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type.hpp" 101 101 >}}
|
||||||
|
|
||||||
|
The change to the implementation is mechanical and repetitive,
|
||||||
|
so instead of showing you the whole method, I'll settle for
|
||||||
|
a couple of lines:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type.cpp" 119 121 >}}
|
||||||
|
|
||||||
|
We want to make sure that a location provided to the
|
||||||
|
top-level call to `unify` is also forwarded to the
|
||||||
|
recursive calls, so we have to explicitly add it
|
||||||
|
to the call.
|
||||||
|
|
||||||
|
With all of that done, we can finally stand back and
|
||||||
|
marvel at the results of our hard work. Here is what a
|
||||||
|
basic unification error looks like now:
|
||||||
|
|
||||||
|
{{< figure src="unification_error.png" caption="The result of a unification error." >}}
|
||||||
|
|
||||||
|
I used an image to show colors, but here is the content of the error in textual form:
|
||||||
|
|
||||||
|
```
|
||||||
|
an error occured while checking the types of the program: failed to unify types
|
||||||
|
occuring on line 2:
|
||||||
|
3 + False
|
||||||
|
the expected type was:
|
||||||
|
!Int
|
||||||
|
while the actual type was:
|
||||||
|
!Bool
|
||||||
|
```
|
||||||
|
|
||||||
|
The exclamation marks in front of the two types are due to some
|
||||||
|
changes from section 2. Here's an error that was previously
|
||||||
|
a `throw 0` statement in our code:
|
||||||
|
|
||||||
|
```
|
||||||
|
an error occured while compiling the program: type variable a used twice in data type definition.
|
||||||
|
occuring on line 1:
|
||||||
|
data Pair a a = { MkPair a a }
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, not only have we eliminated the lazy uses of `throw 0` in our
|
||||||
|
code, but we've also improved the presentation of the errors
|
||||||
|
to the user!
|
||||||
|
|
Loading…
Reference in New Issue
Block a user