Compare commits (55 commits, 8bf67c7dc3...)

Commits: e0451d026c, 1f1345477f, 44529e872f, a10996954e, 4d1dfb5f66, f97b624688, 8215c59122, eb97bd9c3e, d2e100fe4b, de09a1f6bd, c40672e762, 565d4a6955, 8f0f2eb35e, 234b795157, e317c56c99, 29d12a9914, b459e9cbfe, 52abe73ef7, f0fe481bcf, 222446a937, e7edd43034, 2bc2c282e1, 5cc92d3a9d, 4be8a25699, d3421733e1, 4c099a54e8, 9f77f07ed2, 04ab1a137c, 53744ac772, 50a1c33adb, d153af5212, a336b27b6c, 97eb4b6e3e, 430768eac5, 5db864881a, d3b1047d37, 98cac103c4, 7226d66f67, 8a352ed3ea, 02f8306c7b, cf6f353f20, 7a631b3557, 5e13047846, c17d532802, 55e4e61906, f2f88ab9ca, ba418d357f, 0e3f16139d, 55486d511f, 6080094c41, 6b8d3b0f8a, 725958137a, 1f6b4bef74, fe1e0a6de0, 1f3c42fc44
assets/scss/gametheory.scss (new file, 11 lines)

@@ -0,0 +1,11 @@
@import "variables.scss";
@import "mixins.scss";

.assumption-number {
  font-weight: bold;
}

.assumption {
  @include bordered-block;
  padding: 0.8rem;
}
code/aoc-coq/day1.v (new file, 102 lines)

@@ -0,0 +1,102 @@
Require Import Coq.Lists.List.
Require Import Omega.

Definition has_pair (t : nat) (is : list nat) : Prop :=
  exists n1 n2 : nat, n1 <> n2 /\ In n1 is /\ In n2 is /\ n1 + n2 = t.

Fixpoint find_matching (is : list nat) (total : nat) (x : nat) : option nat :=
  match is with
  | nil => None
  | cons y ys =>
    if Nat.eqb (x + y) total
    then Some y
    else find_matching ys total x
  end.

Fixpoint find_sum (is : list nat) (total : nat) : option (nat * nat) :=
  match is with
  | nil => None
  | cons x xs =>
    match find_matching xs total x with
    | None => find_sum xs total (* Was buggy! *)
    | Some y => Some (x, y)
    end
  end.

Lemma find_matching_correct : forall is k x y,
  find_matching is k x = Some y -> x + y = k.
Proof.
  intros is. induction is;
  intros k x y Hev.
  - simpl in Hev. inversion Hev.
  - simpl in Hev. destruct (Nat.eqb (x+a) k) eqn:Heq.
    + injection Hev as H; subst.
      apply EqNat.beq_nat_eq. auto.
    + apply IHis. assumption.
Qed.

Lemma find_matching_skip : forall k x y i is,
  find_matching is k x = Some y -> find_matching (cons i is) k x = Some y.
Proof.
  intros k x y i is Hsmall.
  simpl. destruct (Nat.eqb (x+i) k) eqn:Heq.
  - apply find_matching_correct in Hsmall.
    symmetry in Heq. apply EqNat.beq_nat_eq in Heq.
    assert (i = y). { omega. } rewrite H. reflexivity.
  - assumption.
Qed.

Lemma find_matching_works : forall is k x y, In y is /\ x + y = k ->
  find_matching is k x = Some y.
Proof.
  intros is. induction is;
  intros k x y [Hin Heq].
  - inversion Hin.
  - inversion Hin.
    + subst a. simpl. Search Nat.eqb.
      destruct (Nat.eqb_spec (x+y) k).
      * reflexivity.
      * exfalso. apply n. assumption.
    + apply find_matching_skip. apply IHis.
      split; assumption.
Qed.

Theorem find_sum_works :
  forall k is, has_pair k is ->
  exists x y, (find_sum is k = Some (x, y) /\ x + y = k).
Proof.
  intros k is. generalize dependent k.
  induction is; intros k [x' [y' [Hneq [Hinx [Hiny Hsum]]]]].
  - (* is is empty. But x is in is! *)
    inversion Hinx.
  - (* is is not empty. *)
    inversion Hinx.
    + (* x is the first element. *)
      subst a. inversion Hiny.
      * (* y is also the first element; but this is impossible! *)
        exfalso. apply Hneq. apply H.
      * (* y is somewhere in the rest of the list.
           We've proven that we will find it! *)
        exists x'. simpl.
        erewrite find_matching_works.
        { exists y'. split. reflexivity. assumption. }
        { split; assumption. }
    + (* x is not the first element. *)
      inversion Hiny.
      * (* y is the first element,
           so x is somewhere in the rest of the list.
           Again, we've proven that we can find it. *)
        subst a. exists y'. simpl.
        erewrite find_matching_works.
        { exists x'. split. reflexivity. rewrite plus_comm. assumption. }
        { split. assumption. rewrite plus_comm. assumption. }
      * (* y is not the first element, either.
           Of course, there could be another matching pair
           starting with a. Otherwise, the inductive hypothesis applies. *)
        simpl. destruct (find_matching is k a) eqn:Hf.
        { exists a. exists n. split.
          reflexivity.
          apply find_matching_correct with is. assumption. }
        { apply IHis. unfold has_pair. exists x'. exists y'.
          repeat split; assumption. }
Qed.
@@ -37,6 +37,9 @@ add_executable(compiler
    instruction.cpp instruction.hpp
    graph.cpp graph.hpp
    global_scope.cpp global_scope.hpp
    parse_driver.cpp parse_driver.hpp
    mangler.cpp mangler.hpp
    compiler.cpp compiler.hpp
    ${BISON_parser_OUTPUTS}
    ${FLEX_scanner_OUTPUTS}
    main.cpp
@@ -3,6 +3,8 @@
#include <type_traits>
#include "binop.hpp"
#include "error.hpp"
#include "instruction.hpp"
#include "type.hpp"
#include "type_env.hpp"
#include "env.hpp"

@@ -45,7 +47,7 @@ type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_scheme_ptr lid_type = env->lookup(id);
if(!lid_type)
throw type_error(std::string("unknown identifier ") + id, loc);
throw type_error("unknown identifier " + id, loc);
return lid_type->instantiate(mgr);
}

@@ -54,15 +56,10 @@ void ast_lid::translate(global_scope& scope) {
}

void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
auto mangled_name = this->env->get_mangled_name(id);

// Local names shouldn't need mangling.
assert(!(mangled_name != id && !this->env->is_global(id)));

into.push_back(instruction_ptr(
(env->has_variable(mangled_name) && !this->env->is_global(id)) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(mangled_name)));
(this->env->is_global(id)) ?
(instruction*) new instruction_pushglobal(this->env->get_mangled_name(id)) :
(instruction*) new instruction_push(env->get_offset(id))));
}

void ast_uid::print(int indent, std::ostream& to) const {

@@ -78,7 +75,7 @@ type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_scheme_ptr uid_type = env->lookup(id);
if(!uid_type)
throw type_error(std::string("unknown constructor ") + id, loc);
throw type_error("unknown constructor " + id, loc);
return uid_type->instantiate(mgr);
}

@@ -108,7 +105,7 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op), loc);
if(!ftype) throw type_error("unknown binary operator " + op_name(op), loc);

// For better type errors, we first require binary function,
// and only then unify each argument. This way, we can

@@ -215,6 +212,10 @@ type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {

input_type = mgr.resolve(case_type, var);
type_app* app_type;
if(!(app_type = dynamic_cast<type_app*>(input_type.get())) ||
!dynamic_cast<type_data*>(app_type->constructor.get())) {
throw type_error("attempting case analysis of non-data type");
}

return branch_type;
}
@@ -226,209 +227,60 @@ void ast_case::translate(global_scope& scope) {
}
}

template <typename T>
struct case_mappings {
using tag_type = typename T::tag_type;
std::map<tag_type, std::vector<instruction_ptr>> defined_cases;
std::optional<std::vector<instruction_ptr>> default_case;

std::vector<instruction_ptr>& make_case_for(tag_type tag) {
if(default_case)
throw type_error("attempted pattern match after catch-all");
return defined_cases[tag];
}

std::vector<instruction_ptr>& make_default_case() {
if(default_case)
throw type_error("attempted repeated use of catch-all");
default_case.emplace(std::vector<instruction_ptr>());
return *default_case;
}

std::vector<instruction_ptr>& get_specific_case_for(tag_type tag) {
auto existing_case = defined_cases.find(tag);
assert(existing_case != defined_cases.end());
return existing_case->second;
}

std::vector<instruction_ptr>& get_default_case() {
assert(default_case);
return *default_case;
}

std::vector<instruction_ptr>& get_case_for(tag_type tag) {
if(case_defined_for(tag)) return get_specific_case_for(tag);
return get_default_case();
}

bool case_defined_for(tag_type tag) {
return defined_cases.find(tag) != defined_cases.end();
}

bool default_case_defined() { return default_case.has_value(); }

size_t defined_cases_count() { return defined_cases.size(); }
};

struct case_strategy_bool {
using tag_type = bool;
using repr_type = bool;

tag_type tag_from_repr(repr_type b) { return b; }

repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
pattern_constr* cpat;
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())) ||
(cpat->constr != "True" && cpat->constr != "False") ||
cpat->params.size() != 0)
throw type_error(
"pattern cannot be converted to a boolean",
pt->loc);
return cpat->constr == "True";
}

void compile_branch(
const branch_ptr& branch,
const env_ptr& env,
repr_type repr,
std::vector<instruction_ptr>& into) {
branch->expr->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_slide(1)));
}

size_t case_count(const type* type) {
return 2;
}

void into_instructions(
const type* type,
case_mappings<case_strategy_bool>& ms,
std::vector<instruction_ptr>& into) {
if(ms.defined_cases_count() == 0) {
for(auto& instruction : ms.get_default_case())
into.push_back(std::move(instruction));
return;
}

into.push_back(instruction_ptr(new instruction_if(
std::move(ms.get_case_for(true)),
std::move(ms.get_case_for(false)))));
}
};

struct case_strategy_data {
using tag_type = int;
using repr_type = std::pair<const type_data::constructor*, const std::vector<std::string>*>;

tag_type tag_from_repr(const repr_type& repr) { return repr.first->tag; }

repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
pattern_constr* cpat;
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())))
throw type_error(
"pattern cannot be interpreted as constructor.",
pt->loc);
return std::make_pair(
&static_cast<const type_data*>(type)->constructors.find(cpat->constr)->second,
&cpat->params);
}

void compile_branch(
const branch_ptr& branch,
const env_ptr& env,
const repr_type& repr,
std::vector<instruction_ptr>& into) {
env_ptr new_env = env;
for(auto it = repr.second->rbegin(); it != repr.second->rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}

into.push_back(instruction_ptr(new instruction_split(repr.second->size())));
branch->expr->compile(new_env, into);
into.push_back(instruction_ptr(new instruction_slide(repr.second->size())));
}

size_t case_count(const type* type) {
return static_cast<const type_data*>(type)->constructors.size();
}

void into_instructions(
const type* type,
case_mappings<case_strategy_data>& ms,
std::vector<instruction_ptr>& into) {
instruction_jump* jump_instruction = new instruction_jump();
instruction_ptr inst(jump_instruction);

auto data_type = static_cast<const type_data*>(type);
for(auto& constr : data_type->constructors) {
if(!ms.case_defined_for(constr.second.tag)) continue;
jump_instruction->branches.push_back(
std::move(ms.get_specific_case_for(constr.second.tag)));
jump_instruction->tag_mappings[constr.second.tag] =
jump_instruction->branches.size() - 1;
}

if(ms.default_case_defined()) {
jump_instruction->branches.push_back(
std::move(ms.get_default_case()));
for(auto& constr : data_type->constructors) {
if(ms.case_defined_for(constr.second.tag)) continue;
jump_instruction->tag_mappings[constr.second.tag] =
jump_instruction->branches.size();
}
}

into.push_back(std::move(inst));
}
};

template <typename T>
void compile_case(const ast_case& node, const env_ptr& env, const type* type, std::vector<instruction_ptr>& into) {
T strategy;
case_mappings<T> cases;
for(auto& branch : node.branches) {
pattern_var* vpat;
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
if(cases.defined_cases_count() == strategy.case_count(type))
throw type_error("redundant catch-all pattern", branch->pat->loc);
auto& branch_into = cases.make_default_case();
env_ptr new_env(new env_var(vpat->var, env));
branch->expr->compile(new_env, branch_into);
branch_into.push_back(instruction_ptr(new instruction_slide(1)));
} else {
auto repr = strategy.from_typed_pattern(branch->pat, type);
auto& branch_into = cases.make_case_for(strategy.tag_from_repr(repr));
strategy.compile_branch(branch, env, repr, branch_into);
}
}

if(!(cases.defined_cases_count() == strategy.case_count(type) ||
cases.default_case_defined()))
throw type_error("incomplete patterns", node.loc);

strategy.into_instructions(type, cases, into);
}

void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
type_data* data;
type_internal* internal;
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());

of->compile(env, into);
into.push_back(instruction_ptr(new instruction_eval()));

if(app_type && (data = dynamic_cast<type_data*>(app_type->constructor.get()))) {
compile_case<case_strategy_data>(*this, env, data, into);
return;
} else if(app_type && (internal = dynamic_cast<type_internal*>(app_type->constructor.get()))) {
if(internal->name == "Bool") {
compile_case<case_strategy_bool>(*this, env, data, into);
return;
instruction_jump* jump_instruction = new instruction_jump();
into.push_back(instruction_ptr(jump_instruction));
for(auto& branch : branches) {
std::vector<instruction_ptr> branch_instructions;
pattern_var* vpat;
pattern_constr* cpat;

if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);

for(auto& constr_pair : type->constructors) {
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
jump_instruction->tag_mappings.end())
break;

jump_instruction->tag_mappings[constr_pair.second.tag] =
jump_instruction->branches.size();
}
jump_instruction->branches.push_back(std::move(branch_instructions));
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
env_ptr new_env = env;
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}

branch_instructions.push_back(instruction_ptr(new instruction_split(
cpat->params.size())));
branch->expr->compile(new_env, branch_instructions);
branch_instructions.push_back(instruction_ptr(new instruction_slide(
cpat->params.size())));

int new_tag = type->constructors[cpat->constr].tag;
if(jump_instruction->tag_mappings.find(new_tag) !=
jump_instruction->tag_mappings.end())
throw type_error("technically not a type error: duplicate pattern");

jump_instruction->tag_mappings[new_tag] =
jump_instruction->branches.size();
jump_instruction->branches.push_back(std::move(branch_instructions));
}
}

throw type_error("attempting unsupported case analysis", of->loc);
for(auto& constr_pair : type->constructors) {
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
jump_instruction->tag_mappings.end())
throw type_error("non-total pattern");
}
}

void ast_let::print(int indent, std::ostream& to) const {
@@ -586,7 +438,7 @@ void pattern_constr::find_variables(std::set<std::string>& into) const {
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
if(!constructor_type_scheme) {
throw type_error(std::string("pattern using unknown constructor ") + constr, loc);
throw type_error("pattern using unknown constructor " + constr, loc);
}
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
@@ -6,9 +6,6 @@ std::string op_name(binop op) {
case MINUS: return "-";
case TIMES: return "*";
case DIVIDE: return "/";
case MODULO: return "%";
case EQUALS: return "==";
case LESS_EQUALS: return "<=";
}
return "??";
}

@@ -19,9 +16,6 @@ std::string op_action(binop op) {
case MINUS: return "minus";
case TIMES: return "times";
case DIVIDE: return "divide";
case MODULO: return "modulo";
case EQUALS: return "equals";
case LESS_EQUALS: return "less_equals";
}
return "??";
}
@@ -1,14 +1,16 @@
#pragma once
#include <array>
#include <string>

enum binop {
PLUS,
MINUS,
TIMES,
DIVIDE,
MODULO,
EQUALS,
LESS_EQUALS,
DIVIDE
};

constexpr binop all_binops[] = {
PLUS, MINUS, TIMES, DIVIDE
};

std::string op_name(binop op);
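A minimal sketch of how the trimmed all_binops table can be walked. This is not part of the diff; the printing loop is hypothetical, and it assumes op_action is declared alongside op_name in binop.hpp as its definition above suggests.

```cpp
#include <iostream>
#include "binop.hpp"

int main() {
    // Only the closed integer operators remain in all_binops after this change.
    for(auto op : all_binops) {
        std::cout << op_name(op) << " -> " << op_action(op) << std::endl;
    }
}
```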
code/compiler/13/compiler.cpp (new file, 153 lines)

@@ -0,0 +1,153 @@
#include "compiler.hpp"
#include "binop.hpp"
#include "error.hpp"
#include "global_scope.hpp"
#include "parse_driver.hpp"
#include "type.hpp"
#include "type_env.hpp"

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetMachine.h"

void compiler::add_default_types() {
global_env->bind_type("Int", type_ptr(new type_base("Int")));
}

void compiler::add_binop_type(binop op, type_ptr type) {
auto name = mng.new_mangled_name(op_action(op));
global_env->bind(op_name(op), std::move(type), visibility::global);
global_env->set_mangled_name(op_name(op), name);
}

void compiler::add_default_function_types() {
type_ptr int_type = global_env->lookup_type("Int");
assert(int_type != nullptr);
type_ptr int_type_app = type_ptr(new type_app(int_type));

type_ptr closed_int_op_type(
new type_arr(int_type_app, type_ptr(new type_arr(int_type_app, int_type_app))));

constexpr binop closed_ops[] = { PLUS, MINUS, TIMES, DIVIDE };
for(auto& op : closed_ops) add_binop_type(op, closed_int_op_type);
}

void compiler::parse() {
if(!driver())
throw compiler_error("failed to open file");
}

void compiler::typecheck() {
std::set<std::string> free_variables;
global_defs.find_free(free_variables);
global_defs.typecheck(type_m, global_env);
}

void compiler::translate() {
for(auto& data : global_defs.defs_data) {
data.second->into_globals(global_scp);
}
for(auto& defn : global_defs.defs_defn) {
auto& function = defn.second->into_global(global_scp);
defn.second->env->set_mangled_name(defn.first, function.name);
}
}

void compiler::compile() {
global_scp.compile();
}

void compiler::create_llvm_binop(binop op) {
auto new_function =
ctx.create_custom_function(global_env->get_mangled_name(op_name(op)), 2);
std::vector<instruction_ptr> instructions;
instructions.push_back(instruction_ptr(new instruction_push(1)));
instructions.push_back(instruction_ptr(new instruction_eval()));
instructions.push_back(instruction_ptr(new instruction_push(1)));
instructions.push_back(instruction_ptr(new instruction_eval()));
instructions.push_back(instruction_ptr(new instruction_binop(op)));
instructions.push_back(instruction_ptr(new instruction_update(2)));
instructions.push_back(instruction_ptr(new instruction_pop(2)));
ctx.get_builder().SetInsertPoint(&new_function->getEntryBlock());
for(auto& instruction : instructions) {
instruction->gen_llvm(ctx, new_function);
}
ctx.get_builder().CreateRetVoid();
}

void compiler::generate_llvm() {
for(auto op : all_binops) {
create_llvm_binop(op);
}

global_scp.generate_llvm(ctx);
}

void compiler::output_llvm(const std::string& into) {
std::string targetTriple = llvm::sys::getDefaultTargetTriple();

llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmParser();
llvm::InitializeNativeTargetAsmPrinter();

std::string error;
const llvm::Target* target =
llvm::TargetRegistry::lookupTarget(targetTriple, error);
if (!target) {
std::cerr << error << std::endl;
} else {
std::string cpu = "generic";
std::string features = "";
llvm::TargetOptions options;
std::unique_ptr<llvm::TargetMachine> targetMachine(
target->createTargetMachine(targetTriple, cpu, features,
options, llvm::Optional<llvm::Reloc::Model>()));

ctx.get_module().setDataLayout(targetMachine->createDataLayout());
ctx.get_module().setTargetTriple(targetTriple);

std::error_code ec;
llvm::raw_fd_ostream file(into, ec, llvm::sys::fs::F_None);
if (ec) {
throw compiler_error("failed to open object file for writing");
} else {
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
llvm::legacy::PassManager pm;
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
throw compiler_error("failed to add passes to pass manager");
} else {
pm.run(ctx.get_module());
file.close();
}
}
}
}

compiler::compiler(const std::string& filename)
: file_m(), global_defs(), driver(file_m, global_defs, filename),
global_env(new type_env), type_m(), mng(), global_scp(mng), ctx() {
add_default_types();
add_default_function_types();
}

void compiler::operator()(const std::string& into) {
parse();
typecheck();
translate();
compile();
generate_llvm();
output_llvm(into);
}

file_mgr& compiler::get_file_manager() {
return file_m;
}

type_mgr& compiler::get_type_manager() {
return type_m;
}
code/compiler/13/compiler.hpp (new file, 37 lines)

@@ -0,0 +1,37 @@
#pragma once
#include "binop.hpp"
#include "parse_driver.hpp"
#include "definition.hpp"
#include "type_env.hpp"
#include "type.hpp"
#include "global_scope.hpp"
#include "mangler.hpp"
#include "llvm_context.hpp"

class compiler {
private:
file_mgr file_m;
definition_group global_defs;
parse_driver driver;
type_env_ptr global_env;
type_mgr type_m;
mangler mng;
global_scope global_scp;
llvm_context ctx;

void add_default_types();
void add_binop_type(binop op, type_ptr type);
void add_default_function_types();
void parse();
void typecheck();
void translate();
void compile();
void create_llvm_binop(binop op);
void generate_llvm();
void output_llvm(const std::string& into);
public:
compiler(const std::string& filename);
void operator()(const std::string& into);
file_mgr& get_file_manager();
type_mgr& get_type_manager();
};
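A minimal usage sketch for the new compiler class, assuming only the interface shown above. The command-line handling and the "program.o" output name here are hypothetical, not taken from the diff.

```cpp
#include <iostream>
#include "compiler.hpp"
#include "error.hpp"

int main(int argc, char** argv) {
    if(argc != 2) {
        std::cerr << "please enter a file to compile." << std::endl;
        return 1;
    }
    compiler cmp(argv[1]);    // wires up the parse driver, type manager, mangler and LLVM context
    try {
        cmp("program.o");     // parse, typecheck, translate, compile, emit the object file
    } catch(compiler_error& e) {
        // compiler_error carries a description and an optional source location
        e.pretty_print(std::cerr, cmp.get_file_manager());
        return 1;
    }
}
```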
@@ -64,9 +64,9 @@ void definition_data::insert_constructors() const {
type_ptr return_type(return_app);
for(auto& var : vars) {
if(var_set.find(var) != var_set.end())
throw std::runtime_error(
std::string("type variable ") +
var + std::string(" used twice in data type definition."));
throw compiler_error(
"type variable " + var +
" used twice in data type definition.", loc);
var_set.insert(var);
return_app->arguments.push_back(type_ptr(new type_var(var)));
}

@@ -83,7 +83,7 @@ void definition_data::insert_constructors() const {
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
env->bind(constructor->name, full_scheme);
env->bind(constructor->name, full_scheme, visibility::global);
}
}
@@ -3,7 +3,7 @@
int env_var::get_offset(const std::string& name) const {
if(name == this->name) return 0;
assert(parent);
assert(parent != nullptr);
return parent->get_offset(name) + 1;
}

@@ -14,7 +14,7 @@ bool env_var::has_variable(const std::string& name) const {
}

int env_offset::get_offset(const std::string& name) const {
assert(parent);
assert(parent != nullptr);
return parent->get_offset(name) + offset;
}
@@ -2,33 +2,38 @@
#include <memory>
#include <string>

struct env {
virtual ~env() = default;
class env {
public:
virtual ~env() = default;

virtual int get_offset(const std::string& name) const = 0;
virtual bool has_variable(const std::string& name) const = 0;
virtual int get_offset(const std::string& name) const = 0;
virtual bool has_variable(const std::string& name) const = 0;
};

using env_ptr = std::shared_ptr<env>;

struct env_var : public env {
std::string name;
env_ptr parent;
class env_var : public env {
private:
std::string name;
env_ptr parent;

env_var(std::string n, env_ptr p)
: name(std::move(n)), parent(std::move(p)) {}
public:
env_var(std::string n, env_ptr p)
: name(std::move(n)), parent(std::move(p)) {}

int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
};

struct env_offset : public env {
int offset;
env_ptr parent;
class env_offset : public env {
private:
int offset;
env_ptr parent;

env_offset(int o, env_ptr p)
: offset(o), parent(std::move(p)) {}
public:
env_offset(int o, env_ptr p)
: offset(o), parent(std::move(p)) {}

int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
};
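A small illustration, hypothetical and not part of the diff, of how the two environment implementations compose through the public constructors kept by this change:

```cpp
#include <cassert>
#include "env.hpp"

int main() {
    // "x" sits on top of the compile-time stack; an env_offset shifts lookups by 2.
    env_ptr base(new env_var("x", nullptr));
    env_ptr shifted(new env_offset(2, base));
    assert(base->get_offset("x") == 0);
    assert(shifted->get_offset("x") == 2);
}
```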
@@ -1,23 +1,36 @@
#include "error.hpp"

const char* compiler_error::what() const noexcept {
return "an error occured while compiling the program";
}

void compiler_error::print_about(std::ostream& to) {
to << what() << ": ";
to << description << std::endl;
}

void compiler_error::print_location(std::ostream& to, file_mgr& fm, bool highlight) {
if(!loc) return;
to << "occuring on line " << loc->begin.line << ":" << std::endl;
fm.print_location(to, *loc, highlight);
}

void compiler_error::pretty_print(std::ostream& to, file_mgr& fm) {
print_about(to);
print_location(to, fm);
}

const char* type_error::what() const noexcept {
return "an error occured while checking the types of the program";
}

void type_error::pretty_print(std::ostream& to, parse_driver& drv) {
to << "encountered error while typechecking program: ";
to << description << std::endl;

if(loc) {
to << "occuring on line " << loc->begin.line << ":" << std::endl;
to << std::endl << "```" << std::endl;
drv.print_highlighted_location(to, *loc);
to << "```" << std::endl << std::endl;
}
void type_error::pretty_print(std::ostream& to, file_mgr& fm) {
print_about(to);
print_location(to, fm, true);
}

void unification_error::pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr) {
type_error::pretty_print(to, drv);
void unification_error::pretty_print(std::ostream& to, file_mgr& fm, type_mgr& mgr) {
type_error::pretty_print(to, fm);
to << "the expected type was:" << std::endl;
to << "  \033[34m";
left->print(mgr, to);
@@ -7,24 +7,43 @@
using maybe_location = std::optional<yy::location>;

struct type_error : std::exception {
std::string description;
std::optional<yy::location> loc;
class compiler_error : std::exception {
private:
std::string description;
maybe_location loc;

type_error(std::string d, maybe_location l = std::nullopt)
: description(std::move(d)), loc(std::move(l)) {}
public:
compiler_error(std::string d, maybe_location l = std::nullopt)
: description(std::move(d)), loc(std::move(l)) {}

const char* what() const noexcept override;
void pretty_print(std::ostream& to, parse_driver& drv);
const char* what() const noexcept override;

void print_about(std::ostream& to);
void print_location(std::ostream& to, file_mgr& fm, bool highlight = false);

void pretty_print(std::ostream& to, file_mgr& fm);
};

struct unification_error : public type_error {
type_ptr left;
type_ptr right;
class type_error : compiler_error {
private:

unification_error(type_ptr l, type_ptr r, maybe_location loc = std::nullopt)
: left(std::move(l)), right(std::move(r)),
public:
type_error(std::string d, maybe_location l = std::nullopt)
: compiler_error(std::move(d), std::move(l)) {}

const char* what() const noexcept override;
void pretty_print(std::ostream& to, file_mgr& fm);
};

class unification_error : public type_error {
private:
type_ptr left;
type_ptr right;

public:
unification_error(type_ptr l, type_ptr r, maybe_location loc = std::nullopt)
: left(std::move(l)), right(std::move(r)),
type_error("failed to unify types", std::move(loc)) {}

void pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr);
void pretty_print(std::ostream& to, file_mgr& fm, type_mgr& mgr);
};
@@ -16,11 +16,11 @@ void global_function::declare_llvm(llvm_context& ctx) {
}

void global_function::generate_llvm(llvm_context& ctx) {
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
ctx.get_builder().SetInsertPoint(&generated_function->getEntryBlock());
for(auto& instruction : instructions) {
instruction->gen_llvm(ctx, generated_function);
}
ctx.builder.CreateRetVoid();
ctx.get_builder().CreateRetVoid();
}

void global_constructor::generate_llvm(llvm_context& ctx) {

@@ -29,21 +29,30 @@ void global_constructor::generate_llvm(llvm_context& ctx) {
std::vector<instruction_ptr> instructions;
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
instructions.push_back(instruction_ptr(new instruction_update(0)));
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
ctx.get_builder().SetInsertPoint(&new_function->getEntryBlock());
for (auto& instruction : instructions) {
instruction->gen_llvm(ctx, new_function);
}
ctx.builder.CreateRetVoid();
ctx.get_builder().CreateRetVoid();
}

global_function& global_scope::add_function(std::string n, std::vector<std::string> ps, ast_ptr b) {
global_function* new_function = new global_function(mangle_name(n), std::move(ps), std::move(b));
global_function& global_scope::add_function(
const std::string& n,
std::vector<std::string> ps,
ast_ptr b) {
auto name = mng->new_mangled_name(n);
global_function* new_function =
new global_function(std::move(name), std::move(ps), std::move(b));
functions.push_back(global_function_ptr(new_function));
return *new_function;
}

global_constructor& global_scope::add_constructor(std::string n, int8_t t, size_t a) {
global_constructor* new_constructor = new global_constructor(mangle_name(n), t, a);
global_constructor& global_scope::add_constructor(
const std::string& n,
int8_t t,
size_t a) {
auto name = mng->new_mangled_name(n);
global_constructor* new_constructor = new global_constructor(name, t, a);
constructors.push_back(global_constructor_ptr(new_constructor));
return *new_constructor;
}

@@ -65,19 +74,3 @@ void global_scope::generate_llvm(llvm_context& ctx) {
function->generate_llvm(ctx);
}
}

std::string global_scope::mangle_name(const std::string& n) {
auto occurence_it = occurence_count.find(n);
int occurence = 0;
if(occurence_it != occurence_count.end()) {
occurence = occurence_it->second + 1;
}
occurence_count[n] = occurence;

std::string final_name = n;
if (occurence != 0) {
final_name += "_";
final_name += std::to_string(occurence);
}
return final_name;
}
@@ -4,6 +4,7 @@
#include <vector>
#include <llvm/IR/Function.h>
#include "instruction.hpp"
#include "mangler.hpp"

struct ast;
using ast_ptr = std::unique_ptr<ast>;

@@ -39,17 +40,21 @@ struct global_constructor {

using global_constructor_ptr = std::unique_ptr<global_constructor>;

struct global_scope {
std::map<std::string, int> occurence_count;
std::vector<global_function_ptr> functions;
std::vector<global_constructor_ptr> constructors;

global_function& add_function(std::string n, std::vector<std::string> ps, ast_ptr b);
global_constructor& add_constructor(std::string n, int8_t t, size_t a);

void compile();
void generate_llvm(llvm_context& ctx);

class global_scope {
private:
std::string mangle_name(const std::string& n);
std::vector<global_function_ptr> functions;
std::vector<global_constructor_ptr> constructors;
mangler* mng;

public:
global_scope(mangler& m) : mng(&m) {}

global_function& add_function(
const std::string& n,
std::vector<std::string> ps,
ast_ptr b);
global_constructor& add_constructor(const std::string& n, int8_t t, size_t a);

void compile();
void generate_llvm(llvm_context& ctx);
};
@@ -17,37 +17,38 @@ struct group {
using group_ptr = std::unique_ptr<group>;

class function_graph {
using group_id = size_t;
private:
using group_id = size_t;

struct group_data {
std::set<function> functions;
std::set<group_id> adjacency_list;
size_t indegree;
struct group_data {
std::set<function> functions;
std::set<group_id> adjacency_list;
size_t indegree;

group_data() : indegree(0) {}
};
group_data() : indegree(0) {}
};

using data_ptr = std::shared_ptr<group_data>;
using edge = std::pair<function, function>;
using group_edge = std::pair<group_id, group_id>;
using data_ptr = std::shared_ptr<group_data>;
using edge = std::pair<function, function>;
using group_edge = std::pair<group_id, group_id>;

std::map<function, std::set<function>> adjacency_lists;
std::set<edge> edges;
std::map<function, std::set<function>> adjacency_lists;
std::set<edge> edges;

std::set<edge> compute_transitive_edges();
void create_groups(
const std::set<edge>&,
std::map<function, group_id>&,
std::map<group_id, data_ptr>&);
void create_edges(
std::map<function, group_id>&,
std::map<group_id, data_ptr>&);
std::vector<group_ptr> generate_order(
std::map<function, group_id>&,
std::map<group_id, data_ptr>&);
std::set<edge> compute_transitive_edges();
void create_groups(
const std::set<edge>&,
std::map<function, group_id>&,
std::map<group_id, data_ptr>&);
void create_edges(
std::map<function, group_id>&,
std::map<group_id, data_ptr>&);
std::vector<group_ptr> generate_order(
std::map<function, group_id>&,
std::map<group_id, data_ptr>&);

public:
std::set<function>& add_function(const function& f);
void add_edge(const function& from, const function& to);
std::vector<group_ptr> compute_order();
std::set<function>& add_function(const function& f);
void add_edge(const function& from, const function& to);
std::vector<group_ptr> compute_order();
};
@@ -24,9 +24,9 @@ void instruction_pushglobal::print(int indent, std::ostream& to) const {
}

void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
auto& global_f = ctx.custom_functions.at("f_" + name);
auto arity = ctx.create_i32(global_f->arity);
ctx.create_push(f, ctx.create_global(f, global_f->function, arity));
auto& global_f = ctx.get_custom_function(name);
auto arity = ctx.create_i32(global_f.arity);
ctx.create_push(f, ctx.create_global(f, global_f.function, arity));
}

void instruction_push::print(int indent, std::ostream& to) const {

@@ -101,17 +101,17 @@ void instruction_jump::print(int indent, std::ostream& to) const {
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
auto top_node = ctx.create_peek(f, ctx.create_size(0));
auto tag = ctx.unwrap_data_tag(top_node);
auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f);
auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size());
auto safety_block = ctx.create_basic_block("safety", f);
auto switch_op = ctx.get_builder().CreateSwitch(tag, safety_block, tag_mappings.size());
std::vector<BasicBlock*> blocks;

for(auto& branch : branches) {
auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f);
ctx.builder.SetInsertPoint(branch_block);
auto branch_block = ctx.create_basic_block("branch", f);
ctx.get_builder().SetInsertPoint(branch_block);
for(auto& instruction : branch) {
instruction->gen_llvm(ctx, f);
}
ctx.builder.CreateBr(safety_block);
ctx.get_builder().CreateBr(safety_block);
blocks.push_back(branch_block);
}

@@ -119,46 +119,7 @@ void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
}

ctx.builder.SetInsertPoint(safety_block);
}

void instruction_if::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "If(" << std::endl;
for(auto& instruction : on_true) {
instruction->print(indent + 2, to);
}
to << std::endl;
for(auto& instruction : on_false) {
instruction->print(indent + 2, to);
}
print_indent(indent, to);
to << ")" << std::endl;
}

void instruction_if::gen_llvm(llvm_context& ctx, llvm::Function* f) const {
auto top_node = ctx.create_peek(f, ctx.create_size(0));
auto num = ctx.unwrap_num(top_node);

auto nonzero_block = BasicBlock::Create(ctx.ctx, "nonzero", f);
auto zero_block = BasicBlock::Create(ctx.ctx, "zero", f);
auto resume_block = BasicBlock::Create(ctx.ctx, "resume", f);
auto switch_op = ctx.builder.CreateSwitch(num, nonzero_block, 2);

switch_op->addCase(ctx.create_i32(0), zero_block);
ctx.builder.SetInsertPoint(nonzero_block);
for(auto& instruction : on_true) {
instruction->gen_llvm(ctx, f);
}
ctx.builder.CreateBr(resume_block);

ctx.builder.SetInsertPoint(zero_block);
for(auto& instruction : on_false) {
instruction->gen_llvm(ctx, f);
}
ctx.builder.CreateBr(resume_block);

ctx.builder.SetInsertPoint(resume_block);
ctx.get_builder().SetInsertPoint(safety_block);
}

void instruction_slide::print(int indent, std::ostream& to) const {

@@ -180,13 +141,10 @@ void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
auto right_int = ctx.unwrap_num(ctx.create_pop(f));
llvm::Value* result;
switch(op) {
case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
case MODULO: result = ctx.builder.CreateSRem(left_int, right_int); break;
case EQUALS: result = ctx.builder.CreateICmpEQ(left_int, right_int); break;
case LESS_EQUALS: result = ctx.builder.CreateICmpSLE(left_int, right_int); break;
case PLUS: result = ctx.get_builder().CreateAdd(left_int, right_int); break;
case MINUS: result = ctx.get_builder().CreateSub(left_int, right_int); break;
case TIMES: result = ctx.get_builder().CreateMul(left_int, right_int); break;
case DIVIDE: result = ctx.get_builder().CreateSDiv(left_int, right_int); break;
}
ctx.create_push(f, ctx.create_num(f, result));
}
@@ -101,19 +101,6 @@ struct instruction_jump : public instruction {
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_if : public instruction {
std::vector<instruction_ptr> on_true;
std::vector<instruction_ptr> on_false;

instruction_if(
std::vector<instruction_ptr> t,
std::vector<instruction_ptr> f)
: on_true(std::move(t)), on_false(std::move(f)) {}

void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_slide : public instruction {
int offset;
@@ -163,6 +163,18 @@ void llvm_context::create_functions() {
);
}

IRBuilder<>& llvm_context::get_builder() {
return builder;
}

Module& llvm_context::get_module() {
return module;
}

BasicBlock* llvm_context::create_basic_block(const std::string& name, llvm::Function* f) {
return BasicBlock::Create(ctx, name, f);
}

ConstantInt* llvm_context::create_i8(int8_t i) {
return ConstantInt::get(ctx, APInt(8, i));
}

@@ -225,7 +237,12 @@ Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
}

Value* llvm_context::unwrap_num(Value* v) {
return builder.CreatePtrToInt(v, IntegerType::getInt32Ty(ctx));
auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
auto cast = builder.CreatePointerCast(v, num_ptr_type);
auto offset_0 = create_i32(0);
auto offset_1 = create_i32(1);
auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
return builder.CreateLoad(int_ptr);
}
Value* llvm_context::create_num(Function* f, Value* v) {
auto alloc_num_f = functions.at("alloc_num");

@@ -254,7 +271,7 @@ Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
return create_track(f, alloc_app_call);
}

llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
llvm::Function* llvm_context::create_custom_function(const std::string& name, int32_t arity) {
auto void_type = llvm::Type::getVoidTy(ctx);
auto new_function = llvm::Function::Create(
function_type,

@@ -271,3 +288,7 @@ llvm::Function* llvm_context::create_custom_function(std::string name, int32_t a
return new_function;
}

llvm_context::custom_function& llvm_context::get_custom_function(const std::string& name) {
return *custom_functions.at("f_" + name);
}
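A short hypothetical helper, not part of the diff, showing the accessor-based style the change moves toward: callers position the builder through llvm_context instead of touching ctx.builder and ctx.ctx directly.

```cpp
#include "llvm_context.hpp"

// Emit an empty function body using only the new public accessors.
void emit_empty_body(llvm_context& ctx, llvm::Function* f) {
    auto* block = ctx.create_basic_block("entry_example", f);  // "entry_example" is an arbitrary label
    ctx.get_builder().SetInsertPoint(block);
    ctx.get_builder().CreateRetVoid();
}
```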
@@ -7,66 +7,75 @@
#include <llvm/IR/Value.h>
#include <map>

struct llvm_context {
struct custom_function {
llvm::Function* function;
int32_t arity;
};
class llvm_context {
public:
struct custom_function {
llvm::Function* function;
int32_t arity;
};

using custom_function_ptr = std::unique_ptr<custom_function>;
using custom_function_ptr = std::unique_ptr<custom_function>;

llvm::LLVMContext ctx;
llvm::IRBuilder<> builder;
llvm::Module module;
private:
llvm::LLVMContext ctx;
llvm::IRBuilder<> builder;
llvm::Module module;

std::map<std::string, custom_function_ptr> custom_functions;
std::map<std::string, llvm::Function*> functions;
std::map<std::string, llvm::StructType*> struct_types;
std::map<std::string, custom_function_ptr> custom_functions;
std::map<std::string, llvm::Function*> functions;
std::map<std::string, llvm::StructType*> struct_types;

llvm::StructType* stack_type;
llvm::StructType* gmachine_type;
llvm::PointerType* stack_ptr_type;
llvm::PointerType* gmachine_ptr_type;
llvm::PointerType* node_ptr_type;
llvm::IntegerType* tag_type;
llvm::FunctionType* function_type;
llvm::StructType* stack_type;
llvm::StructType* gmachine_type;
llvm::PointerType* stack_ptr_type;
llvm::PointerType* gmachine_ptr_type;
llvm::PointerType* node_ptr_type;
llvm::IntegerType* tag_type;
llvm::FunctionType* function_type;

llvm_context()
: builder(ctx), module("bloglang", ctx) {
create_types();
create_functions();
}
void create_types();
void create_functions();

void create_types();
void create_functions();
public:
llvm_context()
: builder(ctx), module("bloglang", ctx) {
create_types();
create_functions();
}

llvm::ConstantInt* create_i8(int8_t);
llvm::ConstantInt* create_i32(int32_t);
llvm::ConstantInt* create_size(size_t);
llvm::IRBuilder<>& get_builder();
llvm::Module& get_module();

llvm::Value* create_pop(llvm::Function*);
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
void create_push(llvm::Function*, llvm::Value*);
void create_popn(llvm::Function*, llvm::Value*);
void create_update(llvm::Function*, llvm::Value*);
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
void create_split(llvm::Function*, llvm::Value*);
void create_slide(llvm::Function*, llvm::Value*);
void create_alloc(llvm::Function*, llvm::Value*);
llvm::Value* create_track(llvm::Function*, llvm::Value*);
llvm::BasicBlock* create_basic_block(const std::string& name, llvm::Function* f);

void create_unwind(llvm::Function*);
llvm::ConstantInt* create_i8(int8_t);
llvm::ConstantInt* create_i32(int32_t);
llvm::ConstantInt* create_size(size_t);

llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
llvm::Value* create_pop(llvm::Function*);
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
void create_push(llvm::Function*, llvm::Value*);
void create_popn(llvm::Function*, llvm::Value*);
void create_update(llvm::Function*, llvm::Value*);
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
void create_split(llvm::Function*, llvm::Value*);
void create_slide(llvm::Function*, llvm::Value*);
void create_alloc(llvm::Function*, llvm::Value*);
llvm::Value* create_track(llvm::Function*, llvm::Value*);

llvm::Value* unwrap_num(llvm::Value*);
llvm::Value* create_num(llvm::Function*, llvm::Value*);
void create_unwind(llvm::Function*);

llvm::Value* unwrap_data_tag(llvm::Value*);
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);

llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
llvm::Value* unwrap_num(llvm::Value*);
llvm::Value* create_num(llvm::Function*, llvm::Value*);

llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
llvm::Value* unwrap_data_tag(llvm::Value*);

llvm::Function* create_custom_function(std::string name, int32_t arity);
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);

llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);

llvm::Function* create_custom_function(const std::string& name, int32_t arity);
custom_function& get_custom_function(const std::string& name);
};
@@ -1,213 +1,27 @@
|
||||
#include "ast.hpp"
|
||||
#include <iostream>
|
||||
#include "binop.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "graph.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include "parser.hpp"
|
||||
#include "compiler.hpp"
|
||||
#include "error.hpp"
|
||||
#include "type.hpp"
|
||||
#include "parse_driver.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
||||
std::cout << "An error occured: " << msg << std::endl;
|
||||
}
|
||||
|
||||
void prelude_types(definition_group& defs, type_env_ptr env) {
|
||||
type_ptr int_type = type_ptr(new type_internal("Int"));
|
||||
env->bind_type("Int", int_type);
|
||||
type_ptr int_type_app = type_ptr(new type_app(int_type));
|
||||
|
||||
type_ptr bool_type = type_ptr(new type_internal("Bool"));
|
||||
env->bind_type("Bool", bool_type);
|
||||
type_ptr bool_type_app = type_ptr(new type_app(bool_type));
|
||||
|
||||
type_ptr binop_type = type_ptr(new type_arr(
|
||||
int_type_app,
|
||||
type_ptr(new type_arr(int_type_app, int_type_app))));
|
||||
type_ptr cmp_type = type_ptr(new type_arr(
|
||||
int_type_app,
|
||||
type_ptr(new type_arr(int_type_app, bool_type_app))));
|
||||
|
||||
constexpr binop number_ops[] = { PLUS, MINUS, TIMES, DIVIDE, MODULO };
|
||||
constexpr binop cmp_ops[] = { EQUALS, LESS_EQUALS };
|
||||
for(auto& op : number_ops) {
|
||||
env->bind(op_name(op), binop_type, visibility::global);
|
||||
env->set_mangled_name(op_name(op), op_action(op));
|
||||
}
|
||||
for(auto& op : cmp_ops) {
|
||||
env->bind(op_name(op), cmp_type, visibility::global);
|
||||
env->set_mangled_name(op_name(op), op_action(op));
|
||||
}
|
||||
|
||||
env->bind("True", bool_type_app, visibility::global);
|
||||
env->bind("False", bool_type_app, visibility::global);
|
||||
}
|
||||
|
||||
void typecheck_program(
|
||||
definition_group& defs,
|
||||
type_mgr& mgr, type_env_ptr& env) {
|
||||
prelude_types(defs, env);
|
||||
|
||||
std::set<std::string> free;
|
||||
defs.find_free(free);
|
||||
defs.typecheck(mgr, env);
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
for(auto& pair : defs.env->names) {
|
||||
std::cout << pair.first << ": ";
|
||||
pair.second.type->print(mgr, std::cout);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
global_scope translate_program(definition_group& group) {
|
||||
global_scope scope;
|
||||
for(auto& data : group.defs_data) {
|
||||
data.second->into_globals(scope);
|
||||
}
|
||||
for(auto& defn : group.defs_defn) {
|
||||
auto& function = defn.second->into_global(scope);
|
||||
function.body->env->parent->set_mangled_name(defn.first, function.name);
|
||||
}
|
||||
return scope;
|
||||
}
|
||||
|
||||
void output_llvm(llvm_context& ctx, const std::string& filename) {
|
||||
std::string targetTriple = llvm::sys::getDefaultTargetTriple();
|
||||
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmParser();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
|
||||
std::string error;
|
||||
const llvm::Target* target =
|
||||
llvm::TargetRegistry::lookupTarget(targetTriple, error);
|
||||
if (!target) {
|
||||
std::cerr << error << std::endl;
|
||||
} else {
|
||||
std::string cpu = "generic";
|
||||
std::string features = "";
|
||||
llvm::TargetOptions options;
|
||||
std::unique_ptr<llvm::TargetMachine> targetMachine(
|
||||
target->createTargetMachine(targetTriple, cpu, features,
|
||||
options, llvm::Optional<llvm::Reloc::Model>()));
|
||||
|
||||
ctx.module.setDataLayout(targetMachine->createDataLayout());
|
||||
ctx.module.setTargetTriple(targetTriple);
|
||||
|
||||
std::error_code ec;
|
||||
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
|
||||
if (ec) {
|
||||
throw std::runtime_error("failed to open object file for writing");
|
||||
} else {
|
||||
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
||||
llvm::legacy::PassManager pm;
|
||||
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
||||
throw std::runtime_error("failed to add passes to pass manager");
|
||||
} else {
|
||||
pm.run(ctx.module);
|
||||
file.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
|
||||
auto new_function = ctx.create_custom_function(op_action(op), 2);
|
||||
std::vector<instruction_ptr> instructions;
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_binop(op)));
|
||||
instructions.push_back(instruction_ptr(new instruction_update(2)));
|
||||
instructions.push_back(instruction_ptr(new instruction_pop(2)));
|
||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||
for(auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, new_function);
|
||||
}
|
||||
ctx.builder.CreateRetVoid();
|
||||
}
|
||||
|
||||
void gen_llvm_boolean_constructor(llvm_context& ctx, const std::string& s, bool b) {
|
||||
auto new_function = ctx.create_custom_function(s, 0);
|
||||
std::vector<instruction_ptr> instructions;
|
||||
instructions.push_back(instruction_ptr(new instruction_pushint(b)));
|
||||
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||
for(auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, new_function);
|
||||
}
|
||||
ctx.builder.CreateRetVoid();
|
||||
}
|
||||
|
||||
void gen_llvm(global_scope& scope) {
|
||||
llvm_context ctx;
|
||||
gen_llvm_internal_op(ctx, PLUS);
|
||||
gen_llvm_internal_op(ctx, MINUS);
|
||||
gen_llvm_internal_op(ctx, TIMES);
|
||||
gen_llvm_internal_op(ctx, DIVIDE);
|
||||
gen_llvm_internal_op(ctx, MODULO);
|
||||
gen_llvm_internal_op(ctx, EQUALS);
|
||||
gen_llvm_internal_op(ctx, LESS_EQUALS);
|
||||
gen_llvm_boolean_constructor(ctx, "True", true);
|
||||
gen_llvm_boolean_constructor(ctx, "False", false);
|
||||
|
||||
scope.generate_llvm(ctx);
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
ctx.module.print(llvm::outs(), nullptr);
|
||||
#endif
|
||||
|
||||
output_llvm(ctx, "program.o");
|
||||
std::cerr << "An error occured: " << msg << std::endl;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if(argc != 2) {
|
||||
std::cerr << "please enter a file to compile." << std::endl;
|
||||
}
|
||||
parse_driver driver(argv[1]);
|
||||
if(!driver.run_parse()) {
|
||||
std::cerr << "failed to open file " << argv[1] << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
type_mgr mgr;
|
||||
type_env_ptr env(new type_env);
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
for(auto& def_defn : driver.global_defs.defs_defn) {
|
||||
std::cout << def_defn.second->name;
|
||||
for(auto& param : def_defn.second->params) std::cout << " " << param;
|
||||
std::cout << ":" << std::endl;
|
||||
def_defn.second->body->print(1, std::cout);
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
compiler cmp(argv[1]);
|
||||
|
||||
try {
|
||||
typecheck_program(driver.global_defs, mgr, env);
|
||||
global_scope scope = translate_program(driver.global_defs);
|
||||
scope.compile();
|
||||
gen_llvm(scope);
|
||||
cmp("program.o");
|
||||
} catch(unification_error& err) {
|
||||
err.pretty_print(std::cerr, driver, mgr);
|
||||
err.pretty_print(std::cerr, cmp.get_file_manager(), cmp.get_type_manager());
|
||||
} catch(type_error& err) {
|
||||
err.pretty_print(std::cerr, driver);
|
||||
} catch(std::runtime_error& err) {
|
||||
std::cerr << err.what() << std::endl;
|
||||
err.pretty_print(std::cerr, cmp.get_file_manager());
|
||||
} catch (compiler_error& err) {
|
||||
err.pretty_print(std::cerr, cmp.get_file_manager());
|
||||
}
|
||||
}
|
||||
|
||||
17
code/compiler/13/mangler.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "mangler.hpp"
|
||||
|
||||
std::string mangler::new_mangled_name(const std::string& n) {
|
||||
auto occurence_it = occurence_count.find(n);
|
||||
int occurence = 0;
|
||||
if(occurence_it != occurence_count.end()) {
|
||||
occurence = occurence_it->second + 1;
|
||||
}
|
||||
occurence_count[n] = occurence;
|
||||
|
||||
std::string final_name = n;
|
||||
if (occurence != 0) {
|
||||
final_name += "_";
|
||||
final_name += std::to_string(occurence);
|
||||
}
|
||||
return final_name;
|
||||
}
|
||||
11
code/compiler/13/mangler.hpp
Normal file
@@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
class mangler {
|
||||
private:
|
||||
std::map<std::string, int> occurence_count;
|
||||
|
||||
public:
|
||||
std::string new_mangled_name(const std::string& str);
|
||||
};
|
||||
72
code/compiler/13/parse_driver.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
#include "parse_driver.hpp"
|
||||
#include "scanner.hpp"
|
||||
#include <sstream>
|
||||
|
||||
file_mgr::file_mgr() : file_offset(0) {
|
||||
line_offsets.push_back(0);
|
||||
}
|
||||
|
||||
void file_mgr::write(const char* buf, size_t len) {
|
||||
string_stream.write(buf, len);
|
||||
file_offset += len;
|
||||
}
|
||||
|
||||
void file_mgr::mark_line() {
|
||||
line_offsets.push_back(file_offset);
|
||||
}
|
||||
|
||||
void file_mgr::finalize() {
|
||||
file_contents = string_stream.str();
|
||||
}
|
||||
|
||||
size_t file_mgr::get_index(int line, int column) const {
|
||||
assert(line > 0 && line <= line_offsets.size());
|
||||
return line_offsets.at(line-1) + column - 1;
|
||||
}
|
||||
|
||||
size_t file_mgr::get_line_end(int line) const {
|
||||
if(line == line_offsets.size()) return file_contents.size();
|
||||
return get_index(line+1, 1);
|
||||
}
|
||||
|
||||
void file_mgr::print_location(
|
||||
std::ostream& stream,
|
||||
const yy::location& loc,
|
||||
bool highlight) const {
|
||||
size_t print_start = get_index(loc.begin.line, 1);
|
||||
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
||||
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
||||
size_t print_end = get_line_end(loc.end.line);
|
||||
const char* content = file_contents.c_str();
|
||||
stream.write(content + print_start, highlight_start - print_start);
|
||||
if(highlight) stream << "\033[4;31m";
|
||||
stream.write(content + highlight_start, highlight_end - highlight_start);
|
||||
if(highlight) stream << "\033[0m";
|
||||
stream.write(content + highlight_end, print_end - highlight_end);
|
||||
}
|
||||
|
||||
bool parse_driver::operator()() {
|
||||
FILE* stream = fopen(file_name.c_str(), "r");
|
||||
if(!stream) return false;
|
||||
yyscan_t scanner;
|
||||
yylex_init(&scanner);
|
||||
yyset_in(stream, scanner);
|
||||
yy::parser parser(scanner, *this);
|
||||
parser();
|
||||
yylex_destroy(scanner);
|
||||
fclose(stream);
|
||||
file_m->finalize();
|
||||
return true;
|
||||
}
|
||||
|
||||
yy::location& parse_driver::get_current_location() {
|
||||
return location;
|
||||
}
|
||||
|
||||
file_mgr& parse_driver::get_file_manager() const {
|
||||
return *file_m;
|
||||
}
|
||||
|
||||
definition_group& parse_driver::get_global_defs() const {
|
||||
return *global_defs;
|
||||
}
|
||||
@@ -11,68 +11,46 @@ struct parse_driver;
|
||||
void scanner_init(parse_driver* d, yyscan_t* scanner);
|
||||
void scanner_destroy(yyscan_t* scanner);
|
||||
|
||||
struct parse_driver {
|
||||
std::string file_name;
|
||||
std::ifstream file_stream;
|
||||
std::ostringstream string_stream;
|
||||
class file_mgr {
|
||||
private:
|
||||
std::ostringstream string_stream;
|
||||
std::string file_contents;
|
||||
|
||||
yy::location location;
|
||||
size_t file_offset;
|
||||
size_t file_offset;
|
||||
std::vector<size_t> line_offsets;
|
||||
public:
|
||||
file_mgr();
|
||||
|
||||
std::vector<size_t> line_offsets;
|
||||
definition_group global_defs;
|
||||
std::string read_file;
|
||||
void write(const char* buffer, size_t len);
|
||||
void mark_line();
|
||||
void finalize();
|
||||
|
||||
parse_driver(const std::string& file)
|
||||
: file_name(file), file_offset(0) {}
|
||||
size_t get_index(int line, int column) const;
|
||||
size_t get_line_end(int line) const;
|
||||
void print_location(
|
||||
std::ostream& stream,
|
||||
const yy::location& loc,
|
||||
bool highlight = true) const;
|
||||
};
|
||||
|
||||
bool run_parse() {
|
||||
file_stream.open(file_name);
|
||||
if(!file_stream.good()) return false;
|
||||
line_offsets.push_back(0);
|
||||
yyscan_t scanner;
|
||||
scanner_init(this, &scanner);
|
||||
yy::parser parser(scanner, *this);
|
||||
parser();
|
||||
scanner_destroy(&scanner);
|
||||
read_file = string_stream.str();
|
||||
return true;
|
||||
}
|
||||
class parse_driver {
|
||||
private:
|
||||
std::string file_name;
|
||||
yy::location location;
|
||||
definition_group* global_defs;
|
||||
file_mgr* file_m;
|
||||
|
||||
int get() {
|
||||
int new_char = file_stream.get();
|
||||
if(new_char == EOF) return EOF;
|
||||
file_offset++;
|
||||
if(new_char == '\n') line_offsets.push_back(file_offset);
|
||||
string_stream.put(new_char);
|
||||
return new_char;
|
||||
}
|
||||
public:
|
||||
parse_driver(
|
||||
file_mgr& mgr,
|
||||
definition_group& defs,
|
||||
const std::string& file)
|
||||
: file_name(file), file_m(&mgr), global_defs(&defs) {}
|
||||
|
||||
size_t get_index(int line, int column) {
|
||||
assert(line > 0);
|
||||
assert(line <= line_offsets.size());
|
||||
size_t file_offset = line_offsets[line-1];
|
||||
file_offset += column - 1;
|
||||
return file_offset;
|
||||
}
|
||||
|
||||
size_t get_line_end(int line) {
|
||||
if(line > line_offsets.size()) return read_file.size();
|
||||
return get_index(line+1, 1);
|
||||
}
|
||||
|
||||
void print_highlighted_location(std::ostream& stream, const yy::location& loc) {
|
||||
size_t print_start = get_index(loc.begin.line, 1);
|
||||
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
||||
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
||||
size_t print_end = get_line_end(loc.end.line);
|
||||
const char* content = read_file.c_str();
|
||||
stream.write(content + print_start, highlight_start - print_start);
|
||||
stream << "\033[4;31m";
|
||||
stream.write(content + highlight_start, highlight_end - highlight_start);
|
||||
stream << "\033[0m";
|
||||
stream.write(content + highlight_end, print_end - highlight_end);
|
||||
}
|
||||
bool operator()();
|
||||
yy::location& get_current_location();
|
||||
file_mgr& get_file_manager() const;
|
||||
definition_group& get_global_defs() const;
|
||||
};
|
||||
|
||||
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
||||
|
||||
@@ -9,10 +9,10 @@ type_ptr parsed_type_app::to_type(
|
||||
const type_env& e) const {
|
||||
auto parent_type = e.lookup_type(name);
|
||||
if(parent_type == nullptr)
|
||||
throw type_error(std::string("no such type or type constructor ") + name);
|
||||
throw type_error("no such type or type constructor " + name);
|
||||
type_base* base_type;
|
||||
if(!(base_type = dynamic_cast<type_base*>(parent_type.get())))
|
||||
throw type_error(std::string("invalid type ") + name);
|
||||
throw type_error("invalid type " + name);
|
||||
if(base_type->arity != arguments.size()) {
|
||||
std::ostringstream error_stream;
|
||||
error_stream << "invalid application of type ";
|
||||
@@ -34,7 +34,7 @@ type_ptr parsed_type_var::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& e) const {
|
||||
if(vars.find(var) == vars.end())
|
||||
throw type_error(std::string("the type variable ") + var + std::string(" was not explicitly declared."));
|
||||
throw type_error("the type variable " + var + " was not explicitly declared.");
|
||||
return type_ptr(new type_var(var));
|
||||
}
|
||||
|
||||
|
||||
@@ -18,14 +18,10 @@ using yyscan_t = void*;
|
||||
}
|
||||
|
||||
%token BACKSLASH
|
||||
%token BACKTICK
|
||||
%token PLUS
|
||||
%token TIMES
|
||||
%token MINUS
|
||||
%token DIVIDE
|
||||
%token MODULO
|
||||
%token EQUALS
|
||||
%token LESS_EQUALS
|
||||
%token <int> INT
|
||||
%token DEFN
|
||||
%token DATA
|
||||
@@ -53,10 +49,9 @@ using yyscan_t = void*;
|
||||
%type <std::vector<branch_ptr>> branches
|
||||
%type <std::vector<constructor_ptr>> constructors
|
||||
%type <std::vector<parsed_type_ptr>> typeList
|
||||
%type <binop> anyBinop
|
||||
%type <definition_group> definitions
|
||||
%type <parsed_type_ptr> type nonArrowType typeListElement
|
||||
%type <ast_ptr> aInfix aEq aAdd aMul case let lambda app appBase
|
||||
%type <ast_ptr> aAdd aMul case let lambda app appBase
|
||||
%type <definition_data_ptr> data
|
||||
%type <definition_defn_ptr> defn
|
||||
%type <branch_ptr> branch
|
||||
@@ -68,7 +63,7 @@ using yyscan_t = void*;
|
||||
%%
|
||||
|
||||
program
|
||||
: definitions { $1.vis = visibility::global; std::swap(drv.global_defs, $1); }
|
||||
: definitions { $1.vis = visibility::global; std::swap(drv.get_global_defs(), $1); }
|
||||
;
|
||||
|
||||
definitions
|
||||
@@ -78,7 +73,7 @@ definitions
|
||||
;
|
||||
|
||||
defn
|
||||
: DEFN LID lowercaseParams EQUAL OCURLY aInfix CCURLY
|
||||
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||
{ $$ = definition_defn_ptr(
|
||||
new definition_defn(std::move($2), std::move($3), std::move($6), @$)); }
|
||||
;
|
||||
@@ -88,22 +83,6 @@ lowercaseParams
|
||||
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
|
||||
aInfix
|
||||
: aInfix BACKTICK LID BACKTICK aEq
|
||||
{ $$ = ast_ptr(new ast_app(
|
||||
ast_ptr(new ast_app(ast_ptr(new ast_lid(std::move($3))), std::move($1))), std::move($5))); }
|
||||
| aInfix BACKTICK UID BACKTICK aEq
|
||||
{ $$ = ast_ptr(new ast_app(
|
||||
ast_ptr(new ast_app(ast_ptr(new ast_uid(std::move($3))), std::move($1))), std::move($5))); }
|
||||
| aEq { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
aEq
|
||||
: aAdd EQUALS aAdd { $$ = ast_ptr(new ast_binop(EQUALS, std::move($1), std::move($3), @$)); }
|
||||
| aAdd LESS_EQUALS aAdd { $$ = ast_ptr(new ast_binop(LESS_EQUALS, std::move($1), std::move($3), @$)); }
|
||||
| aAdd { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
aAdd
|
||||
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3), @$)); }
|
||||
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3), @$)); }
|
||||
@@ -113,7 +92,6 @@ aAdd
|
||||
aMul
|
||||
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3), @$)); }
|
||||
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3), @$)); }
|
||||
| aMul MODULO app { $$ = ast_ptr(new ast_binop(MODULO, std::move($1), std::move($3), @$)); }
|
||||
| app { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
@@ -126,35 +104,24 @@ appBase
|
||||
: INT { $$ = ast_ptr(new ast_int($1, @$)); }
|
||||
| LID { $$ = ast_ptr(new ast_lid(std::move($1), @$)); }
|
||||
| UID { $$ = ast_ptr(new ast_uid(std::move($1), @$)); }
|
||||
| OPAREN aInfix CPAREN { $$ = std::move($2); }
|
||||
| OPAREN anyBinop CPAREN { $$ = ast_ptr(new ast_lid(op_name($2))); }
|
||||
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||
| case { $$ = std::move($1); }
|
||||
| let { $$ = std::move($1); }
|
||||
| lambda { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
anyBinop
|
||||
: PLUS { $$ = PLUS; }
|
||||
| MINUS { $$ = MINUS; }
|
||||
| TIMES { $$ = TIMES; }
|
||||
| DIVIDE { $$ = DIVIDE; }
|
||||
| MODULO { $$ = MODULO; }
|
||||
| EQUALS { $$ = EQUALS; }
|
||||
| LESS_EQUALS { $$ = LESS_EQUALS; }
|
||||
;
|
||||
|
||||
let
|
||||
: LET OCURLY definitions CCURLY IN OCURLY aInfix CCURLY
|
||||
: LET OCURLY definitions CCURLY IN OCURLY aAdd CCURLY
|
||||
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7), @$)); }
|
||||
;
|
||||
|
||||
lambda
|
||||
: BACKSLASH lowercaseParams ARROW OCURLY aInfix CCURLY
|
||||
: BACKSLASH lowercaseParams ARROW OCURLY aAdd CCURLY
|
||||
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5), @$)); }
|
||||
;
|
||||
|
||||
case
|
||||
: CASE aInfix OF OCURLY branches CCURLY
|
||||
: CASE aAdd OF OCURLY branches CCURLY
|
||||
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5), @$)); }
|
||||
;
|
||||
|
||||
@@ -164,7 +131,7 @@ branches
|
||||
;
|
||||
|
||||
branch
|
||||
: pattern ARROW OCURLY aInfix CCURLY
|
||||
: pattern ARROW OCURLY aAdd CCURLY
|
||||
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||
;
|
||||
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
#include <bits/stdint-intn.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <memory.h>
|
||||
#include <stdio.h>
|
||||
#include "runtime.h"
|
||||
|
||||
#define INT_MARKER (1l << 63)
|
||||
#define IS_INT(n) ((uint64_t) n & INT_MARKER)
|
||||
|
||||
struct node_base* alloc_node() {
|
||||
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||
new_node->gc_next = NULL;
|
||||
@@ -25,7 +21,10 @@ struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
||||
}
|
||||
|
||||
struct node_num* alloc_num(int32_t n) {
|
||||
return (struct node_num*) (INT_MARKER | n);
|
||||
struct node_num* node = (struct node_num*) alloc_node();
|
||||
node->base.tag = NODE_NUM;
|
||||
node->value = n;
|
||||
return node;
|
||||
}
|
||||
|
||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
||||
@@ -50,7 +49,7 @@ void free_node_direct(struct node_base* n) {
|
||||
}
|
||||
|
||||
void gc_visit_node(struct node_base* n) {
|
||||
if(IS_INT(n) || n->gc_reachable) return;
|
||||
if(n->gc_reachable) return;
|
||||
n->gc_reachable = 1;
|
||||
|
||||
if(n->tag == NODE_APP) {
|
||||
@@ -170,7 +169,6 @@ void gmachine_split(struct gmachine* g, size_t n) {
|
||||
}
|
||||
|
||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
||||
if(IS_INT(b)) return b;
|
||||
g->gc_node_count++;
|
||||
b->gc_next = g->gc_nodes;
|
||||
g->gc_nodes = b;
|
||||
@@ -210,9 +208,7 @@ void unwind(struct gmachine* g) {
|
||||
|
||||
while(1) {
|
||||
struct node_base* peek = stack_peek(s, 0);
|
||||
if(IS_INT(peek)) {
|
||||
break;
|
||||
} else if(peek->tag == NODE_APP) {
|
||||
if(peek->tag == NODE_APP) {
|
||||
struct node_app* n = (struct node_app*) peek;
|
||||
stack_push(s, n->left);
|
||||
} else if(peek->tag == NODE_GLOBAL) {
|
||||
@@ -238,9 +234,7 @@ void unwind(struct gmachine* g) {
|
||||
extern void f_main(struct gmachine* s);
|
||||
|
||||
void print_node(struct node_base* n) {
|
||||
if(IS_INT(n)) {
|
||||
printf("%d", (int32_t) n);
|
||||
} else if(n->tag == NODE_APP) {
|
||||
if(n->tag == NODE_APP) {
|
||||
struct node_app* app = (struct node_app*) n;
|
||||
print_node(app->left);
|
||||
putchar(' ');
|
||||
@@ -252,6 +246,9 @@ void print_node(struct node_base* n) {
|
||||
printf("(Global: %p)", global->function);
|
||||
} else if(n->tag == NODE_IND) {
|
||||
print_node(((struct node_ind*) n)->next);
|
||||
} else if(n->tag == NODE_NUM) {
|
||||
struct node_num* num = (struct node_num*) n;
|
||||
printf("%d", num->value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
%option noyywrap
|
||||
%option reentrant
|
||||
%option header-file="scanner.hpp"
|
||||
|
||||
%{
|
||||
#include <iostream>
|
||||
@@ -8,51 +9,37 @@
|
||||
#include "parse_driver.hpp"
|
||||
#include "parser.hpp"
|
||||
|
||||
#define YY_EXTRA_TYPE parse_driver*
|
||||
#define YY_USER_ACTION drv.location.step(); drv.location.columns(yyleng);
|
||||
#define YY_INPUT(buf,result,max_size) \
|
||||
{ \
|
||||
int c = yyextra->get(); \
|
||||
result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \
|
||||
}
|
||||
#define YY_USER_ACTION \
|
||||
drv.get_file_manager().write(yytext, yyleng); \
|
||||
LOC.step(); LOC.columns(yyleng);
|
||||
#define LOC drv.get_current_location()
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
\n { drv.location.lines(); }
|
||||
\n { drv.get_current_location().lines(); drv.get_file_manager().mark_line(); }
|
||||
[ ]+ {}
|
||||
\\ { return yy::parser::make_BACKSLASH(drv.location); }
|
||||
\+ { return yy::parser::make_PLUS(drv.location); }
|
||||
\* { return yy::parser::make_TIMES(drv.location); }
|
||||
- { return yy::parser::make_MINUS(drv.location); }
|
||||
\/ { return yy::parser::make_DIVIDE(drv.location); }
|
||||
% { return yy::parser::make_MODULO(drv.location); }
|
||||
== { return yy::parser::make_EQUALS(drv.location); }
|
||||
\<= { return yy::parser::make_LESS_EQUALS(drv.location); }
|
||||
` { return yy::parser::make_BACKTICK(drv.location); }
|
||||
[0-9]+ { return yy::parser::make_INT(atoi(yytext), drv.location); }
|
||||
defn { return yy::parser::make_DEFN(drv.location); }
|
||||
data { return yy::parser::make_DATA(drv.location); }
|
||||
case { return yy::parser::make_CASE(drv.location); }
|
||||
of { return yy::parser::make_OF(drv.location); }
|
||||
let { return yy::parser::make_LET(drv.location); }
|
||||
in { return yy::parser::make_IN(drv.location); }
|
||||
\{ { return yy::parser::make_OCURLY(drv.location); }
|
||||
\} { return yy::parser::make_CCURLY(drv.location); }
|
||||
\( { return yy::parser::make_OPAREN(drv.location); }
|
||||
\) { return yy::parser::make_CPAREN(drv.location); }
|
||||
, { return yy::parser::make_COMMA(drv.location); }
|
||||
-> { return yy::parser::make_ARROW(drv.location); }
|
||||
= { return yy::parser::make_EQUAL(drv.location); }
|
||||
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext), drv.location); }
|
||||
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext), drv.location); }
|
||||
<<EOF>> { return yy::parser::make_YYEOF(drv.location); }
|
||||
\\ { return yy::parser::make_BACKSLASH(LOC); }
|
||||
\+ { return yy::parser::make_PLUS(LOC); }
|
||||
\* { return yy::parser::make_TIMES(LOC); }
|
||||
- { return yy::parser::make_MINUS(LOC); }
|
||||
\/ { return yy::parser::make_DIVIDE(LOC); }
|
||||
[0-9]+ { return yy::parser::make_INT(atoi(yytext), LOC); }
|
||||
defn { return yy::parser::make_DEFN(LOC); }
|
||||
data { return yy::parser::make_DATA(LOC); }
|
||||
case { return yy::parser::make_CASE(LOC); }
|
||||
of { return yy::parser::make_OF(LOC); }
|
||||
let { return yy::parser::make_LET(LOC); }
|
||||
in { return yy::parser::make_IN(LOC); }
|
||||
\{ { return yy::parser::make_OCURLY(LOC); }
|
||||
\} { return yy::parser::make_CCURLY(LOC); }
|
||||
\( { return yy::parser::make_OPAREN(LOC); }
|
||||
\) { return yy::parser::make_CPAREN(LOC); }
|
||||
, { return yy::parser::make_COMMA(LOC); }
|
||||
-> { return yy::parser::make_ARROW(LOC); }
|
||||
= { return yy::parser::make_EQUAL(LOC); }
|
||||
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext), LOC); }
|
||||
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext), LOC); }
|
||||
<<EOF>> { return yy::parser::make_YYEOF(LOC); }
|
||||
|
||||
%%
|
||||
|
||||
void scanner_init(parse_driver* d, yyscan_t* scanner) {
|
||||
yylex_init_extra(d, scanner);
|
||||
}
|
||||
void scanner_destroy(yyscan_t* scanner) {
|
||||
yylex_destroy(*scanner);
|
||||
}
|
||||
|
||||
@@ -26,9 +26,9 @@ type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
||||
}
|
||||
|
||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
auto it = mgr.types.find(name);
|
||||
if(it != mgr.types.end()) {
|
||||
it->second->print(mgr, to);
|
||||
auto type = mgr.lookup(name);
|
||||
if(type) {
|
||||
type->print(mgr, to);
|
||||
} else {
|
||||
to << name;
|
||||
}
|
||||
@@ -38,10 +38,6 @@ void type_base::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
to << name;
|
||||
}
|
||||
|
||||
void type_internal::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
to << "!" << name;
|
||||
}
|
||||
|
||||
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
type_var* var;
|
||||
bool print_parenths = dynamic_cast<type_arr*>(mgr.resolve(left, var).get()) != nullptr;
|
||||
@@ -82,6 +78,12 @@ type_ptr type_mgr::new_arrow_type() {
|
||||
return type_ptr(new type_arr(new_type(), new_type()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::lookup(const std::string& var) const {
|
||||
auto types_it = types.find(var);
|
||||
if(types_it != types.end()) return types_it->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
||||
type_var* cast;
|
||||
|
||||
@@ -122,8 +124,7 @@ void type_mgr::unify(type_ptr l, type_ptr r, const std::optional<yy::location>&
|
||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||
if(lid->name == rid->name &&
|
||||
lid->arity == rid->arity &&
|
||||
lid->is_internal() == rid->is_internal())
|
||||
lid->arity == rid->arity)
|
||||
return;
|
||||
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
||||
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include <optional>
|
||||
#include "location.hh"
|
||||
|
||||
struct type_mgr;
|
||||
class type_mgr;
|
||||
|
||||
struct type {
|
||||
virtual ~type() = default;
|
||||
@@ -46,17 +46,6 @@ struct type_base : public type {
|
||||
: name(std::move(n)), arity(a) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
|
||||
virtual bool is_internal() const { return false; }
|
||||
};
|
||||
|
||||
struct type_internal : public type_base {
|
||||
type_internal(std::string n, int32_t a = 0)
|
||||
: type_base(std::move(n), a) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
|
||||
bool is_internal() const { return true; }
|
||||
};
|
||||
|
||||
struct type_data : public type_base {
|
||||
@@ -90,20 +79,23 @@ struct type_app : public type {
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_mgr {
|
||||
int last_id = 0;
|
||||
std::map<std::string, type_ptr> types;
|
||||
class type_mgr {
|
||||
private:
|
||||
int last_id = 0;
|
||||
std::map<std::string, type_ptr> types;
|
||||
|
||||
std::string new_type_name();
|
||||
type_ptr new_type();
|
||||
type_ptr new_arrow_type();
|
||||
public:
|
||||
std::string new_type_name();
|
||||
type_ptr new_type();
|
||||
type_ptr new_arrow_type();
|
||||
|
||||
void unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc = std::nullopt);
|
||||
type_ptr substitute(
|
||||
const std::map<std::string, type_ptr>& subst,
|
||||
const type_ptr& t) const;
|
||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||
void bind(const std::string& s, type_ptr t);
|
||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
|
||||
void unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc = std::nullopt);
|
||||
type_ptr substitute(
|
||||
const std::map<std::string, type_ptr>& subst,
|
||||
const type_ptr& t) const;
|
||||
type_ptr lookup(const std::string& var) const;
|
||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||
void bind(const std::string& s, type_ptr t);
|
||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include "type_env.hpp"
|
||||
#include "type.hpp"
|
||||
#include "error.hpp"
|
||||
#include <cassert>
|
||||
|
||||
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
|
||||
if(parent != nullptr) parent->find_free(mgr, into);
|
||||
@@ -34,20 +35,23 @@ bool type_env::is_global(const std::string& name) const {
|
||||
|
||||
void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end()) {
|
||||
// Local names shouldn't need mangling.
|
||||
assert(it->second.vis == visibility::global);
|
||||
|
||||
it->second.mangled_name = mangled;
|
||||
}
|
||||
// Can't set mangled name for non-existent variable.
|
||||
assert(it != names.end());
|
||||
// Local names shouldn't need mangling.
|
||||
assert(it->second.vis == visibility::global);
|
||||
|
||||
it->second.mangled_name = mangled;
|
||||
}
|
||||
|
||||
const std::string& type_env::get_mangled_name(const std::string& name) const {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end())
|
||||
return (it->second.mangled_name != "") ? it->second.mangled_name : name;
|
||||
if(parent) return parent->get_mangled_name(name);
|
||||
return name;
|
||||
if(it != names.end()) {
|
||||
assert(it->second.mangled_name);
|
||||
return *it->second.mangled_name;
|
||||
}
|
||||
assert(parent != nullptr);
|
||||
return parent->get_mangled_name(name);
|
||||
}
|
||||
|
||||
type_ptr type_env::lookup_type(const std::string& name) const {
|
||||
@@ -59,7 +63,7 @@ type_ptr type_env::lookup_type(const std::string& name) const {
|
||||
|
||||
void type_env::bind(const std::string& name, type_ptr t, visibility v) {
|
||||
type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
|
||||
names[name] = variable_data(std::move(new_scheme), v, "");
|
||||
names[name] = variable_data(std::move(new_scheme), v, std::nullopt);
|
||||
}
|
||||
|
||||
void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <optional>
|
||||
#include "graph.hpp"
|
||||
#include "type.hpp"
|
||||
|
||||
@@ -10,39 +11,41 @@ using type_env_ptr = std::shared_ptr<type_env>;
|
||||
|
||||
enum class visibility { global,local };
|
||||
|
||||
struct type_env {
|
||||
struct variable_data {
|
||||
type_scheme_ptr type;
|
||||
visibility vis;
|
||||
std::string mangled_name;
|
||||
class type_env {
|
||||
private:
|
||||
struct variable_data {
|
||||
type_scheme_ptr type;
|
||||
visibility vis;
|
||||
std::optional<std::string> mangled_name;
|
||||
|
||||
variable_data()
|
||||
: variable_data(nullptr, visibility::local, "") {}
|
||||
variable_data(type_scheme_ptr t, visibility v, std::string n)
|
||||
: type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
|
||||
};
|
||||
variable_data()
|
||||
: variable_data(nullptr, visibility::local, std::nullopt) {}
|
||||
variable_data(type_scheme_ptr t, visibility v, std::optional<std::string> n)
|
||||
: type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
|
||||
};
|
||||
|
||||
type_env_ptr parent;
|
||||
std::map<std::string, variable_data> names;
|
||||
std::map<std::string, type_ptr> type_names;
|
||||
type_env_ptr parent;
|
||||
std::map<std::string, variable_data> names;
|
||||
std::map<std::string, type_ptr> type_names;
|
||||
|
||||
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
||||
type_env() : type_env(nullptr) {}
|
||||
public:
|
||||
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
||||
type_env() : type_env(nullptr) {}
|
||||
|
||||
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
||||
void find_free_except(const type_mgr& mgr, const group& avoid,
|
||||
std::set<std::string>& into) const;
|
||||
type_scheme_ptr lookup(const std::string& name) const;
|
||||
bool is_global(const std::string& name) const;
|
||||
void set_mangled_name(const std::string& name, const std::string& mangled);
|
||||
const std::string& get_mangled_name(const std::string& name) const;
|
||||
type_ptr lookup_type(const std::string& name) const;
|
||||
void bind(const std::string& name, type_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind(const std::string& name, type_scheme_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind_type(const std::string& type_name, type_ptr t);
|
||||
void generalize(const std::string& name, const group& grp, type_mgr& mgr);
|
||||
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
||||
void find_free_except(const type_mgr& mgr, const group& avoid,
|
||||
std::set<std::string>& into) const;
|
||||
type_scheme_ptr lookup(const std::string& name) const;
|
||||
bool is_global(const std::string& name) const;
|
||||
void set_mangled_name(const std::string& name, const std::string& mangled);
|
||||
const std::string& get_mangled_name(const std::string& name) const;
|
||||
type_ptr lookup_type(const std::string& name) const;
|
||||
void bind(const std::string& name, type_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind(const std::string& name, type_scheme_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind_type(const std::string& type_name, type_ptr t);
|
||||
void generalize(const std::string& name, const group& grp, type_mgr& mgr);
|
||||
};
|
||||
|
||||
|
||||
|
||||
102
code/typesafe-imperative/TypesafeImp.idr
Normal file
@@ -0,0 +1,102 @@
|
||||
data Reg = A | B | R
|
||||
|
||||
data Ty = IntTy | BoolTy
|
||||
|
||||
TypeState : Type
|
||||
TypeState = (Ty, Ty, Ty)
|
||||
|
||||
getRegTy : Reg -> TypeState -> Ty
|
||||
getRegTy A (a, _, _) = a
|
||||
getRegTy B (_, b, _) = b
|
||||
getRegTy R (_, _, r) = r
|
||||
|
||||
setRegTy : Reg -> Ty -> TypeState -> TypeState
|
||||
setRegTy A a (_, b, r) = (a, b, r)
|
||||
setRegTy B b (a, _, r) = (a, b, r)
|
||||
setRegTy R r (a, b, _) = (a, b, r)
|
||||
|
||||
data Expr : TypeState -> Ty -> Type where
|
||||
Lit : Int -> Expr s IntTy
|
||||
Load : (r : Reg) -> Expr s (getRegTy r s)
|
||||
Add : Expr s IntTy -> Expr s IntTy -> Expr s IntTy
|
||||
Leq : Expr s IntTy -> Expr s IntTy -> Expr s BoolTy
|
||||
Not : Expr s BoolTy -> Expr s BoolTy
|
||||
|
||||
mutual
|
||||
data Stmt : TypeState -> TypeState -> TypeState -> Type where
|
||||
Store : (r : Reg) -> Expr s t -> Stmt l s (setRegTy r t s)
|
||||
If : Expr s BoolTy -> Prog l s n -> Prog l s n -> Stmt l s n
|
||||
Loop : Prog s s s -> Stmt l s s
|
||||
Break : Stmt s s s
|
||||
|
||||
data Prog : TypeState -> TypeState -> TypeState -> Type where
|
||||
Nil : Prog l s s
|
||||
(::) : Stmt l s n -> Prog l n m -> Prog l s m
|
||||
|
||||
initialState : TypeState
|
||||
initialState = (IntTy, IntTy, IntTy)
|
||||
|
||||
testProg : Prog Main.initialState Main.initialState Main.initialState
|
||||
testProg =
|
||||
[ Store A (Lit 1 `Leq` Lit 2)
|
||||
, If (Load A)
|
||||
[ Store A (Lit 1) ]
|
||||
[ Store A (Lit 2) ]
|
||||
, Store B (Lit 2)
|
||||
, Store R (Add (Load A) (Load B))
|
||||
]
|
||||
|
||||
prodProg : Prog Main.initialState Main.initialState Main.initialState
|
||||
prodProg =
|
||||
[ Store A (Lit 7)
|
||||
, Store B (Lit 9)
|
||||
, Store R (Lit 0)
|
||||
, Loop
|
||||
[ If (Load A `Leq` Lit 0)
|
||||
[ Break ]
|
||||
[ Store R (Load R `Add` Load B)
|
||||
, Store A (Load A `Add` Lit (-1))
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
repr : Ty -> Type
|
||||
repr IntTy = Int
|
||||
repr BoolTy = Bool
|
||||
|
||||
data State : TypeState -> Type where
|
||||
MkState : (repr a, repr b, repr c) -> State (a, b, c)
|
||||
|
||||
getReg : (r : Reg) -> State s -> repr (getRegTy r s)
|
||||
getReg A (MkState (a, _, _)) = a
|
||||
getReg B (MkState (_, b, _)) = b
|
||||
getReg R (MkState (_, _, r)) = r
|
||||
|
||||
setReg : (r : Reg) -> repr t -> State s -> State (setRegTy r t s)
|
||||
setReg A a (MkState (_, b, r)) = MkState (a, b, r)
|
||||
setReg B b (MkState (a, _, r)) = MkState (a, b, r)
|
||||
setReg R r (MkState (a, b, _)) = MkState (a, b, r)
|
||||
|
||||
expr : Expr s t -> State s -> repr t
|
||||
expr (Lit i) _ = i
|
||||
expr (Load r) s = getReg r s
|
||||
expr (Add l r) s = expr l s + expr r s
|
||||
expr (Leq l r) s = expr l s <= expr r s
|
||||
expr (Not e) s = not $ expr e s
|
||||
|
||||
mutual
|
||||
stmt : Stmt l s n -> State s -> Either (State l) (State n)
|
||||
stmt (Store r e) s = Right $ setReg r (expr e s) s
|
||||
stmt (If c t e) s = if expr c s then prog t s else prog e s
|
||||
stmt (Loop p) s =
|
||||
case prog p s >>= stmt (Loop p) of
|
||||
Right s => Right s
|
||||
Left s => Right s
|
||||
stmt Break s = Left s
|
||||
|
||||
prog : Prog l s n -> State s -> Either (State l) (State n)
|
||||
prog Nil s = Right s
|
||||
prog (st::p) s = stmt st s >>= prog p
|
||||
|
||||
run : Prog l s l -> State s -> State l
|
||||
run p s = either id id $ prog p s
|
||||
@@ -6,6 +6,15 @@ pygmentsCodeFences = true
|
||||
pygmentsUseClasses = true
|
||||
summaryLength = 20
|
||||
|
||||
[outputFormats]
|
||||
[outputFormats.Toml]
|
||||
name = "toml"
|
||||
mediaType = "application/toml"
|
||||
isHTML = false
|
||||
|
||||
[outputs]
|
||||
home = ["html","rss","toml"]
|
||||
|
||||
[markup]
|
||||
[markup.tableOfContents]
|
||||
endLevel = 4
|
||||
|
||||
350
content/blog/00_aoc_coq.md
Normal file
@@ -0,0 +1,350 @@
|
||||
---
|
||||
title: "Advent of Code in Coq - Day 1"
|
||||
date: 2020-12-02T18:44:56-08:00
|
||||
tags: ["Advent of Code", "Coq"]
|
||||
---
|
||||
|
||||
The first puzzle of this year's [Advent of Code](https://adventofcode.com) was quite
|
||||
simple, which gave me a thought: "Hey, this feels within reach for me to formally verify!"
|
||||
At first, I wanted to formalize and prove the correctness of the [two-pointer solution](https://www.geeksforgeeks.org/two-pointers-technique/).
|
||||
However, I didn't have the time to mess around with the various properties of sorted
|
||||
lists and their traversals. So, I settled for the brute force solution. Despite
|
||||
the simplicity of its implementation, there is plenty to talk about when proving
|
||||
its correctness using Coq. Let's get right into it!
|
||||
|
||||
Before we start, in the interest of keeping the post self-contained, here's the (paraphrased)
|
||||
problem statement:
|
||||
|
||||
> Given an unsorted list of numbers, find two distinct numbers that add up to 2020.
|
||||
|
||||
With this in mind, we can move on to writing some Coq!
|
||||
|
||||
### Defining the Functions
|
||||
The first step to proving our code correct is to actually write the code! To start with,
|
||||
let's write a helper function that, given a number `x`, tries to find another number
|
||||
`y` such that `x + y = 2020`. In fact, rather than hardcoding the desired
|
||||
sum to `2020`, let's just use another argument called `total`. The code is quite simple:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 7 14 >}}
|
||||
|
||||
Here, `is` is the list of numbers that we want to search.
|
||||
We proceed by case analysis: if the list is empty, we can't
|
||||
find a match, so we return `None` (the Coq equivalent of Haskell's `Nothing`).
|
||||
On the other hand, if the list has at least one element `y`, we see if it adds
|
||||
up to `total`, and return `Some y` (equivalent to `Just y` in Haskell) if it does.
|
||||
If it doesn't, we continue our search into the rest of the list.
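As a quick sanity check, assuming the definition above is in scope (this example is
separate from the accompanying `day1.v` file), we can ask Coq to evaluate
`find_matching` on a tiny list:

```
(* Look for a complement of 5 that adds up to 20 in the list 3 :: 15 :: nil. *)
Compute find_matching (3 :: 15 :: nil) 20 5.
(* = Some 15 : option nat *)
```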
|
||||
|
||||
It's somewhat unusual, in my experience, to put the list argument first when writing
|
||||
functions in a language with [currying](https://wiki.haskell.org/Currying). However,
|
||||
it seems as though Coq's `simpl` tactic, which we will use later, works better
|
||||
for our purposes when the argument being case analyzed is given first.
|
||||
|
||||
We can now use `find_matching` to define our `find_sum` function, which solves part 1.
|
||||
Here's the code:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 16 24 >}}
|
||||
|
||||
For every `x` that we encounter in our input list `is`, we want to check if there's
|
||||
a matching number in the rest of the list. We only search the remainder of the list
|
||||
because we can't use `x` twice: the `x` and `y` we return that add up to `total`
|
||||
must be different elements. We use `find_matching` to try to find a complementary number
|
||||
for `x`. If we don't find it, this `x` isn't it, so we recursively move on to `xs`.
|
||||
On the other hand, if we _do_ find a matching `y`, we're done! We return `(x,y)`,
|
||||
wrapped in `Some` to indicate that we found something useful.
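As another hedged sanity check, again assuming the definitions above are loaded,
evaluating `find_sum` on a small list behaves as expected:

```
(* 3 and 17 are the first pair of distinct elements adding up to 20. *)
Compute find_sum (1 :: 3 :: 17 :: nil) 20.
(* = Some (3, 17) : option (nat * nat) *)
```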
|
||||
|
||||
What about that `(* Was buggy! *)` line? Well, it so happens that my initial
|
||||
implementation had a bug on this line, one that came up as I was proving
|
||||
the correctness of my function. When I wasn't able to prove a particular
|
||||
behavior in one of the cases, I realized something was wrong. In short,
|
||||
my proof actually helped me find and fix a bug!
|
||||
|
||||
This is all the code we'll need to get our solution. Next, let's talk about some
|
||||
properties of our two functions.
|
||||
|
||||
### Our First Lemma
|
||||
When we call `find_matching`, we want to be sure that if we get a number,
|
||||
it does indeed add up to our expected total. We can state it a little bit more
|
||||
formally as follows:
|
||||
|
||||
> For any numbers `k` and `x`, and for any list of numbers `is`,
|
||||
> if `find_matching is k x` returns a number `y`, then `x + y = k`.
|
||||
|
||||
And this is how we write it in Coq:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 26 27 >}}
|
||||
|
||||
The arrow, `->`, reads "implies". Other than that, I think this
|
||||
property reads pretty well. The proof, unfortunately, is a little bit more involved.
|
||||
Here are the first few lines:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 28 31 >}}
|
||||
|
||||
We start with the `intros is` tactic, which is akin to saying
|
||||
"consider a particular list of integers `is`". We do this without losing
|
||||
generality: by simply examining a concrete list, we've said nothing about
|
||||
what that list is like. We then proceed by induction on `is`.
|
||||
|
||||
To prove something by induction for a list, we need to prove two things:
|
||||
|
||||
* The __base case__. Whatever property we want to hold, it must
|
||||
hold for the empty list, which is the simplest possible list.
|
||||
In our case, this means `find_matching` searching an empty list.
|
||||
* The __inductive case__. Assuming that a property holds for any list
|
||||
`[b, c, ...]`, we want to show that the property also holds for
|
||||
the list `[a, b, c, ...]`. That is, the property must remain true if we
|
||||
prepend an element to a list for which this property holds.
|
||||
|
||||
These two things combined give us a proof for _all_ lists, which is exactly
|
||||
what we want! If you don't believe me, here's how it works. Suppose you want
|
||||
to prove that some property `P` holds for `[1,2,3,4]`. Given the base
|
||||
case, we know that `P []` holds. Next, by the inductive case, since
|
||||
`P []` holds, we can prepend `4` to the list, and the property will
|
||||
still hold. Thus, `P [4]`. Now that `P [4]` holds, we can again prepend
|
||||
an element to the list, this time a `3`, and conclude that `P [3,4]`.
|
||||
Repeating this twice more, we arrive at our desired fact: `P [1,2,3,4]`.
|
||||
|
||||
When we write `induction is`, Coq will generate two proof goals for us,
|
||||
one for the base case, and one for the inductive case. We will have to prove
|
||||
each of them separately. Since we have
|
||||
not yet introduced the variables `k`, `x`, and `y`, they remain
|
||||
inside a `forall` quantifier at that time. To be able to refer
|
||||
to them, we want to use `intros`. We want to do this in both the
|
||||
base and the inductive case. To quickly do this, we use Coq's `;`
|
||||
operator. When we write `a; b`, Coq runs the tactic `a`, and then
|
||||
runs the tactic `b` in every proof goal generated by `a`. This is
|
||||
exactly what we want.
|
||||
|
||||
There's one more variable inside our second `intros`: `Hev`.
|
||||
This variable refers to the hypothesis of our statement:
|
||||
that is, the part on the left of the `->`. To prove that `A`
|
||||
implies `B`, we assume that `A` holds, and try to argue `B` from there.
|
||||
This is no different: when we use `intros Hev`, we say, "suppose that you have
|
||||
a proof that `find_matching` evaluates to `Some y`, called `Hev`". The thing
|
||||
on the right of `->` becomes our proof goal.
|
||||
|
||||
Now, it's time to look at the cases. To focus on one case at a time,
|
||||
we use `-`. The first case is our base case. Here's what Coq prints
|
||||
out at this time:
|
||||
|
||||
```
|
||||
k, x, y : nat
|
||||
Hev : find_matching nil k x = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
All the stuff above the `===` line makes up our hypotheses. We know
|
||||
that we have some `k`, `x`, and `y`, all of which are numbers.
|
||||
We also have the assumption that `find_matching` returns `Some y`.
|
||||
In the base case, `is` is just `[]`, and this is reflected in the
|
||||
type for `Hev`. To make this more clear, we'll simplify the call to `find_matching`
|
||||
in `Hev`, using `simpl in Hev`. Now, here's what Coq has to say about `Hev`:
|
||||
|
||||
```
|
||||
Hev : None = Some y
|
||||
```
|
||||
|
||||
Well, this doesn't make any sense. How can something be equal to nothing?
|
||||
We ask Coq this question using `inversion Hev`. Effectively, the question
|
||||
that `inversion` asks is: what are the possible ways we could have acquired `Hev`?
|
||||
Coq generates a proof goal for each of these possible ways. Alas, there are
|
||||
no ways to arrive at this contradictory assumption: the number of proof sub-goals
|
||||
is zero. This means we're done with the base case!
|
||||
|
||||
The inductive case is the meat of this proof. Here's the corresponding part
|
||||
of the Coq source file:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 32 36 >}}
|
||||
|
||||
This time, the proof state is more complicated:
|
||||
|
||||
```
|
||||
a : nat
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Hev : find_matching (a :: is) k x = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
Following in the footsteps of our informal description of the inductive case,
|
||||
Coq has us prove our property for `(a :: is)`, or the list `is` to which
|
||||
`a` is being prepended. Like before, we assume that our property holds for `is`.
|
||||
This is represented in the __induction hypothesis__ `IHis`. It states that if
|
||||
`find_matching` finds a `y` in `is`, it must add up to `k`. However, `IHis`
|
||||
doesn't tell us anything about `a :: is`: that's our job. We also still have
|
||||
`Hev`, which is our assumption that `find_matching` finds a `y` in `(a :: is)`.
|
||||
Running `simpl in Hev` gives us the following:
|
||||
|
||||
```
|
||||
Hev : (if x + a =? k then Some a else find_matching is k x) = Some y
|
||||
```
|
||||
|
||||
The result of `find_matching` now depends on whether or not `x` and the new element `a`
add up to `k`. If they do, then `find_matching` will return `a`, which means
|
||||
that `y` is the same as `a`. If not, it must be that `find_matching` finds
|
||||
the `y` in the rest of the list, `is`. We're not sure which of the possibilities
|
||||
is the case. Fortunately, we don't need to be!
|
||||
If we can prove that the `y` that `find_matching` finds is correct regardless
|
||||
of whether `a` adds up to `k` or not, we're good to go! To do this,
|
||||
we perform case analysis using `destruct`.
|
||||
|
||||
Our particular use of `destruct` says: check any possible value for `x + a =? k`,
and create an equation `Heq` that tells us what that value is. `=?` returns a boolean
|
||||
value, and so `destruct` generates two new goals: one where the function returns `true`,
|
||||
and one where it returns `false`. We start with the former. Here's the proof state:
|
||||
|
||||
```
|
||||
a : nat
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Heq : (x + a =? k) = true
|
||||
Hev : Some a = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
There is a new hypothesis: `Heq`. It tells us that we're currently
|
||||
considering the case where `=?` evaluates to `true`. Also,
|
||||
`Hev` has been considerably simplified: now that we know the condition
|
||||
of the `if` expression, we can just replace it with the `then` branch.
|
||||
|
||||
Looking at `Hev`, we can see that our prediction was right: `a` is equal to `y`. After all,
|
||||
if they weren't, `Some a` wouldn't be equal to `Some y`. To make Coq
|
||||
take this information into account, we use `injection`. This will create
|
||||
a new hypothesis, `a = y`. But if one is equal to the other, why don't we
|
||||
just use only one of these variables everywhere? We do exactly that by using
|
||||
`subst`, which replaces `a` with `y` everywhere in our proof.
|
||||
|
||||
The proof state is now:
|
||||
|
||||
```
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Heq : (x + y =? k) = true
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
We're close, but there's one more detail to keep in mind. Our goal, `x + y = k`,
|
||||
is the __proposition__ that `x + y` is equal to `k`. However, `Heq` tells us
|
||||
that the __function__ `=?` evaluates to `true`. These are fundamentally different.
One talks about mathematical equality, while the other talks about some function `=?`
defined somewhere in Coq's standard library. Who knows - maybe there's a bug in
Coq's implementation! Fortunately, Coq comes with a proof that if two numbers
are equal according to `=?`, they are mathematically equal. This proof is
called `beq_nat_eq`. We tell Coq to use this with `apply`. Our proof goal changes to:
|
||||
|
||||
```
|
||||
true = (x + y =? k)
|
||||
```
|
||||
|
||||
This is _almost_ like `Heq`, but flipped. Instead of manually flipping it and using `apply`
|
||||
with `Heq`, I let Coq do the rest of the work using `auto`.
|
||||
|
||||
Phew! All this for the `true` case of `=?`. Next, what happens if `x + a` does not equal `k`?
|
||||
Here's the proof state at this time:
|
||||
|
||||
```
|
||||
a : nat
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Heq : (x + a =? k) = false
|
||||
Hev : find_matching is k x = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
Since `a` was not what it was looking for, `find_matching` moved on to `is`. But hey,
|
||||
we're in the inductive case! We are assuming that `find_matching` will work properly
|
||||
with the list `is`. Since `find_matching` found its `y` in `is`, this should be all we need!
|
||||
We use our induction hypothesis `IHis` with `apply`. `IHis` itself does not know that
|
||||
`find_matching` moved on to `is`, so it asks us to prove it. Fortunately, `Hev` tells us
|
||||
exactly that, so we use `assumption`, and the proof is complete! Quod erat demonstrandum, QED!
|
||||
|
||||
### The Rest of the Owl
|
||||
Here are a couple of other properties of `find_matching`. For brevity's sake, I will
|
||||
not go through their proofs step-by-step. I find that the best way to understand
|
||||
Coq proofs is to actually step through them in the IDE!
|
||||
|
||||
First on the list is `find_matching_skip`. Here's the type:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 38 39 >}}
|
||||
|
||||
It reads: if we correctly find a number in a small list `is`, we can find that same number
|
||||
even if another number is prepended to `is`. That makes sense: _adding_ a number to
|
||||
a list doesn't remove whatever we found in it! I used this lemma to prove another,
|
||||
`find_matching_works`:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 49 50 >}}
|
||||
|
||||
This reads, if there _is_ an element `y` in `is` that adds up to `k` with `x`, then
|
||||
`find_matching` will find it. This is an important property. After all, if it didn't
|
||||
hold, it would mean that `find_matching` would occasionally fail to find a matching
|
||||
number, even though it's there! We can't have that.
|
||||
|
||||
Finally, we want to specify what it means for `find_sum`, our solution function, to actually
|
||||
work. The naive definition would be:
|
||||
|
||||
> Given a list of integers, `find_sum` always finds a pair of numbers that add up to `k`.
|
||||
|
||||
Unfortunately, this is not true. What if, for instance, we give `find_sum` an empty list?
|
||||
There are no numbers from that list to find and add together. Even a non-empty list
|
||||
may not include such a pair! We need a way to characterize valid input lists. I claim
|
||||
that all lists from this Advent of Code puzzle are guaranteed to have two numbers that
|
||||
add up to our goal, and that these numbers are not equal to each other. In Coq,
|
||||
we state this as follows:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 4 5 >}}
|
||||
|
||||
This defines a new property, `has_pair t is` (read "`is` has a pair of numbers that add to `t`"),
|
||||
which means:
|
||||
|
||||
> There are two numbers `n1` and `n2` such that they are not equal to each other (`n1<>n2`) __and__
|
||||
> the number `n1` is an element of `is` (`In n1 is`) __and__
|
||||
> the number `n2` is an element of `is` (`In n2 is`) __and__
|
||||
> the two numbers add up to `t` (`n1 + n2 = t`).
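To get a feel for this definition, here's a small example, assuming `has_pair` and the
`Coq.Lists.List` import are in scope, showing that a two-element list satisfies it:

```
Example has_pair_example : has_pair 5 (2 :: 3 :: nil).
Proof.
  (* Pick the witnesses n1 = 2 and n2 = 3, then prove each conjunct. *)
  exists 2. exists 3.
  split. { discriminate. }                     (* 2 <> 3 *)
  split. { simpl. left. reflexivity. }         (* In 2 (2 :: 3 :: nil) *)
  split. { simpl. right. left. reflexivity. }  (* In 3 (2 :: 3 :: nil) *)
  reflexivity.                                 (* 2 + 3 = 5 *)
Qed.
```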
|
||||
|
||||
When making claims about the correctness of our algorithm, we will assume that this
|
||||
property holds. Finally, here's the theorem we want to prove:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 64 66 >}}
|
||||
|
||||
It reads, "for any total `k` and list `is`, if `is` has a pair of numbers that add to `k`,
|
||||
then `find_sum` will return a pair of numbers `x` and `y` that add to `k`".
|
||||
There's some nuance here. We hardly reference the `has_pair` property in this definition,
|
||||
and for good reason. Our `has_pair` hypothesis only says that there is _at least one_
|
||||
pair of numbers in `is` that meets our criteria. However, this pair need not be the only
|
||||
one, nor does it need to be the one returned by `find_sum`! Even so, if we have many pairs,
|
||||
we want to confirm that `find_sum` will find one of them. Finally, here is the proof.
|
||||
I will not be able to go through it in detail in this post, but I did comment it to
|
||||
make it easier to read:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 67 102 >}}
|
||||
|
||||
Coq seems happy with it, and so am I! The bug I mentioned earlier popped up on line 96.
|
||||
I had accidentally made `find_sum` return `None` if it couldn't find a complement
|
||||
for the `x` it encountered. This meant that it never recursed into the remaining
|
||||
list `xs`, and thus, the pair was never found at all! This made it impossible
to prove that `find_sum` will return `Some y`, and I had to double back
|
||||
and check my definitions.
|
||||
|
||||
I hope you enjoyed this post! If you're interested in learning more about Coq, I strongly recommend
|
||||
checking out [Software Foundations](https://softwarefoundations.cis.upenn.edu/), a series
|
||||
of books on Coq written as comments in a Coq source file! In particular, check out
|
||||
[Logical Foundations](https://softwarefoundations.cis.upenn.edu/lf-current/index.html)
|
||||
for an introduction to using Coq. Thanks for reading!
|
||||
@@ -145,4 +145,5 @@ Here are the posts that I've written so far for this series:
|
||||
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
|
||||
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
|
||||
* [Let/In and Lambdas]({{< relref "12_compiler_let_in_lambda/index.md" >}})
|
||||
* [Cleanup]({{< relref "13_compiler_cleanup/index.md" >}})
|
||||
|
||||
|
||||
@@ -984,5 +984,6 @@ Before either of those things, though, I think that I want to go through
|
||||
the compiler and perform another round of improvements, similarly to
|
||||
[part 4]({{< relref "04_compiler_improvements" >}}). It's hard to do a lot
|
||||
of refactoring while covering new content, since major changes need to
|
||||
be explained and presented for the post to make sense. I hope to see
|
||||
you in these future posts!
|
||||
be explained and presented for the post to make sense.
|
||||
I do this in [part 13]({{< relref "13_compiler_cleanup/index.md" >}}) - cleanup.
|
||||
I hope to see you there!
|
||||
|
||||
964
content/blog/13_compiler_cleanup/index.md
Normal file
@@ -0,0 +1,964 @@
|
||||
---
|
||||
title: Compiling a Functional Language Using C++, Part 13 - Cleanup
|
||||
date: 2020-09-19T16:14:13-07:00
|
||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||
description: "In this post, we clean up our compiler."
|
||||
---
|
||||
|
||||
In [part 12]({{< relref "12_compiler_let_in_lambda" >}}), we added `let/in`
|
||||
and lambda expressions to our compiler. At the end of that post, I mentioned
|
||||
that before we move on to bigger and better things, I wanted to take a
|
||||
step back and clean up the compiler. Now is the time to do that.
|
||||
|
||||
In particular, I identified four things that could be improved
|
||||
or cleaned up:
|
||||
|
||||
* __Error handling__. We need to stop using `throw 0` and start
|
||||
using `assert`. We can also make our errors much more descriptive
|
||||
by including source locations in the output.
|
||||
* __Name mangling__. I don't think I got it quite right last
|
||||
time. Now is the time to clean it up.
|
||||
* __Code organization__. I think we can benefit from a top-level
|
||||
class, and a more clear "dependency order" between the various
|
||||
classes and structures we've defined.
|
||||
* __Code style__. In particular, I've been lazily using `struct`
|
||||
in a lot of places. That's not a good idea; it's better
|
||||
to use `class`, and only expose _some_ fields and methods
|
||||
to the rest of the code.
|
||||
|
||||
### Error Reporting and Handling
|
||||
The previous post was rather long, which led me to omit
|
||||
a rather important aspect of the compiler: proper error reporting.
|
||||
Once again our compiler has instances of `throw 0`, which is a cheap way
|
||||
of avoiding properly handling a runtime error. Before we move on,
|
||||
it's best to get rid of such blatantly lazy code.
|
||||
|
||||
Our existing exceptions (mostly type errors) can use some work, too.
|
||||
Even the most descriptive issues our compiler reports -- unification errors --
|
||||
don't include the crucial information of _where_ the error is. For large
|
||||
programs, this means having to painstakingly read through the entire file
|
||||
to try to figure out which subexpression could possibly have an incorrect type.
|
||||
This is far from the ideal debugging experience.
|
||||
|
||||
Addressing all this is a multi-step change in itself. We want to:
|
||||
|
||||
* Replace all `throw 0` code with actual exceptions.
|
||||
* Replace some exceptions that shouldn't be possible for a user to trigger
|
||||
with assertions.
|
||||
* Keep track of source locations of each subexpression, so that we may
|
||||
be able to print it if it causes an error.
|
||||
* Be able to print out said source locations at will. This isn't
|
||||
a _necessity_, but virtually all "big" compilers do this. Instead
|
||||
of reporting that an error occurs on a particular line, we will
|
||||
actually print the line.
|
||||
|
||||
Let's start with gathering the actual location data.
|
||||
|
||||
#### Bison's Locations
|
||||
Bison actually has some rather nice support for location tracking. It can
|
||||
automatically assemble the "from" and "to" locations of a nonterminal
|
||||
from the locations of children, which would be very tedious to write
|
||||
by hand. We enable this feature using the following option:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 46 46 >}}
|
||||
|
||||
There's just one hitch, though. Sure, Bison can compute bigger
|
||||
locations from smaller ones, but it must get the smaller ones
|
||||
from somewhere. Since Bison operates on _tokens_, rather
|
||||
than _characters_, it effectively doesn't interact with the source
|
||||
text at all, and can't determine from which line or column a token
|
||||
originated. The task of determining the locations of input tokens
|
||||
is delegated to the tokenizer -- Flex, in our case. Flex, on the
|
||||
other hand, doesn't have a built-in mechanism for tracking
|
||||
locations. Fortunately, Bison provides a `yy::location` class that
|
||||
includes most of the needed functionality.
|
||||
|
||||
A `yy::location` consists of two source positions, `begin` and `end`,
|
||||
which themselves are represented using lines and columns. It
|
||||
also has the following methods:
|
||||
|
||||
* `yy::location::columns(int)` advances the `end` position by
|
||||
the given number of columns, while `begin` stays the same.
|
||||
If `begin` and `end` both point to the beginning of a token,
|
||||
then `columns(token_length)` will move `end` to the token's end,
|
||||
and thus make the whole `location` contain the token.
|
||||
* `yy::location::lines(int)` behaves similarly to `columns`,
|
||||
except that it advances `end` by the given number of lines,
|
||||
rather than columns. It also resets the columns counter to `1`.
|
||||
* `yy::location::step()` moves `begin` to where `end` is. This
|
||||
is useful for when we've finished processing a token, and want
|
||||
to move on to the next one.
|
||||
|
||||
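To make the API concrete, here's a tiny usage sketch. It assumes the `location.hh`
header that Bison generates for its C++ skeleton; it is not code from the compiler itself:

```C++
#include "location.hh" // generated by Bison alongside the parser

void demo() {
    yy::location loc;  // begin and end both start at line 1, column 1
    loc.columns(5);    // end advances 5 columns: loc now spans a 5-character token
    loc.step();        // begin catches up to end, ready for the next token
    loc.lines(1);      // end moves down one line, and its column resets to 1
}
```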
For Flex specifically, `yyleng` has the length of the token
|
||||
currently being processed. Rather than adding the calls
|
||||
to `columns` and `step` to every rule, we can define the
|
||||
`YY_USER_ACTION` macro, which is run before each token
|
||||
is processed.
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 12 14 >}}
|
||||
|
||||
We'll see why we are using `LOC` instead of something like `location` soon;
|
||||
for now, you can treat `LOC` as if it were a global variable declared
|
||||
in the tokenizer. Before processing each token, we ensure that
|
||||
the `yy::location` has its `begin` and `end` at the same position,
|
||||
and then advance `end` by `yyleng` columns. This is
|
||||
{{< sidenote "right" "sufficient-note" "sufficient" >}}
|
||||
This doesn't hold for all languages. It may be possible for a language
|
||||
to have tokens that contain <code>\n</code>, in which case,
|
||||
rather than just using <code>yyleng</code>, we'd need to
|
||||
add special logic to iterate over the token and detect the line
|
||||
breaks.<br>
|
||||
<br>
|
||||
Also, this requires that the <code>end</code> of the previous token was
|
||||
correctly computed.
|
||||
{{< /sidenote >}}
|
||||
to make `LOC` represent our token's source position. For
|
||||
the moment, don't worry too much about `drv`; this is the
|
||||
parsing driver, and we will talk about it shortly.
|
||||
|
||||
So now we have a "global" variable `LOC` that gives
|
||||
us the source position of the current token. To get it
|
||||
to Bison, we have to pass it as an argument to each
|
||||
of the `make_TOKEN` calls. Here are a few sample lines
|
||||
that should give you the general idea:
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 40 43 >}}
|
||||
|
||||
That last line is actually new. Previously, we somehow
|
||||
got away without explicitly sending the end-of-file token to Bison.
|
||||
I suspect that this was due to some kind of implicit conversion
|
||||
of the Flex macro `YY_NULL` into a token; now that we have
|
||||
to pass a position to every token constructor, such an implicit
|
||||
conversion is probably impossible.
|
||||
|
||||
Now we have Bison computing source locations for each nonterminal.
|
||||
However, at the moment, we still aren't using them. To change that,
|
||||
we need to add a `yy::location` argument to each of our `ast` nodes,
|
||||
as well as to the `pattern` subclasses, `definition_defn` and
|
||||
`definition_data`. To avoid breaking all the code that creates
|
||||
AST nodes and definitions outside of the parser, we'll make this
|
||||
argument optional. Inside of `ast.hpp`, we define a new field as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.hpp" 16 16 >}}
|
||||
|
||||
Then, we add a constructor to `ast` as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.hpp" 18 18 >}}
|
||||
|
||||
Note that it's not optional here, since `ast` itself is an
|
||||
abstract class, and thus will never be constructed directly.
|
||||
It is in the subclasses of `ast` that we provide a default
|
||||
value. The change is rather mechanical, but here's an example
|
||||
from `ast_binop`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.hpp" 98 99 >}}
|
||||
|
||||
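In spirit, the whole change looks something like the following sketch. The node and
member names here are made up for illustration; they are not copied from the real `ast.hpp`:

```C++
struct ast {
    yy::location loc;
    explicit ast(yy::location l) : loc(std::move(l)) {}
    virtual ~ast() = default;
};

struct ast_num : ast {
    int value;
    // The location defaults to an empty yy::location, so code outside the
    // parser can keep constructing nodes without supplying one.
    explicit ast_num(int v, yy::location l = yy::location())
        : ast(std::move(l)), value(v) {}
};
```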
Finally, we tell Bison to pass the computed location
|
||||
data as an argument when constructing our data structures.
|
||||
This too is a mechanical change, and I think the following
|
||||
few lines demonstrate the general idea in sufficient
|
||||
detail:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 92 96 >}}
|
||||
|
||||
Here, the `@$` character is used to reference the current
|
||||
nonterminal's location data.
|
||||
|
||||
#### Line Offsets, File Input, and the Parsing Driver
|
||||
There are three more challenges with printing out the line
|
||||
of code where an error occurred. First of all, to
|
||||
print out a line of code, we need to have that line of code
|
||||
available to us. We do not currently meet this requirement:
|
||||
our compiler reads code from `stdin` (as is default for Flex),
|
||||
and `stdin` doesn't always support rewinding. This, in turn,
|
||||
means that once Flex has read a character from the input,
|
||||
it may not be possible to go back and retrieve that character
|
||||
again.
|
||||
|
||||
Second, even if we do have the entire stream or buffer
|
||||
available to us, to retrieve an offset and length within
|
||||
that buffer from just a line and column number would be a lot
|
||||
of work. A naive approach would be to iterate through
|
||||
the input again, once more keeping track of lines and columns,
|
||||
and print the desired line once we reach it. However, this
|
||||
would lead us to redo a lot of work that our tokenizer
|
||||
is already doing.
|
||||
|
||||
Third, Flex's input mechanism, even if it's configured
|
||||
not to read from `stdin`, uses a global file descriptor called
|
||||
`yyin`. However, we're better off minimizing global state (especially
|
||||
if we want to read, parse, and compile multiple files in
|
||||
the future). While we're configuring Flex's input mechanism,
|
||||
we may as well fix this, too.
|
||||
|
||||
There are several approaches to fixing the first issue. One possible
|
||||
way is to store the content of `stdin` into a temporary file. Then,
|
||||
it's possible to read from the file multiple times by using
|
||||
the C functions `fseek` and `rewind`. However, since we're
|
||||
working with files, why not just work directly with the files
|
||||
created by the user? Instead of reading from `stdin`, we may
|
||||
as well take in a path to a file via `argv`, and read from there.
|
||||
Also, instead of `fseek` and `rewind`, we can just read the file
|
||||
into memory, and access it like a normal character buffer. This
|
||||
does mean that we can stick with `stdin`, but it's more conventional
|
||||
to read source code from files, anyway.
|
||||
|
||||
To address the second issue, we can keep a mapping of line numbers
|
||||
to their locations in the source buffer. This is rather easy to
|
||||
maintain using an array: the first element of the array is 0,
|
||||
which is the beginning of the first line in any source file. From there,
|
||||
every time we encounter the character `\n`, we can push
|
||||
the current source location to the top, marking it as
|
||||
the beginning of another line. Where exactly we store this
|
||||
array is as yet unclear, since we're trying to avoid global variables.
|
||||
|
||||
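In C++, the bookkeeping amounts to something like the following sketch
(illustrative names, shown with globals purely for brevity -- the real home
for this state is decided below):

```C++
#include <cstddef>
#include <vector>

std::vector<std::size_t> line_offsets{0}; // line 1 always starts at offset 0
std::size_t offset = 0;

void saw_character(char c) {
    offset++;
    if (c == '\n')
        line_offsets.push_back(offset); // the next line starts right after '\n'
}

// The beginning of line n (1-indexed) is then just line_offsets[n - 1].
```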
Finally, to begin addressing the third issue, we can use Flex's `reentrant`
|
||||
option, which makes it so that all of the tokenizer's state is stored in an
|
||||
opaque `yyscan_t` structure, rather than in global variables. This way,
|
||||
we can configure `yyin` without setting a global variable, which is a step
|
||||
in the right direction. We'll work on this momentarily.
|
||||
|
||||
Our tokenizing and parsing stack has more global variables
|
||||
than just those specific to Flex. Among these variables is `global_defs`,
|
||||
which receives all the top-level function and data type definitions. We
|
||||
will also need some way of accessing the `yy::location` instance, and
|
||||
a way of storing our file input in memory. Fortunately, we're not
|
||||
the only ones to have ever come across the issue of creating non-global
|
||||
state: the Bison documentation has a
|
||||
[section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html)
|
||||
that describes a technique for manipulating
|
||||
state -- "parsing context", in their words. This technique involves the
|
||||
creation of a _parsing driver_.
|
||||
|
||||
The parsing driver is a class (or struct) that holds all the parse-related
|
||||
state. We can arrange for this class to be available to our tokenizing
|
||||
and parsing functions, which will allow us to use it pretty much like we'd
|
||||
use a global variable. This is the `drv` that we saw in `YY_USER_ACTION`.
|
||||
We can define it as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 36 54 >}}
|
||||
|
||||
There aren't many fields here. The `file_name` string represents
|
||||
the file that we'll be reading code from. The `location` field
|
||||
will be accessed by Flex via `get_current_location`. Bison will
|
||||
store the function and data type definitions it reads into `global_defs`
|
||||
via `get_global_defs`. Finally, `file_m` will be used to keep track
|
||||
of the content of the file we're reading, as well as the line offsets
|
||||
within that file. Notice that a couple of these fields are pointers
to objects that the constructor receives by reference. The `parse_driver` doesn't
|
||||
_own_ the global definitions, nor the file manager. They exist outside
|
||||
of it, and will continue to be used in other ways the `parse_driver`
|
||||
does not need to know about. Also, the `LOC` variable in Flex is
|
||||
actually a call to `get_current_location`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 15 15 >}}
|
||||
|
||||
The methods of `parse_driver` are rather simple. The majority of
|
||||
them deals with giving access to the parser's members: the `yy::location`,
|
||||
the `definition_group`, and the `file_mgr`. The only exception
|
||||
to this is `operator()`, which we use to actually trigger the parsing process.
|
||||
We'll make this method return `true` if parsing succeeded, and `false`
|
||||
otherwise (if, say, the file we tried to read doesn't exist).
|
||||
Here's its implementation:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 48 60 >}}
|
||||
|
||||
We try to open the user-specified file, and return `false` if we can't.
|
||||
After this, we start doing the setup specific to a reentrant
|
||||
Flex scanner. We declare a `yyscan_t` variable, which
|
||||
will contain all of Flex's state. Then, we initialize
|
||||
it using `yylex_init`. Finally, since we can no longer
|
||||
touch the `yyin` global variable (it doesn't exist),
|
||||
we have to resort to using a setter function provided by Flex
|
||||
to configure the tokenizer's input stream.
|
||||
|
||||
Next, we construct our Bison-generated parser. Note that
|
||||
unlike before, we have to pass in two arguments:
|
||||
`scanner` and `*this`, the latter being of type `parse_driver&`.
|
||||
We'll come back to how this works in a moment. With
|
||||
the scanner and parser initialized, we invoke `parser::operator()`,
|
||||
which actually runs the Flex- and Bison-generated code.
|
||||
To clean up, we run `yylex_destroy` and `fclose`. Finally,
|
||||
we call `file_mgr::finalize`, and return. But what
|
||||
_is_ `file_mgr`?
|
||||
|
||||
The `file_mgr` class does two things: it stores the part of the file
|
||||
that has already been read by Flex in memory, and it keeps track of
|
||||
where each line in our source file begins within the text. Here is its
|
||||
definition:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 14 34 >}}
|
||||
|
||||
In this class, the `string_stream` member is used to construct
|
||||
an `std::string` from the bits of text that Flex reads,
|
||||
processes, and feeds to the `file_mgr` using the `write` method.
|
||||
It's more efficient to use a string stream than to concatenate
|
||||
strings repeatedly. Once Flex is finished processing the file,
|
||||
the final contents of the `string_stream` are transferred into
|
||||
the `file_contents` string using the `finalize` method. The `offset`
|
||||
and `line_offsets` fields will be used as we described earlier: each time Flex
|
||||
encounters the `\n` character, the `offset` variable will be pushed
onto the `line_offsets` vector, marking the beginning of
|
||||
the corresponding line. The methods of the class are as follows:
|
||||
|
||||
* `write` will be called from Flex, and will allow us to
|
||||
record the content of the file we're processing to the `string_stream`.
|
||||
We've already seen it used in the `YY_USER_ACTION` macro.
|
||||
* `mark_line` will also be called from Flex, and will mark the current
|
||||
`file_offset` as the beginning of a line by pushing it into `line_offsets`.
|
||||
* `finalize` will be called by the `parse_driver` when the parsing
|
||||
finishes. At this time, the `string_stream` should contain all of
|
||||
the input file, and this data is transferred to `file_contents`, as
|
||||
we mentioned above.
|
||||
* `get_index` and `get_line_end` will be used for converting
|
||||
`yy::location` instances to offsets within the source code buffer.
|
||||
* `print_location` will be used for printing errors.
|
||||
It will print the lines spanned by the given location, with the
|
||||
location itself colored and underlined if the last argument is `true`.
|
||||
This will make our errors easier on the eyes.
|
||||
|
||||
Let's take a look at their implementations. First, `write`.
|
||||
For the most part, this method is a proxy for the `write`
|
||||
method of our `string_stream`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 9 12 >}}
|
||||
|
||||
We do, however, also keep track of the `file_offset` variable
|
||||
here, which ensures we have up-to-date information
|
||||
regarding our position in the source file. The implementation
|
||||
of `mark_line` uses this information:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 14 16 >}}
|
||||
|
||||
The `finalize` method is trivial, and requires little additional
|
||||
discussion:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 18 20 >}}
|
||||
|
||||
Once we have the line offsets, `get_index` becomes very simple:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 22 25 >}}
|
||||
|
||||
Here, we use an assertion for the first time. Calling
|
||||
`get_index` with a negative or zero line doesn't make
|
||||
any sense, since Bison starts tracking line numbers
|
||||
at 1. Similarly, asking for a line for which we don't
|
||||
have a recorded offset is invalid. Neither of these
nonsensical calls to `get_index` can be triggered by the user
under normal circumstances; both indicate the method's misuse
by the author of the compiler (us!). Thus, we terminate the program.
|
||||
|
||||
Finally, the implementation of `get_line_end` just finds the
|
||||
beginning of the next line. We stick to the C convention
|
||||
of marking 'end' indices exclusive (pointing just past
|
||||
the end of the array):
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 27 30 >}}
|
||||
|
||||
Since `line_offsets` has as many elements as there are lines,
|
||||
the last line number would be equal to the vector's size.
|
||||
When looking up the end of the last line, we can't look for
|
||||
the beginning of the next line, so instead we return the end of the file.
|
||||
|
||||
Next, the `print_location` method prints three sections
|
||||
of the source file. These are the text "before" the error,
|
||||
the error itself, and, finally, the text "after" the error.
|
||||
For example, if an error began on the fifth column of the third
|
||||
line, and ended on the eighth column of the fourth line, the
|
||||
"before" section would include the first four columns of the third
|
||||
line, and the "after" section would be the ninth column onward
|
||||
on the fourth line. Before and after the error itself,
|
||||
if the `highlight` argument is true,
|
||||
we sprinkle the ANSI escape codes to enable and disable
|
||||
special formatting, respectively. For now, the special
|
||||
formatting involves underlining the text and making it red.
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 32 46 >}}
|
||||
|
||||
Finally, to get the forward declarations for the `yy*` functions
|
||||
and types, we set the `header-file` option in Flex:
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 3 3 >}}
|
||||
|
||||
We also include this `scanner.hpp` file in our `parse_driver.cpp`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 2 2 >}}
|
||||
|
||||
#### Adding the Driver to Flex and Bison
|
||||
Bison's C++ language template generates a class called
|
||||
`yy::parser`. We don't really want to modify this class
|
||||
in any way: not only is it generated code, but it's
|
||||
also rather complex. Instead, Bison provides us
|
||||
with a mechanism to pass more data in to the parser.
|
||||
This data is made available to all the actions
|
||||
that the parser runs. Better yet, Bison also attempts
|
||||
to pass this data on to the tokenizer, which in our
|
||||
case would mean that whatever data we provide Bison
|
||||
will also be available to Flex. This is how we'll
|
||||
allow the two components to access our new `parse_driver`
|
||||
class. This is also how we'll pass in the `yyscan_t`
|
||||
that Flex now needs to run its tokenizing code. To
|
||||
do all this, we use Bison's `%param` option. I'm
|
||||
going to include a few more lines from `parser.y`,
|
||||
since they contain the necessary `#include` directives
|
||||
and a required type definition:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 1 18 >}}
|
||||
|
||||
The `%param` option effectively adds the parameter listed
|
||||
between the curly braces to the constructor of the generated
|
||||
`yy::parser`. We've already seen this in the implementation
|
||||
of our driver, where we passed `scanner` and `*this` as
|
||||
arguments when creating the parser. The parameters we declare are also passed to the
|
||||
`yylex` function, which is expected to accept them in the same order.
|
||||
|
||||
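Roughly speaking, with both parameters declared, the tokenizing function Bison expects
ends up with a signature along these lines (a sketch of the general shape; the exact
declaration is the `YY_DECL` macro we define below):

```C++
yy::parser::symbol_type yylex(yyscan_t scanner, parse_driver& drv);
```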
Since we're adding `parse_driver` as an argument, we have to
|
||||
declare it. However, we can't include the `parse_driver` header
|
||||
right away because `parse_driver` itself includes the `parser` header:
|
||||
we'd end up with a circular dependency. Instead, we resort to
|
||||
forward-declaring the driver class, as well as the `yyscan_t`
|
||||
structure containing Flex's state.
|
||||
|
||||
Adding a parameter to Bison doesn't automatically affect
|
||||
Flex. To let Flex know that its `yylex` function must now accept
|
||||
the state and the parsing driver, we have to define the
|
||||
`YY_DECL` macro. We do this in `parse_driver.hpp`, since
|
||||
this forward declaration will be used by both Flex
|
||||
and Bison:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 56 58 >}}
|
||||
|
||||
#### Improving Exceptions
|
||||
Now, it's time to add location data (and a little bit more) to our
|
||||
exceptions. We want to make it possible for exceptions to include
|
||||
data about where the error occurred, and to print this data to the user.
|
||||
However, it's also possible for us to have exceptions that simply
|
||||
do not have that location data. Furthermore, we want to know
|
||||
whether or not an exception has an associated location; we'd
|
||||
rather not print an invalid or "default" location when an error
|
||||
occurs.
|
||||
|
||||
In the old days of programming, we could represent the absence
|
||||
of location data with a `nullptr`, or `NULL`. But not only
|
||||
does this approach expose us to all kinds of `NULL`-safety
|
||||
bugs, but it also requires heap allocation! This doesn't
|
||||
make it sound all that appealing; instead, I think we should
|
||||
opt for using `std::optional`.
|
||||
|
||||
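As a quick illustration of the pattern (a toy type for demonstration, not the
compiler's real error class):

```C++
#include <iostream>
#include <optional>

struct toy_error {
    std::optional<int> line; // std::nullopt when no location is known

    void report() const {
        std::cout << "an error occurred";
        if (line) std::cout << " on line " << *line; // only print if present
        std::cout << "\n";
    }
};

int main() {
    toy_error{42}.report();           // an error occurred on line 42
    toy_error{std::nullopt}.report(); // an error occurred
}
```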
Though `std::optional` is standard (as may be obvious from its
|
||||
namespace), it's a rather recent addition to the C++ STL.
|
||||
In order to gain access to it, we need to ensure that our
|
||||
project is compiled using C++17. To this end, we add
|
||||
the following two lines to our CMakeLists.txt:
|
||||
|
||||
{{< codelines "CMake" "compiler/13/CMakeLists.txt" 5 6 >}}
|
||||
|
||||
Now, let's add a new base class for all of our compiler errors,
|
||||
unsurprisingly called `compiler_error`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.hpp" 10 26 >}}
|
||||
|
||||
We'll put some 'common' exception functionality
|
||||
into the `print_location` and `print_about` methods. If the error
|
||||
has an associated location, the former method will print that
|
||||
location to the screen. We don't always want to highlight
|
||||
the part of the code that caused the error: for instance,
|
||||
an invalid data type definition may span several lines,
|
||||
and coloring that whole section of text red would be
|
||||
too much. To address this, we add the `highlight`
|
||||
boolean argument, which can be used to switch the
|
||||
colors on and off. The `print_about` method
|
||||
will simply print the `what()` message of the exception,
|
||||
in addition to the "specific" error that occurred (stored
|
||||
in `description`). Here are the implementations of the
|
||||
functions:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 3 16 >}}
|
||||
|
||||
We will also add a `pretty_print` method to all of
|
||||
our exceptions. This method will handle
|
||||
all the exception-specific printing logic.
|
||||
For the generic compiler error, this means
|
||||
simply printing out the error text and the location:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 18 21 >}}
|
||||
|
||||
For `type_error`, this logic slightly changes,
|
||||
enabling colors when printing the location:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 27 30 >}}
|
||||
|
||||
Finally, for `unification_error`, we also include
|
||||
the code to print out the two types that our
|
||||
compiler could not unify:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 32 41 >}}
|
||||
|
||||
There's a subtle change here. Compared to the previous
|
||||
type-printing code (which we had in `main`), what
|
||||
we wrote here deals with "expected" and "actual" types.
|
||||
The `left` type passed to the exception is printed
|
||||
first, and is treated as the "correct" type. The
|
||||
`right` type, on the other hand, is treated
|
||||
like the "wrong" type that should have been
|
||||
unifiable with `left`. This will affect the
|
||||
calling conventions of our unification code.
|
||||
|
||||
Now, we can go through and find all the places where
|
||||
we `throw 0`. One such place was in the data type
|
||||
definition code, where declaring the same type parameter
|
||||
twice is invalid. We replace the `0` with a
|
||||
`compiler_error`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 66 69 >}}
|
||||
|
||||
Not all `throw 0` statements should become exceptions.
|
||||
For example, here's code from the previous version of
|
||||
the compiler:
|
||||
|
||||
{{< codelines "C++" "compiler/12/definition.cpp" 123 127 >}}
|
||||
|
||||
If a definition `def_defn` has a dependency on a "nearby" (declared
|
||||
in the same group) definition called `dependency`, and if
|
||||
`dependency` does not exist within the same definition group,
|
||||
we throw an exception. But this error is impossible
|
||||
for a user to trigger: the only reason for a variable to appear
|
||||
in the `nearby_variables` vector is that it was previously
|
||||
found in the definition group. Here's the code that proves this
|
||||
(from the current version of the compiler):
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 102 106 >}}
|
||||
|
||||
Not being able to find the variable in the definition group
|
||||
is a compiler bug, and should never occur. So, instead
|
||||
of throwing an exception, we'll use an assertion:
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 128 128 >}}
|
||||
|
||||
For more complicated error messages, we can use a `stringstream`.
|
||||
Here's an example from `parsed_type`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parsed_type.cpp" 16 23 >}}
|
||||
|
||||
In general, this change is also rather mechanical. Before we
|
||||
move on, to maintain a balance between exceptions and assertions, here
|
||||
are a couple more assertions from `type_env`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.cpp" 81 82 >}}
|
||||
|
||||
Once again, it should not be possible for the compiler
|
||||
to try to generalize the type of a variable that doesn't
exist, nor should generalization occur twice.
|
||||
|
||||
While we're on the topic of types, let's talk about
|
||||
`type_mgr::unify`. In practice, I suspect that a lot of
|
||||
errors in our compiler will originate from this method.
|
||||
However, at present, this method does not in any way
|
||||
track the locations of where a unification error occurred.
|
||||
To fix this, we add a new `loc` parameter to `unify`,
|
||||
which we make optional to allow for unification without
|
||||
a known location. Here's the declaration:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.hpp" 92 92 >}}
|
||||
|
||||
The change to the implementation is mechanical and repetitive,
|
||||
so instead of showing you the whole method, I'll settle for
|
||||
a couple of lines:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.cpp" 121 122 >}}
|
||||
|
||||
We want to make sure that a location provided to the
|
||||
top-level call to `unify` is also forwarded to the
|
||||
recursive calls, so we have to explicitly add it
|
||||
to the call.
|
||||
|
||||
We'll also have to update the 'main' code to call the
|
||||
`pretty_print` methods, but there's another big change
|
||||
that we're going to make before then. However, once that
|
||||
change is made, our errors will look a lot better.
|
||||
Here is what's printed out to the user when a type error
|
||||
occurs:
|
||||
|
||||
```
|
||||
an error occured while checking the types of the program: failed to unify types
|
||||
occuring on line 2:
|
||||
3 + False
|
||||
the expected type was:
|
||||
Int
|
||||
while the actual type was:
|
||||
Bool
|
||||
```
|
||||
|
||||
Here's an error that was previously a `throw 0` statement in our code:
|
||||
|
||||
```
|
||||
an error occured while compiling the program: type variable a used twice in data type definition.
|
||||
occuring on line 1:
|
||||
data Pair a a = { MkPair a a }
|
||||
```
|
||||
|
||||
Now, not only have we eliminated the lazy uses of `throw 0` in our
|
||||
code, but we've also improved the presentation of the errors
|
||||
to the user!
|
||||
|
||||
### Rethinking Name Mangling
|
||||
In the previous post, I said the following:
|
||||
|
||||
> One more thing. Let’s adopt the convention of storing mangled names into the compilation environment. This way, rather than looking up mangled names only for global functions, which would be a ‘gotcha’ for anyone working on the compiler, we will always use the mangled names during compilation.
|
||||
|
||||
Now that I've had some more time to think about it
|
||||
(and now that I've returned to the compiler after
|
||||
a brief hiatus), I think that this was not the right call.
|
||||
Mangled names make sense when translating to LLVM; we certainly
|
||||
don't want to declare two LLVM functions
|
||||
{{< sidenote "right" "mangling-note" "with the same name." >}}
|
||||
By the way, LLVM has its own name mangling functionality. If you
|
||||
declare two functions with the same name, they'll appear as
|
||||
<code>function</code> and <code>function.0</code>. Since LLVM
|
||||
uses the <code>Function*</code> C++ values to refer to functions,
|
||||
as long as we keep them separate on <em>our</em> end, things will
|
||||
work.<br>
|
||||
<br>
|
||||
However, in our compiler, name mangling occurs before LLVM is
|
||||
introduced, at translation time. We could create LLVM functions
|
||||
at that time, too, and associate them with variables. But then,
|
||||
our G-machine instructions will be coupled to LLVM, which
|
||||
would not be as clean.
|
||||
{{< /sidenote >}}
|
||||
But things are different for local variables. Our local variables
|
||||
are graphs on a stack, and are not actually compiled to LLVM
|
||||
definitions. It doesn't make sense to mangle their names, since
|
||||
their names aren't present anywhere in the final executable.
|
||||
It's not even "consistent" to mangle them, since global definitions
|
||||
are compiled directly to __PushGlobal__ instructions, while local
|
||||
variables are only referenced through the current `env`.
|
||||
So, I opted to reverse my decision. We will go back to
|
||||
placing variable names directly into `env_var`. Here's
|
||||
an example of this from `global_scope.cpp`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/global_scope.cpp" 6 8 >}}
|
||||
|
||||
Now that we've started using assertions, I also think it's worth
putting our new invariant -- "only global definitions have mangled
|
||||
names" -- into code:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.cpp" 36 45 >}}
|
||||
|
||||
Furthermore, we'll _require_ that a global definition
|
||||
has a mangled name. This way, we can be more confident
|
||||
that a variable from a __PushGlobal__ instruction
|
||||
is referencing the right function. To achieve
|
||||
this, we change `get_mangled_name` to stop
|
||||
returning the input string if a mangled name was not
|
||||
found; doing so makes it impossible to check if a mangled
|
||||
name was explicitly defined. Instead,
|
||||
we add two assertions. First, if an environment scope doesn't
|
||||
contain a variable, then it _must_ have a parent.
|
||||
If it does contain the variable, that variable _must_ have
|
||||
a mangled name. We end up with the following:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.cpp" 47 55 >}}
|
||||
|
||||
For this to work, we make one more change. Now that we've
|
||||
enabled C++17, we have access to `std::optional`. We
|
||||
can thus represent the presence or absence of mangled
|
||||
names using an optional field, rather than with the empty string `""`.
|
||||
I hear that C++ compilers have pretty good
|
||||
[empty string optimizations](https://www.youtube.com/watch?v=kPR8h4-qZdk),
|
||||
but nonetheless, I think it makes more sense semantically
|
||||
to use "absent" (`nullopt`) instead of "empty" (`""`).
|
||||
Here's the definition of `type_env::variable_data` now:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.hpp" 16 25 >}}
|
||||
|
||||
Since looking up a mangled name for non-global variable
|
||||
{{< sidenote "right" "unrepresentable-note" "will now result in an assertion failure," >}}
|
||||
A very wise human at the very dawn of our species once said,
|
||||
"make illegal states unrepresentable". Their friends and family were a little
|
||||
busy making a fire, and didn't really understand what the heck they meant. Now,
|
||||
we kind of do.<br>
|
||||
<br>
|
||||
It's <em>possible</em> for our <code>type_env</code> to include a
|
||||
<code>variable_data</code> entry that is both global and has no mangled
|
||||
name. But it doesn't have to be this way. We could define two subclasses
|
||||
of <code>variable_data</code>, one global and one local,
|
||||
where only the global one has a <code>mangled_name</code>
|
||||
field. It would be impossible to reach this assertion failure then.
|
||||
{{< /sidenote >}} we have to change
|
||||
`ast_lid::compile` to only call `get_mangled_name` once
|
||||
it ensures that the variable being compiled is, in fact,
|
||||
global:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.cpp" 58 63 >}}
|
||||
|
||||
Since all global functions now need to have mangled
|
||||
names, we run into a bit of a problem. What are
|
||||
the mangled names of `(+)`, `(-)`, and so on? We could
|
||||
continue to hardcode them as `plus`, `minus`, etc., but this can
|
||||
(and currently does!) lead to errors. Consider the following
|
||||
piece of code:
|
||||
|
||||
```
|
||||
defn plus x y = { x + y }
|
||||
defn main = { plus 320 6 }
|
||||
```
|
||||
|
||||
We've hardcoded the mangled name of `(+)` to be `plus`. However,
|
||||
`global_scope` doesn't know about this, so when the actual
|
||||
`plus` function gets translated, it also gets assigned the
|
||||
mangled name `plus`. The name is also overwritten in the
|
||||
`llvm_context`, which effectively means that `(+)` is
|
||||
now compiled to a call of the user-defined `plus` function.
|
||||
If we didn't overwrite the name, we would've run into an assertion
|
||||
failure in this scenario anyway. In short, this example illustrates
|
||||
an important point: mangling information needs to be available
|
||||
outside of a `global_scope`. We don't want to do this by having
|
||||
every function take in a `global_scope` to access the mangling
|
||||
information; instead, we'll store the mangling information in
|
||||
a new `mangler` class, which `global_scope` will take as an argument.
|
||||
The new class is very simple:
|
||||
|
||||
{{< codelines "C++" "compiler/13/mangler.hpp" 5 11 >}}
|
||||
|
||||
As with `parse_driver`, `global_scope` takes `mangler` by reference
|
||||
and stores a pointer:
|
||||
|
||||
{{< codelines "C++" "compiler/13/global_scope.hpp" 50 50 >}}
|
||||
|
||||
The implementation of `new_mangled_name` doesn't change, so I'm
|
||||
not going to show it here. With this new mangling information
|
||||
in hand, we can now correctly set the mangled names of binary
|
||||
operators:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.cpp" 22 27 >}}
|
||||
|
||||
Wait a moment, what's a `compiler`? Let's talk about that next.
|
||||
|
||||
### A Top-Level Class
|
||||
Now that we've moved name mangling out of `global_scope`, we have
|
||||
to put it somewhere. The same goes for the global definition group
|
||||
and the file manager that are given to `parse_driver`. The two
|
||||
classes _make use_ of the other data, but they don't _own it_.
|
||||
That's why they take it by reference, and store it as a pointer.
|
||||
They're just temporarily allowed access.
|
||||
|
||||
So, what should be the owner of all of these disparate components?
|
||||
Thus far, that has been the `main` function, or the utility
|
||||
functions that it calls out to. However, this is sloppy:
|
||||
we have related data and operations on it, but we don't group
|
||||
them into an object. We can group all of the components of our
|
||||
compiler into a `compiler` object, and leave `main.cpp` with
|
||||
exception printing code.
|
||||
|
||||
The definition of the `compiler` class begins with all of the data
|
||||
structures that we use in the process of compilation:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.hpp" 12 20 >}}
|
||||
|
||||
There's a loose ordering to these fields. In C++, class members are
|
||||
initialized in the order they are declared; we therefore want to make
|
||||
sure that fields that are depended on by other fields are initialized first.
|
||||
Otherwise, I tried to keep the order consistent with the conceptual path
|
||||
of the code through the compiler.
|
||||
* Parsing happens first, so we begin with `parse_driver`, which needs a
|
||||
`file_manager` (to populate with line information) and a `definition_group`
|
||||
(to receive the global definitions from the parser).
|
||||
* We then proceed to typechecking, for which we use a global `type_env_ptr`
|
||||
(to define the built-in functions and constructors) and a `type_mgr` (to
|
||||
manage the assignments of type variables).
|
||||
* Once a program is typechecked, we transform it, eliminating local
|
||||
function definitions and lambda functions. This is done by storing
|
||||
newly-emitted global functions into the `global_scope`, which requires a
|
||||
`mangler` to generate new names for the target functions.
|
||||
* Finally, to generate LLVM IR, we need our `llvm_context` class.
|
||||
|
||||
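As an aside, the "members are initialized in declaration order" rule mentioned
above is easy to see in isolation. This snippet is just an illustration, not
compiler code:

```C++
struct example {
    int first = 1;
    int second = first + 1; // fine: `first` is already initialized

    // Swapping the two declarations would leave `second` reading an
    // uninitialized `first`, even though the initializers look identical.
};
```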
The methods of the compiler are arranged similarly:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.hpp" 22 31 >}}
|
||||
|
||||
The methods go as follows:
|
||||
|
||||
* `add_default_types` adds the built-in types to the `global_env`.
|
||||
At this point, these types only include `Int`.
|
||||
* `add_binop_type` adds a single binary operator to the global
|
||||
type environment. We saw its implementation earlier: it deals
|
||||
with both binding a type, and setting a mangled name.
|
||||
* `add_default_types` adds the types for each binary operator.
|
||||
* `parse`, `typecheck`, `translate` and `compile` all do exactly
|
||||
what they say. In this case, compilation refers to creating G-machine
|
||||
instructions.
|
||||
* `create_llvm_binop` creates an internal function that forces the
|
||||
evaluation of its two arguments, and actually applies the given binary
|
||||
operator. Recall that the `(+)` in user code constructs a call to this
|
||||
function, but leaves it unevaluated until it's needed.
|
||||
* `generate_llvm` converts all the definitions in `global_scope`, which
|
||||
are at this point compiled into G-machine `instruction`s, into LLVM IR.
|
||||
* `output_llvm` contains all the code to actually generate an object
|
||||
file from the LLVM IR.
|
||||
|
||||
These functions are mostly taken from part 12's `main.cpp`, and adjusted
|
||||
to use the `compiler`'s members rather than local definitions or arguments.
|
||||
You should compare part 12's
|
||||
[`main.cpp`](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/12/main.cpp)
|
||||
file with the
|
||||
[`compiler.cpp`](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/13/compiler.cpp)
|
||||
file that we end up with at the end of this post.
|
||||
|
||||
Next, we have the compiler's constructor, and its `operator()`. The
|
||||
latter, analogously to our parsing driver, will trigger the compilation
|
||||
process. Their implementations are straightforward:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.cpp" 131 145 >}}
|
||||
|
||||
We also add a couple of methods to give external code access to
|
||||
some of the compiler's data structures. I omit their (trivial)
|
||||
implementations, but they have the following signatures:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.hpp" 35 36 >}}
|
||||
|
||||
With all the compilation code tucked into our new `compiler` class,
|
||||
`main` becomes very simple. We also finally get to use our exception
|
||||
pretty printing code:
|
||||
|
||||
{{< codelines "C++" "compiler/13/main.cpp" 11 27 >}}
|
||||
|
||||
With this, we complete our transition to a compiler object.
|
||||
All that's left is to clean up the code style.
|
||||
|
||||
### Keeping Things Private
|
||||
Hand-writing or generating hundreds of trivial getters and setters
|
||||
for the fields of a data class (which is standard in the world of Java) seems
|
||||
absurd to me. So, for most of this project, I stuck with
|
||||
`struct`s, rather than classes. But this is not a good policy
|
||||
to apply _everywhere_. I still think it makes sense to make
|
||||
data structures like `ast` and `type` public-by-default;
|
||||
however, I _don't_ think that way about classes like `type_mgr`,
|
||||
`llvm_context`, `type_env`, and `env`. All of these have information
|
||||
that we should never be accessing directly. Some guard this
|
||||
information with assertions. In short, it should be protected.
|
||||
|
||||
For most classes, the changes are mechanical. For instance, we
|
||||
can make `type_env` a class simply by changing its declaration,
|
||||
and marking all of its functions public. This requires a slight
|
||||
refactoring of a line that used its `parent` field. Here's
|
||||
what it used to be (in context):
|
||||
|
||||
{{< codelines "C++" "compiler/12/main.cpp" 57 60 >}}
|
||||
|
||||
And here's what it is now:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.cpp" 55 58 >}}
|
||||
|
||||
Rather than traversing the chain of environments from
|
||||
the body of the definition, we just use the definition's
|
||||
own `env_ptr`. This is cleaner and more explicit, and
|
||||
it helps us not use the private members of `type_env`!
|
||||
|
||||
The situation with `env` is about as simple. We just make
it and its two descendants into classes, and mark their
|
||||
methods and constructors public. The same
|
||||
goes for `global_scope`. To make `type_mgr`
|
||||
a class, we have to add a new method: `lookup`.
|
||||
Here's its implementation:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.cpp" 81 85 >}}
|
||||
|
||||
It's used in `type_var::print` as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.cpp" 28 35 >}}
|
||||
|
||||
We can't use `resolve` here because it takes (and returns)
|
||||
a `type_ptr`. If we make it _take_ a `type*`, it won't
|
||||
be able to return its argument if it's already resolved. If we
|
||||
allow it to _return_ `type*`, we won't have an owning
|
||||
reference. We also don't want to duplicate the
|
||||
method just for this one call. Notice, though, how similar
|
||||
`type_var::print`/`lookup` and `resolve` are in terms of execution.
|
||||
|
||||
The change for `llvm_context` requires a little more work.
|
||||
Right now, `ctx.builder` is used a _lot_ in `instruction.cpp`.
|
||||
Since we don't want to forward each of the LLVM builder methods,
|
||||
and since it feels weird to make `llvm_context` extend `llvm::IRBuilder`,
|
||||
we'll just provide a getter for the `builder` field. The
|
||||
same goes for `module`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/llvm_context.hpp" 46 47 >}}
|
||||
|
||||
Here's what some of the code from `instruction.cpp` looks like now:
|
||||
|
||||
{{< codelines "C++" "compiler/13/instruction.cpp" 144 145 >}}
|
||||
|
||||
Right now, the `ctx` field of the `llvm_context` (which contains
|
||||
the `llvm::LLVMContext`) is only externally used to create
|
||||
instances of `llvm::BasicBlock`. We'll add a proxy method
|
||||
for this functionality:
|
||||
|
||||
{{< codelines "C++" "compiler/13/llvm_context.cpp" 174 176 >}}
|
||||
|
||||
Finally, `instruction_pushglobal` needs to access the
|
||||
`llvm::Function` instances that we create in the process
|
||||
of compilation. We add a new `get_custom_function` method
|
||||
to support this, which automatically prefixes the function
|
||||
name with `f_`, much like `create_custom_function`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/llvm_context.cpp" 292 294 >}}
|
||||
|
||||
I think that's enough. If we chose to turn more compiler
|
||||
data structures into classes, I think we would've quickly drowned
|
||||
in one-line getter and setter methods.
|
||||
|
||||
That's all for the cleanup! We've added locations and more errors
|
||||
to the compiler, stopped throwing `0` in favor of proper exceptions
|
||||
or assertions, made name mangling more reasonable, fixed a bug with
|
||||
accidentally shadowing default functions, organized our compilation
|
||||
process into a `compiler` class, and made more things into classes.
|
||||
In the next post, I hope to tackle __strings__ and __Input/Output__.
|
||||
I also think that implementing __modules__ would be a good idea,
|
||||
though at the moment I don't know too much on the subject. I hope
|
||||
you'll join me in my future writing!
|
||||
|
||||
### Appendix: Optimization
|
||||
When I started working on the compiler after the previous post,
|
||||
I went a little overboard. I started working on optimizing the generated programs,
|
||||
but eventually decided I wasn't doing a
|
||||
{{< sidenote "right" "good-note" "good enough" >}}
|
||||
I think authors should feel a certain degree of responsibility
|
||||
for the content they create. If I do something badly, and somebody
else trusts me and learns from it, who knows how much damage I've done.
|
||||
I try not to do damage.<br>
|
||||
<br>
|
||||
If anyone reads what I write, anyway!
|
||||
{{< /sidenote >}} job to present it to others,
|
||||
and scrapped that part of the compiler altogether. I'm not
|
||||
sure if I will try again in the near future. But,
|
||||
if you're curious about optimization, here are a few avenues
|
||||
I've explored or thought about:
|
||||
|
||||
* __Unboxing numbers__. Right now, numbers are allocated and garbage
|
||||
collected just like the rest of the graph nodes. This is far from ideal.
|
||||
We could use pointers to represent numbers, by tagging their most significant
|
||||
bits on 64-bit CPUs. Rather than allocating a node, the runtime will just
|
||||
cast a number to a pointer, tag it, and push it on the stack (a sketch of this tagging trick appears after this list).
|
||||
* __Converting enumeration data types to numbers__. If no constructor
|
||||
of a data type takes any arguments, then the tag uniquely identifies
|
||||
each constructor. Combined with unboxed numbers, this can save unnecessary
|
||||
allocations and memory accesses.
|
||||
* __Special treatment for global constants__. It makes sense for
|
||||
global functions to be converted into LLVM functions, but the
|
||||
same is not the case for
|
||||
{{< sidenote "right" "constant-note" "constants." >}}
|
||||
Yeah, yeah, a constant is just a nullary function. Get
|
||||
out of here with your pedantry!
|
||||
{{< /sidenote >}} We can find a way to
|
||||
initialize global constants once, which would save some work. To
|
||||
make more constants suitable for this, we could employ the
|
||||
[monomorphism restriction](https://wiki.haskell.org/Monomorphism_restriction).
|
||||
* __Optimizing stack operations.__ If you read through the LLVM IR
|
||||
we produce, you can see a lot of code that peeks at something twice,
|
||||
or pops-then-pushes the same value, or does other absurd things. LLVM
|
||||
isn't aware of the semantics of our stacks, but perhaps we could write an
|
||||
optimization pass to deal with some of the more blatant instances of
|
||||
this issue.
|
||||
|
||||
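For the unboxing idea, here is a rough sketch of one common variant of the trick.
It tags the lowest bit rather than the most significant ones (heap pointers are
aligned, so the low bit is always free), and it assumes non-negative numbers to
keep the arithmetic simple. Nothing here is part of the actual compiler:

```C++
#include <cassert>
#include <cstdint>

using node_ptr = void*;

// Pack a number into a pointer-sized word, using the lowest bit as a
// "this is a number, not a real pointer" tag.
node_ptr box_number(std::uintptr_t n) {
    assert(n <= (UINTPTR_MAX >> 1)); // must still fit once shifted
    return reinterpret_cast<node_ptr>((n << 1) | 1);
}

bool is_number(node_ptr p) {
    return reinterpret_cast<std::uintptr_t>(p) & 1;
}

std::uintptr_t unbox_number(node_ptr p) {
    assert(is_number(p));
    return reinterpret_cast<std::uintptr_t>(p) >> 1; // drop the tag bit
}
```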
If you attempt any of these, let me know how it goes, please!
|
||||
476
content/blog/typesafe_imperative_lang.md
Normal file
@@ -0,0 +1,476 @@
|
||||
---
|
||||
title: "A Typesafe Representation of an Imperative Language"
|
||||
date: 2020-11-02T01:07:21-08:00
|
||||
tags: ["Idris"]
|
||||
---
|
||||
|
||||
A recent homework assignment for my university's programming languages
|
||||
course was to encode the abstract syntax for a small imperative language
|
||||
into Haskell data types. The language consisted of very few constructs, and was very much a "toy".
|
||||
On the expression side of things, it had three registers (`A`, `B`, and `R`),
|
||||
numbers, addition, comparison using "less than", and logical negation. It also
|
||||
included a statement for storing the result of an expression into
|
||||
a register, `if/else`, and an infinite loop construct with an associated `break` operation.
|
||||
A sample program in the language which computes the product of two
|
||||
numbers is as follows:
|
||||
|
||||
```
|
||||
A := 7
|
||||
B := 9
|
||||
R := 0
|
||||
do
|
||||
if A <= 0 then
|
||||
break
|
||||
else
|
||||
R := R + B;
|
||||
A := A + -1;
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
The homework notes that type errors may arise in the little imperative language.
|
||||
We could, for instance, try to add a boolean to a number: `3 + (1 < 2)`. Alternatively,
|
||||
we could try to use a number in the condition of an `if/else` expression. A "naive"
|
||||
encoding of the abstract syntax would allow for such errors.
|
||||
|
||||
However, assuming that registers could only store integers and not booleans, it is fairly easy to
|
||||
separate the expression grammar into two nonterminals, yielding boolean
|
||||
and integer expressions respectively. Since registers can only store integers,
|
||||
the `(:=)` operation will always require an integer expression, and an `if/else`
|
||||
statement will always require a boolean expression. A matching Haskell encoding
|
||||
would not allow "invalid" programs to compile. That is, the programs would be
|
||||
type-correct by construction.
|
||||
|
||||
Then, a question arose in the ensuing discussion: what if registers _could_
|
||||
contain booleans? It would be impossible to create such a "correct-by-construction"
|
||||
representation then, wouldn't it?
|
||||
{{< sidenote "right" "haskell-note" "Although I don't know about Haskell," >}}
|
||||
I am pretty certain that a similar encoding in Haskell is possible. However,
|
||||
Haskell wasn't originally created for that kind of abuse of its type system,
|
||||
so it would probably not look very good.
|
||||
{{< /sidenote >}} I am sure that it _is_ possible to do this
|
||||
in Idris, a dependently typed programming language. In this post I will
|
||||
talk about how to do that.
|
||||
|
||||
### Registers and Expressions
|
||||
Let's start by encoding registers. Since we only have three registers, we
|
||||
can encode them using a simple data type declaration, much the same as we
|
||||
would in Haskell:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 1 1 >}}
|
||||
|
||||
Now that registers can store either integers or booleans (and only those two),
|
||||
we need to know which one is which. For this purpose, we can declare another
|
||||
data type:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 3 3 >}}
|
||||
|
||||
At any point in the (hypothetical) execution of our program, each
|
||||
of the registers will have a type, either boolean or integer. The
|
||||
combined state of the three registers would then be the combination
|
||||
of these three states; we can represent this using a 3-tuple:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 5 6 >}}
|
||||
|
||||
Let's say that the first element of the tuple will be the type of the register
|
||||
`A`, the second the type of `B`, and the third the type of `R`. Then,
|
||||
we can define two helper functions, one for retrieving the type of
|
||||
a register, and one for changing it:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 8 16 >}}

Now, it's time to talk about expressions. We know now that an expression
can evaluate to either a boolean or an integer value (because a register
can contain either of those types of values). Perhaps we can specify
the type that an expression evaluates to in the expression's own type:
`Expr IntTy` would evaluate to integers, and `Expr BoolTy` would evaluate
to booleans. Then, we could have constructors as follows:

```Idris
Lit : Int -> Expr IntTy
Not : Expr BoolTy -> Expr BoolTy
```

Sounds good! But what about loading a register?

```Idris
Load : Reg -> Expr IntTy -- no; what if the register is a boolean?
Load : Reg -> Expr BoolTy -- no; what if the register is an integer?
Load : Reg -> Expr a -- no; the type isn't arbitrary, it depends on the register!
```

The type of an expression that loads a register depends on the current
state of the program! If we last stored an integer into a register,
then loading from that register would give us an integer. But if we
last stored a boolean into a register, then reading from it would
give us a boolean. Our expressions need to be aware of the current
types of each register. To do so, we add the state as a parameter to
our `Expr` type. This would lead to types like the following:

```Idris
-- An expression that produces a boolean when all the registers
-- are integers.
Expr (IntTy, IntTy, IntTy) BoolTy

-- An expression that produces an integer when A and B are integers,
-- and R is a boolean.
Expr (IntTy, IntTy, BoolTy) IntTy
```

In Idris, the whole definition becomes:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 18 23 >}}

The only "interesting" constructor is `Load`, which, given a register `r`,
creates an expression that has `r`'s type in the current state `s`.
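
For readers following along without the file, the `Expr` declaration plausibly has this shape; the sketch below only includes the constructors that appear later in the post (`Lit`, `Load`, `Add`, `Leq`), so the real definition may contain a few more.

```Idris
-- A sketch of the Expr GADT; the linked file may have more constructors.
data Expr : TypeState -> Ty -> Type where
  Lit  : Int -> Expr s IntTy
  Load : (r : Reg) -> Expr s (getRegTy r s)
  Add  : Expr s IntTy -> Expr s IntTy -> Expr s IntTy
  Leq  : Expr s IntTy -> Expr s IntTy -> Expr s BoolTy
```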

### Statements
Statements are a bit different. Unlike expressions, they don't evaluate to
anything; rather, they do something. That "something" may very well be changing
the current state. We could, for instance, set `A` to be a boolean, while it was
previously an integer. This suggests equipping our `Stmt` type with two
arguments: the initial state (before the statement's execution), and the final
state (after the statement's execution). This would lead to types like this:

```Idris
-- Statement that, when run while all registers contain integers,
-- terminates with registers B and R having been assigned boolean values.
Stmt (IntTy, IntTy, IntTy) (IntTy, BoolTy, BoolTy)
```

However, there's a problem with `loop` and `break`. When we run a loop,
we will require that the state at the end of one iteration is the
same as the state at its beginning. Otherwise, it would be possible
for a loop to keep changing the types of registers every iteration,
and it would become impossible for us to infer the final state
without actually running the program. In itself, this restriction
isn't a problem; most static type systems require both branches
of an `if/else` expression to be of the same type for a similar
reason. The problem comes from the interaction with `break`.

By itself, the would-be type of `break` seems innocent enough. It
doesn't change any registers, so we could call it `Stmt s s`.
But consider the following program:

```
A := 0
B := 0
R := 0
do
  if 5 <= A then
    B := 1 <= 1
    break
    B := 0
  else
    A := A + 1
  end
end
```

The loop starts with all registers having integer values.
As per our aforementioned loop requirement, the body
of the loop must terminate with all registers _still_ having
integer values. For the first five iterations that's exactly
what will happen. However, after we increment `A` the fifth time,
we will set `B` to a boolean value -- using a valid statement --
and then `break`. The `break` statement will be accepted by
the typechecker, and so will the whole `then` branch. After all,
it seems as though we reset `B` back to an integer value.
But that won't be the case. We will have jumped to the end
of the loop, where we are expected to have an all-integer state,
which we will not have.

The solution I came up with to address this issue was to
add a _third_ argument to `Stmt`, which contains the "context"
type. That is, it contains the type of the innermost loop surrounding
the statement. A `break` statement would only be permissible
if the current type matches the loop type. With this, we can finally
write down a definition of `Stmt`:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 26 30 >}}

The `Store` constructor takes a register `r` and an expression producing some type `t` in state `s`.
From these, it creates a statement that starts in `s`, and finishes
in a state similar to `s`, but with `r` now having type `t`. The loop
type `l` remains unaffected and unused; we are free to assign any register
any value.

The `If` constructor takes a condition `Expr`, which starts in state `s` and _must_ produce
a boolean. It also takes two programs (sequences of statements), each of which
starts in `s` and finishes in another state `n`. This results in
a statement that starts in state `s`, and finishes in state `n`. Conceptually,
each branch of the `if/else` statement must result in the same final state (in terms of types);
otherwise, we wouldn't know which of the states to pick when deciding the final
state of the `If` itself. As with `Store`, the loop type `l` is untouched here.
Individual statements are free to modify the state however they wish.

The `Loop` constructor is very restrictive. It takes a single program (the sequence
of instructions that it will be repeating). As we discussed above, this program
must start _and_ end in the same state `s`. Furthermore, this program's loop
type must also be `s`, since the loop we're constructing will be surrounding the
program. The resulting loop itself still has an arbitrary loop type `l`, since
it doesn't surround itself.

Finally, `Break` can only be constructed when the loop state matches the current
state. Since we'll be jumping to the end of the innermost loop, the final state
is also the same as the loop state.

These are all the constructors we'll be needing. It's time to move on to
whole programs!

### Programs
A program is simply a list of statements. However, we can't use a regular Idris list,
because a regular list wouldn't be able to represent the relationship between
each successive statement. In our program, we want the final state of one
statement to be the initial state of the following one, since they'll
be executed in sequence. To represent this, we have to define our own
list-like GADT. The definition of the type turns out to be fairly straightforward:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 32 34 >}}

The `Nil` constructor represents an empty program (much like the built-in `Nil` represents an empty list).
Since no actions are done, it creates a `Prog` that starts and ends in the same state: `s`.
The `(::)` constructor, much like the built-in `(::)` constructor, takes a statement
and another program. The statement begins in state `s` and ends in state `n`; the program after
that statement must then start in state `n`, and end in some other state `m`.
The combination of the statement and the program starts in state `s`, and finishes in state `m`.
Thus, `(::)` yields `Prog s m`. None of the constructors affect the loop type `l`: we
are free to sequence any statements that we want, and it is impossible for us
to construct statements using `l` that cause runtime errors.
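
Since `If` takes whole programs and `Prog` is built from statements, the two declarations refer to each other, so in Idris they would presumably sit in a `mutual` block. Here is a sketch of their shape as I understand it from the descriptions above; it is not the contents of the linked file.

```Idris
mutual
  -- A sketch: loop context l, initial state s, final state n.
  data Stmt : (l : TypeState) -> (s : TypeState) -> (n : TypeState) -> Type where
    Store : (r : Reg) -> Expr s t -> Stmt l s (setRegTy r t s)
    If    : Expr s BoolTy -> Prog l s n -> Prog l s n -> Stmt l s n
    Loop  : Prog s s s -> Stmt l s s
    Break : Stmt s s s

  -- A program is a state-threaded list of statements.
  data Prog : (l : TypeState) -> (s : TypeState) -> (m : TypeState) -> Type where
    Nil  : Prog l s s
    (::) : Stmt l s n -> Prog l n m -> Prog l s m
```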

This should be all! Let's try out some programs.

### Trying it Out
The following (type-correct) program compiles just fine:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 36 47 >}}

First, it loads a boolean into register `A`; then,
inside the `if/else` statement, it stores an integer into `A`. Finally,
it stores another integer into `B`, and adds them into `R`. Even though
`A` was a boolean at first, the type checker can deduce that it
was reset back to an integer after the `if/else`, and the program is accepted.
On the other hand, had we forgotten to set `A` to a boolean first:

```Idris
[ If (Load A)
    [ Store A (Lit 1) ]
    [ Store A (Lit 2) ]
, Store B (Lit 2)
, Store R (Add (Load A) (Load B))
]
```

We would get a type error:

```
Type mismatch between getRegTy A (IntTy, IntTy, IntTy) and BoolTy
```

The type of register `A` (that is, `IntTy`) is incompatible
with `BoolTy`. Our `initialState` says that `A` starts out as
an integer, so it can't be used in an `if/else` right away!
Similar errors occur if we make one of the branches of
the `if/else` empty, or if we set `B` to a boolean.

We can also encode the example program from the beginning
of this post:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 49 61 >}}

This program compiles just fine, too! It is a little reminiscent of
the program we used to demonstrate how `break` could break things
if we weren't careful. So, let's go ahead and try `break` in an invalid
state:

```Idris
[ Store A (Lit 7)
, Store B (Lit 9)
, Store R (Lit 0)
, Loop
    [ If (Load A `Leq` Lit 0)
        [ Store B (Lit 1 `Leq` Lit 1), Break, Store B (Lit 0) ]
        [ Store R (Load R `Add` Load B)
        , Store A (Load A `Add` Lit (-1))
        ]
    ]
]
```

Again, the type checker complains:

```
Type mismatch between IntTy and BoolTy
```

And so, we have an encoding of our language that allows registers to
be either integers or booleans, while still preventing
type-incorrect programs!

### Building an Interpreter
A good test of such an encoding is the implementation
of an interpreter. It should be possible to convince the
typechecker that our interpreter doesn't need to
handle type errors in the toy language, since they
cannot occur.

Let's start with something simple. First of all, suppose
we have an expression of type `Expr IntTy`. In our toy
language, it produces an integer. Our interpreter, then,
will probably want to use Idris' type `Int`. Similarly,
an expression of type `Expr BoolTy` will produce a boolean
in our toy language, which in Idris we can represent using
the built-in `Bool` type. Despite the similar naming, though,
there's no connection between Idris' `Bool` and our own `BoolTy`.
We need to define a conversion from our own types -- which are
values of type `Ty` -- into Idris types that result from evaluating
expressions. We do so as follows:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 63 65 >}}
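
The post later relies on the equation `repr IntTy = Int`, so the conversion is presumably a two-case function from `Ty` to Idris's `Type`, along these lines:

```Idris
-- Map our object-language types to the Idris types used by the interpreter.
repr : Ty -> Type
repr IntTy  = Int
repr BoolTy = Bool
```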

Similarly, we want to convert our `TypeState` (a tuple describing the _types_
of our registers) into a tuple that actually holds the values of each
register, which we will call `State`. The value of each register at
any point depends on its type. My first thought was to define
`State` as a function from `TypeState` to an Idris `Type`:

```Idris
State : TypeState -> Type
State (a, b, c) = (repr a, repr b, repr c)
```

Unfortunately, this doesn't quite cut it. The problem is that this
function technically doesn't give Idris any guarantees that `State`
will be a tuple. The most Idris knows is that `State` will be some
`Type`, which could be `Int`, `Bool`, or anything else! This
becomes a problem when we try to pattern match on states to get
the contents of a particular register. Instead, we have to define
a new data type:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 67 68 >}}
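
Judging by the `MkState` constructor that appears in the REPL output near the end of the post, the wrapper is a small GADT along these lines (a sketch, not the file itself):

```Idris
-- A tuple of register values, indexed by the registers' current types.
data State : TypeState -> Type where
  MkState : (repr a, repr b, repr c) -> State (a, b, c)
```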

In this snippet, `State` is still a (type level) function from `TypeState` to `Type`.
However, by using a GADT, we guarantee that there's only one way to construct
a `State (a,b,c)`: using a corresponding tuple. Now, Idris will accept our
pattern matching:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 70 78 >}}
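
Concretely, I'd expect `getReg` (and its counterpart `setReg`, which the interpreter uses below) to look roughly as follows; the exact definitions are in the linked file:

```Idris
-- Read a register's value; its Idris type depends on the register's Ty.
getReg : (r : Reg) -> State s -> repr (getRegTy r s)
getReg A (MkState (a, _, _)) = a
getReg B (MkState (_, b, _)) = b
getReg R (MkState (_, _, c)) = c

-- Write a register, updating the type state accordingly.
setReg : (r : Reg) -> repr t -> State s -> State (setRegTy r t s)
setReg A x (MkState (_, b, c)) = MkState (x, b, c)
setReg B x (MkState (a, _, c)) = MkState (a, x, c)
setReg R x (MkState (a, b, _)) = MkState (a, b, x)
```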

The `getReg` function will take out the value of the corresponding
register, returning `Int` or `Bool` depending on the `TypeState`.
What's important is that if the `TypeState` is known, then so
is the type of `getReg`: no `Either` is involved here, and we
can directly use the integer or boolean stored in the
register. This is exactly what we do:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 80 85 >}}

This is pretty concise. Idris knows that `Lit i` is of type `Expr s IntTy`,
and it knows that `repr IntTy = Int`, so it also knows that
`eval (Lit i)` produces an `Int`. Similarly, we wrote
`Load r` to have type `Expr s (getRegTy r s)`. Since `getReg`
returns `repr (getRegTy r s)`, things check out here, too.
A similar logic applies to the rest of the cases.
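
As a rough sketch, `eval` dispatches on the expression and leans on `getReg` for register loads. The argument order and the exact set of cases here are my guesses:

```Idris
-- Evaluate an expression in a given state; the result type follows from repr.
eval : State s -> Expr s t -> repr t
eval st (Lit i)   = i
eval st (Load r)  = getReg r st
eval st (Add l r) = eval st l + eval st r
eval st (Leq l r) = eval st l <= eval st r
```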

The situation with statements is somewhat different. As we said, a statement
doesn't return a value; it changes state. A good initial guess would
be that to evaluate a statement that starts in state `s` and terminates in state `n`,
we would take as input `State s` and return `State n`. However, things are not
quite as simple, thanks to `Break`. Not only does `Break` require
special case logic to return control to the end of the `Loop`, but
it also requires some additional consideration: in a statement
of type `Stmt l s n`, evaluating `Break` can return `State l`.

To implement this, we'll use the `Either` type. The `Left` constructor
will contain the state at the time of evaluating a `Break`,
and will indicate to the interpreter that we're breaking out of a loop.
On the other hand, the `Right` constructor will contain the state
as produced by all other statements, which would be considered
{{< sidenote "right" "left-right-note" "the \"normal\" case." >}}
We use <code>Left</code> for the "abnormal" case because of
Idris' (and Haskell's) convention to use it as such. For
instance, the two languages define a <code>Monad</code>
instance for <code>Either a</code> where <code>(>>=)</code>
behaves very much like it does for <code>Maybe</code>, with
<code>Left</code> being treated as <code>Nothing</code>,
and <code>Right</code> as <code>Just</code>. We will
use this instance to clean up some of our computations.
{{< /sidenote >}} Note that this doesn't indicate an error:
we need to represent the two states (breaking out of a loop
and normal execution) to define our language's semantics.

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 88 95 >}}

First, note the type. We return an `Either` value, which will
contain `State l` (in the `Left` constructor) if a `Break`
was evaluated, and `State n` (in the `Right` constructor)
if execution went on without breaking.

The `Store` case is rather simple. We use `setReg` to update the value
of the register `r` with the result of evaluating `e`. Because
a store doesn't cause us to start breaking out of a loop,
we use `Right` to wrap the new state.

The `If` case is also rather simple. Its condition is guaranteed
to evaluate to a boolean, so it's sufficient for us to use
Idris' `if` expression. We use the `prog` function here, which
implements the evaluation of a whole program. We'll get to it
momentarily.

`Loop` is the most interesting case. We start by evaluating
the program `p` serving as the loop body. The result of this
computation will be either the state after a break,
held in `Left`, or the normal execution state, held
in `Right`. The `(>>=)` operation will do nothing in
the first case, and feed the resulting (normal) state
to `stmt (Loop p)` in the second case. This is exactly
what we want: if we broke out of the current iteration
of the loop, we shouldn't continue on to the next iteration.
At the end of evaluating both `p` and the recursive call to
`stmt`, we'll either have exited normally, or via breaking
out. We don't want to continue breaking out further,
so we return the final state wrapped in `Right` in both cases.
Finally, `Break` returns the current state wrapped in `Left`,
beginning the process of breaking out.
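
Putting those cases together, my reading of `stmt` is roughly the sketch below. The real definition is in the linked file and may differ in details such as argument order; note also that `stmt` and the `prog` function it calls (shown next) would need to be declared mutually.

```Idris
-- A sketch of the statement interpreter described above.
stmt : Stmt l s n -> State s -> Either (State l) (State n)
stmt (Store r e) st = Right (setReg r (eval st e) st)
stmt (If c t e)  st = if eval st c then prog t st else prog e st
stmt (Loop p)    st =
  case prog p st >>= stmt (Loop p) of
    Left st'  => Right st'   -- the body hit Break, so this loop is done
    Right st' => Right st'   -- result of running further iterations
stmt Break       st = Left st
```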

The task of `prog` is simply to sequence several statements
together. The monadic bind operator, `(>>=)`, is again perfect
for this task, since it "stops" when it hits a `Left`, but
continues otherwise. This is the implementation:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 97 99 >}}

Awesome! Let's try it out, shall we? I defined a quick `run` function
as follows:

{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 101 102 >}}

We then have:

```
*TypesafeImp> run prodProg (MkState (0,0,0))
MkState (0, 9, 63) : State (IntTy, IntTy, IntTy)
```

This seems correct! The program multiplies seven by nine,
and stops when register `A` reaches zero. Our test program
runs, too:

```
*TypesafeImp> run testProg (MkState (0,0,0))
MkState (1, 2, 3) : State (IntTy, IntTy, IntTy)
```

This is the correct answer: `A` ends up being set to
`1` in the `then` branch of the conditional statement,
`B` is set to `2` right after, and `R`, the sum of `A`
and `B`, is rightly `3`.

As you can see, we didn't have to write any error handling
code! This is because the typechecker _knows_ that type errors
aren't possible: our programs are guaranteed to be
{{< sidenote "right" "termination-note" "type correct." >}}
Our programs <em>aren't</em> guaranteed to terminate:
we're lucky that Idris' totality checker is turned off by default.
{{< /sidenote >}} This was a fun exercise, and I hope you enjoyed reading along!
I hope to see you in my future posts.
9
layouts/shortcodes/gt_assumption.html
Normal file
@@ -0,0 +1,9 @@
{{ $n := .Page.Scratch.Get "gt-assumption-count" }}
{{ $newN := add 1 (int $n) }}
{{ .Page.Scratch.Set "gt-assumption-count" $newN }}
{{ .Page.Scratch.SetInMap "gt-assumptions" (.Get 0) $newN }}
<div class="assumption">
  <span id="gt-assumption-{{ .Get 0 }}" class="assumption-number">
    Assumption {{ $newN }} ({{ .Get 1 }}).
  </span> {{ .Inner }}
</div>
2
layouts/shortcodes/gt_css.html
Normal file
@@ -0,0 +1,2 @@
{{ $style := resources.Get "scss/gametheory.scss" | resources.ToCSS | resources.Minify }}
<link rel="stylesheet" href="{{ $style.Permalink }}">
1
layouts/shortcodes/gt_link.html
Normal file
@@ -0,0 +1 @@
<a href="#gt-assumption-{{ .Get 0 }}">assumption {{ index (.Page.Scratch.Get "gt-assumptions") (.Get 0) }}</a>
BIN
static/index.st
Normal file
Binary file not shown.
@@ -5,6 +5,16 @@ $code-color-keyword: black;
$code-color-type: black;
$code-color-comment: grey;

.highlight-label {
  padding: 0.25rem 0.5rem 0.25rem 0.5rem;
  border: $code-border;
  border-bottom: none;

  a {
    font-family: $font-code;
  }
}

code {
  font-family: $font-code;
  background-color: $code-color;
@@ -61,6 +71,10 @@ pre code {
  .hl {
    display: block;
    background-color: #fffd99;

    .lnt::before {
      content: "*";
    }
  }
}

94
themes/vanilla/assets/scss/search.scss
Normal file
@@ -0,0 +1,94 @@
@import "variables.scss";
@import "mixins.scss";

$search-input-padding: 0.5rem;
$search-element-padding: 0.5rem 1rem 0.5rem 1rem;

@mixin green-shadow {
  box-shadow: 0px 0px 5px rgba($primary-color, 0.7);
}

.stork-input-wrapper {
  display: flex;
  flex-direction: row;
  flex-wrap: wrap;
}

input.stork-input {
  @include bordered-block;
  font-family: $font-body;
  padding: $search-input-padding;

  &:active, &:focus {
    @include green-shadow;
    border-color: $primary-color;
  }

  flex-grow: 1;
}

.stork-close-button {
  @include bordered-block;
  font-family: $font-body;
  padding: $search-input-padding;

  background-color: $code-color;
  padding-left: 1.5rem;
  padding-right: 1.5rem;

  border-left: none;
  border-top-left-radius: 0;
  border-bottom-left-radius: 0;
}

.stork-output-visible {
  @include bordered-block;

  border-top: none;
}

.stork-result, .stork-message, .stork-attribution {
  padding: $search-element-padding;
}

.stork-message:not(:last-child), .stork-result {
  border-bottom: $standard-border;
}

.stork-results {
  margin: 0;
  padding: 0;
}

.stork-result {
  list-style: none;

  &.selected {
    background-color: $code-color;
  }

  a:hover {
    color: black;
  }
}

.stork-title, .stork-excerpt {
  margin: 0;
}

.stork-excerpt {
  white-space: nowrap;
  overflow: hidden;
  text-overflow: ellipsis;
}

.stork-title {
  font-family: $font-heading;
  font-size: 1.4rem;
  text-align: left;
  width: 100%;
}

.stork-highlight {
  background-color: lighten($primary-color, 30%);
}

@@ -233,3 +233,10 @@ figure {
.twitter-tweet {
  margin: auto;
}

.draft-warning {
  @include bordered-block;
  padding: 0.5rem;
  background-color: #ffee99;
  border-color: #f5c827;
}

1
themes/vanilla/layouts/_default/baseof.toml
Normal file
@@ -0,0 +1 @@
{{- block "main" . }}{{- end }}
12
themes/vanilla/layouts/_default/list.toml.toml
Normal file
@@ -0,0 +1,12 @@
[input]
base_directory = "content/"
title_boost = "Large"
files = [
    {{ range $index , $e := .Site.RegularPages }}{{ if $index }}, {{end}}{ filetype = "PlainText", contents = {{ $e.Plain | jsonify }}, title = {{ $e.Title | jsonify }}, url = {{ $e.Permalink | jsonify }} }
    {{ end }}
]

[output]
filename = "index.st"
excerpts_per_result = 2
displayed_results_count = 5
3
themes/vanilla/layouts/_default/single.toml
Normal file
@@ -0,0 +1,3 @@
{{ define "main" }}
{{ .Content }}
{{ end }}
@@ -18,6 +18,15 @@
    </div>
  </div>
{{ end }}

{{ if .Draft }}
  <div class="draft-warning">
    <em>Warning!</em> This post is a draft. At best, it may contain grammar mistakes;
    at worst, it can include significant errors and bugs. Please
    use your best judgement!
  </div>
{{ end }}

{{ .Content }}
</div>
{{ end }}

@@ -1,10 +1,22 @@
{{ define "main" }}
{{ .Content }}

<p>
  <div class="stork-wrapper">
    <div class="stork-input-wrapper">
      <input class="stork-input" data-stork="blog" placeholder="Search (requires JavaScript)"/>
    </div>
    <div class="stork-output" data-stork="blog-output"></div>
  </div>
</p>

Recent posts:
<ul class="post-list">
  {{ range first 10 (where (where .Site.Pages.ByDate.Reverse "Section" "blog") ".Kind" "!=" "section") }}
  {{ partial "post.html" . }}
  {{ end }}
</ul>

<script src="https://files.stork-search.net/stork.js"></script>
<script>stork.register("blog", "/index.st");</script>
{{ end }}

@@ -11,11 +11,13 @@
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
{{ $sidenotes := resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
{{ $code := resources.Get "scss/code.scss" | resources.ToCSS | resources.Minify }}
{{ $search := resources.Get "scss/search.scss" | resources.ToCSS | resources.Minify }}
{{ $icon := resources.Get "img/favicon.png" }}
{{- partial "sidenotes.html" . -}}
<link rel="stylesheet" href="{{ $style.Permalink }}" media="screen">
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}" media="screen">
<link rel="stylesheet" href="{{ $code.Permalink }}" media="screen">
<link rel="stylesheet" href="{{ $search.Permalink }}" media="screen">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css" integrity="sha384-zB1R0rpPzHqg7Kpt0Aljp8JPLqbXI3bhnPWROx27a9N0Ll6ZP/+DiW/UqRcLbRjq" crossorigin="anonymous" media="screen">
<link rel="icon" type="image/png" href="{{ $icon.Permalink }}">

@@ -11,4 +11,8 @@
{{ else }}
{{ .Scratch.Set "opts" "" }}
{{ end }}
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}
<div class="highlight-group">
  <div class="highlight-label">From <a href="https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/{{ .Get 1 }}">{{ path.Base (.Get 1) }}</a>,
  {{ if eq (.Get 2) (.Get 3) }}line {{ .Get 2 }}{{ else }} lines {{ .Get 2 }} through {{ .Get 3 }}{{ end }}</div>
  {{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}
</div>