Compare commits
99 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e0451d026c | |||
| 1f1345477f | |||
| 44529e872f | |||
| a10996954e | |||
| 4d1dfb5f66 | |||
| f97b624688 | |||
| 8215c59122 | |||
| eb97bd9c3e | |||
| d2e100fe4b | |||
| de09a1f6bd | |||
| c40672e762 | |||
| 565d4a6955 | |||
| 8f0f2eb35e | |||
| 234b795157 | |||
| e317c56c99 | |||
| 29d12a9914 | |||
| b459e9cbfe | |||
| 52abe73ef7 | |||
| f0fe481bcf | |||
| 222446a937 | |||
| e7edd43034 | |||
| 2bc2c282e1 | |||
| 5cc92d3a9d | |||
| 4be8a25699 | |||
| d3421733e1 | |||
| 4c099a54e8 | |||
| 9f77f07ed2 | |||
| 04ab1a137c | |||
| 53744ac772 | |||
| 50a1c33adb | |||
| d153af5212 | |||
| a336b27b6c | |||
| 97eb4b6e3e | |||
| 430768eac5 | |||
| 5db864881a | |||
| d3b1047d37 | |||
| 98cac103c4 | |||
| 7226d66f67 | |||
| 8a352ed3ea | |||
| 02f8306c7b | |||
| cf6f353f20 | |||
| 7a631b3557 | |||
| 5e13047846 | |||
| c17d532802 | |||
| 55e4e61906 | |||
| f2f88ab9ca | |||
| ba418d357f | |||
| 0e3f16139d | |||
| 55486d511f | |||
| 6080094c41 | |||
| 6b8d3b0f8a | |||
| 725958137a | |||
| 1f6b4bef74 | |||
| fe1e0a6de0 | |||
| 1f3c42fc44 | |||
| 8bf67c7dc3 | |||
| 13214cee96 | |||
| 579c7bad92 | |||
| f00a6a7783 | |||
| 2a81fdd9fb | |||
| 17c59e595c | |||
| ad2576eae2 | |||
| 72d8179cc5 | |||
| dbabec0db6 | |||
| 76675fbc9b | |||
| ca395b5c09 | |||
| 1a05d5ff7a | |||
| 56f0dbd02f | |||
| 9fc0ff961d | |||
| 73441dc93b | |||
| df5f5eba1c | |||
| d950b8dc90 | |||
| 85394b185d | |||
| 86b49f9cc3 | |||
| 9769b3e396 | |||
| e337992410 | |||
| d5c3a44041 | |||
| eade42be49 | |||
| d0fac50cfd | |||
| dd4aa6fb9d | |||
| aa867b2e5f | |||
| 2fa2be4b9e | |||
| d5536467f6 | |||
| 67cb61c93f | |||
| 578d580683 | |||
| 789f277780 | |||
| 308ec615b9 | |||
| 0e40c9e216 | |||
| 5dbf75b5e4 | |||
| b921ddfc8d | |||
| bf3c81fe24 | |||
| 06cbd93f05 | |||
| 6c3780d9ea | |||
| 6f0667bb28 | |||
| 8368283a3e | |||
| 18ee3a1526 | |||
| b0e501f086 | |||
| 385ae59133 | |||
| 068d0218b0 |
11
assets/scss/gametheory.scss
Normal file
11
assets/scss/gametheory.scss
Normal file
@@ -0,0 +1,11 @@
|
||||
@import "variables.scss";
|
||||
@import "mixins.scss";
|
||||
|
||||
.assumption-number {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.assumption {
|
||||
@include bordered-block;
|
||||
padding: 0.8rem;
|
||||
}
|
||||
102
code/aoc-coq/day1.v
Normal file
102
code/aoc-coq/day1.v
Normal file
@@ -0,0 +1,102 @@
|
||||
Require Import Coq.Lists.List.
|
||||
Require Import Omega.
|
||||
|
||||
Definition has_pair (t : nat) (is : list nat) : Prop :=
|
||||
exists n1 n2 : nat, n1 <> n2 /\ In n1 is /\ In n2 is /\ n1 + n2 = t.
|
||||
|
||||
Fixpoint find_matching (is : list nat) (total : nat) (x : nat) : option nat :=
|
||||
match is with
|
||||
| nil => None
|
||||
| cons y ys =>
|
||||
if Nat.eqb (x + y) total
|
||||
then Some y
|
||||
else find_matching ys total x
|
||||
end.
|
||||
|
||||
Fixpoint find_sum (is : list nat) (total : nat) : option (nat * nat) :=
|
||||
match is with
|
||||
| nil => None
|
||||
| cons x xs =>
|
||||
match find_matching xs total x with
|
||||
| None => find_sum xs total (* Was buggy! *)
|
||||
| Some y => Some (x, y)
|
||||
end
|
||||
end.
|
||||
|
||||
Lemma find_matching_correct : forall is k x y,
|
||||
find_matching is k x = Some y -> x + y = k.
|
||||
Proof.
|
||||
intros is. induction is;
|
||||
intros k x y Hev.
|
||||
- simpl in Hev. inversion Hev.
|
||||
- simpl in Hev. destruct (Nat.eqb (x+a) k) eqn:Heq.
|
||||
+ injection Hev as H; subst.
|
||||
apply EqNat.beq_nat_eq. auto.
|
||||
+ apply IHis. assumption.
|
||||
Qed.
|
||||
|
||||
Lemma find_matching_skip : forall k x y i is,
|
||||
find_matching is k x = Some y -> find_matching (cons i is) k x = Some y.
|
||||
Proof.
|
||||
intros k x y i is Hsmall.
|
||||
simpl. destruct (Nat.eqb (x+i) k) eqn:Heq.
|
||||
- apply find_matching_correct in Hsmall.
|
||||
symmetry in Heq. apply EqNat.beq_nat_eq in Heq.
|
||||
assert (i = y). { omega. } rewrite H. reflexivity.
|
||||
- assumption.
|
||||
Qed.
|
||||
|
||||
Lemma find_matching_works : forall is k x y, In y is /\ x + y = k ->
|
||||
find_matching is k x = Some y.
|
||||
Proof.
|
||||
intros is. induction is;
|
||||
intros k x y [Hin Heq].
|
||||
- inversion Hin.
|
||||
- inversion Hin.
|
||||
+ subst a. simpl. Search Nat.eqb.
|
||||
destruct (Nat.eqb_spec (x+y) k).
|
||||
* reflexivity.
|
||||
* exfalso. apply n. assumption.
|
||||
+ apply find_matching_skip. apply IHis.
|
||||
split; assumption.
|
||||
Qed.
|
||||
|
||||
Theorem find_sum_works :
|
||||
forall k is, has_pair k is ->
|
||||
exists x y, (find_sum is k = Some (x, y) /\ x + y = k).
|
||||
Proof.
|
||||
intros k is. generalize dependent k.
|
||||
induction is; intros k [x' [y' [Hneq [Hinx [Hiny Hsum]]]]].
|
||||
- (* is is empty. But x is in is! *)
|
||||
inversion Hinx.
|
||||
- (* is is not empty. *)
|
||||
inversion Hinx.
|
||||
+ (* x is the first element. *)
|
||||
subst a. inversion Hiny.
|
||||
* (* y is also the first element; but this is impossible! *)
|
||||
exfalso. apply Hneq. apply H.
|
||||
* (* y is somewhere in the rest of the list.
|
||||
We've proven that we will find it! *)
|
||||
exists x'. simpl.
|
||||
erewrite find_matching_works.
|
||||
{ exists y'. split. reflexivity. assumption. }
|
||||
{ split; assumption. }
|
||||
+ (* x is not the first element. *)
|
||||
inversion Hiny.
|
||||
* (* y is the first element,
|
||||
so x is somewhere in the rest of the list.
|
||||
Again, we've proven that we can find it. *)
|
||||
subst a. exists y'. simpl.
|
||||
erewrite find_matching_works.
|
||||
{ exists x'. split. reflexivity. rewrite plus_comm. assumption. }
|
||||
{ split. assumption. rewrite plus_comm. assumption. }
|
||||
* (* y is not the first element, either.
|
||||
Of course, there could be another matching pair
|
||||
starting with a. Otherwise, the inductive hypothesis applies. *)
|
||||
simpl. destruct (find_matching is k a) eqn:Hf.
|
||||
{ exists a. exists n. split.
|
||||
reflexivity.
|
||||
apply find_matching_correct with is. assumption. }
|
||||
{ apply IHis. unfold has_pair. exists x'. exists y'.
|
||||
repeat split; assumption. }
|
||||
Qed.
|
||||
53
code/compiler/13/CMakeLists.txt
Normal file
53
code/compiler/13/CMakeLists.txt
Normal file
@@ -0,0 +1,53 @@
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
project(compiler)
|
||||
|
||||
# We want C++17 for std::optional
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# Find all the required packages
|
||||
find_package(BISON)
|
||||
find_package(FLEX)
|
||||
find_package(LLVM REQUIRED CONFIG)
|
||||
|
||||
# Set up the flex and bison targets
|
||||
bison_target(parser
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
|
||||
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
|
||||
COMPILE_FLAGS "-d")
|
||||
flex_target(scanner
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
|
||||
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
|
||||
add_flex_bison_dependency(scanner parser)
|
||||
|
||||
# Find all the relevant LLVM components
|
||||
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||
|
||||
# Create compiler executable
|
||||
add_executable(compiler
|
||||
definition.cpp definition.hpp
|
||||
parsed_type.cpp parsed_type.hpp
|
||||
ast.cpp ast.hpp
|
||||
llvm_context.cpp llvm_context.hpp
|
||||
type_env.cpp type_env.hpp
|
||||
env.cpp env.hpp
|
||||
type.cpp type.hpp
|
||||
error.cpp error.hpp
|
||||
binop.cpp binop.hpp
|
||||
instruction.cpp instruction.hpp
|
||||
graph.cpp graph.hpp
|
||||
global_scope.cpp global_scope.hpp
|
||||
parse_driver.cpp parse_driver.hpp
|
||||
mangler.cpp mangler.hpp
|
||||
compiler.cpp compiler.hpp
|
||||
${BISON_parser_OUTPUTS}
|
||||
${FLEX_scanner_OUTPUTS}
|
||||
main.cpp
|
||||
)
|
||||
|
||||
# Configure compiler executable
|
||||
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
|
||||
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
|
||||
target_link_libraries(compiler ${LLVM_LIBS})
|
||||
454
code/compiler/13/ast.cpp
Normal file
454
code/compiler/13/ast.cpp
Normal file
@@ -0,0 +1,454 @@
|
||||
#include "ast.hpp"
|
||||
#include <ostream>
|
||||
#include <type_traits>
|
||||
#include "binop.hpp"
|
||||
#include "error.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "env.hpp"
|
||||
|
||||
static void print_indent(int n, std::ostream& to) {
|
||||
while(n--) to << " ";
|
||||
}
|
||||
|
||||
void ast_int::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "INT: " << value << std::endl;
|
||||
}
|
||||
|
||||
void ast_int::find_free(std::set<std::string>& into) {
|
||||
|
||||
}
|
||||
|
||||
type_ptr ast_int::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
return type_ptr(new type_app(env->lookup_type("Int")));
|
||||
}
|
||||
|
||||
void ast_int::translate(global_scope& scope) {
|
||||
|
||||
}
|
||||
|
||||
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||
}
|
||||
|
||||
void ast_lid::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "LID: " << id << std::endl;
|
||||
}
|
||||
|
||||
void ast_lid::find_free(std::set<std::string>& into) {
|
||||
into.insert(id);
|
||||
}
|
||||
|
||||
type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_scheme_ptr lid_type = env->lookup(id);
|
||||
if(!lid_type)
|
||||
throw type_error("unknown identifier " + id, loc);
|
||||
return lid_type->instantiate(mgr);
|
||||
}
|
||||
|
||||
void ast_lid::translate(global_scope& scope) {
|
||||
|
||||
}
|
||||
|
||||
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(
|
||||
(this->env->is_global(id)) ?
|
||||
(instruction*) new instruction_pushglobal(this->env->get_mangled_name(id)) :
|
||||
(instruction*) new instruction_push(env->get_offset(id))));
|
||||
}
|
||||
|
||||
void ast_uid::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "UID: " << id << std::endl;
|
||||
}
|
||||
|
||||
void ast_uid::find_free(std::set<std::string>& into) {
|
||||
|
||||
}
|
||||
|
||||
type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_scheme_ptr uid_type = env->lookup(id);
|
||||
if(!uid_type)
|
||||
throw type_error("unknown constructor " + id, loc);
|
||||
return uid_type->instantiate(mgr);
|
||||
}
|
||||
|
||||
void ast_uid::translate(global_scope& scope) {
|
||||
|
||||
}
|
||||
|
||||
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(
|
||||
new instruction_pushglobal(this->env->get_mangled_name(id))));
|
||||
}
|
||||
|
||||
void ast_binop::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "BINOP: " << op_name(op) << std::endl;
|
||||
left->print(indent + 1, to);
|
||||
right->print(indent + 1, to);
|
||||
}
|
||||
|
||||
void ast_binop::find_free(std::set<std::string>& into) {
|
||||
left->find_free(into);
|
||||
right->find_free(into);
|
||||
}
|
||||
|
||||
type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
||||
if(!ftype) throw type_error("unknown binary operator " + op_name(op), loc);
|
||||
|
||||
// For better type errors, we first require binary function,
|
||||
// and only then unify each argument. This way, we can
|
||||
// precisely point out which argument is "wrong".
|
||||
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr second_type = mgr.new_type();
|
||||
type_ptr first_type = mgr.new_type();
|
||||
type_ptr arrow_one = type_ptr(new type_arr(second_type, return_type));
|
||||
type_ptr arrow_two = type_ptr(new type_arr(first_type, arrow_one));
|
||||
|
||||
mgr.unify(ftype, arrow_two, loc);
|
||||
mgr.unify(first_type, ltype, left->loc);
|
||||
mgr.unify(second_type, rtype, right->loc);
|
||||
return return_type;
|
||||
}
|
||||
|
||||
void ast_binop::translate(global_scope& scope) {
|
||||
left->translate(scope);
|
||||
right->translate(scope);
|
||||
}
|
||||
|
||||
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
right->compile(env, into);
|
||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||
|
||||
auto mangled_name = this->env->get_mangled_name(op_name(op));
|
||||
into.push_back(instruction_ptr(new instruction_pushglobal(mangled_name)));
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
}
|
||||
|
||||
void ast_app::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "APP:" << std::endl;
|
||||
left->print(indent + 1, to);
|
||||
right->print(indent + 1, to);
|
||||
}
|
||||
|
||||
void ast_app::find_free(std::set<std::string>& into) {
|
||||
left->find_free(into);
|
||||
right->find_free(into);
|
||||
}
|
||||
|
||||
type_ptr ast_app::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||
mgr.unify(arrow, ltype, left->loc);
|
||||
return return_type;
|
||||
}
|
||||
|
||||
void ast_app::translate(global_scope& scope) {
|
||||
left->translate(scope);
|
||||
right->translate(scope);
|
||||
}
|
||||
|
||||
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
right->compile(env, into);
|
||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
}
|
||||
|
||||
void ast_case::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "CASE: " << std::endl;
|
||||
for(auto& branch : branches) {
|
||||
print_indent(indent + 1, to);
|
||||
branch->pat->print(to);
|
||||
to << std::endl;
|
||||
branch->expr->print(indent + 2, to);
|
||||
}
|
||||
}
|
||||
|
||||
void ast_case::find_free(std::set<std::string>& into) {
|
||||
of->find_free(into);
|
||||
for(auto& branch : branches) {
|
||||
std::set<std::string> free_in_branch;
|
||||
std::set<std::string> pattern_variables;
|
||||
branch->pat->find_variables(pattern_variables);
|
||||
branch->expr->find_free(free_in_branch);
|
||||
for(auto& free : free_in_branch) {
|
||||
if(pattern_variables.find(free) == pattern_variables.end())
|
||||
into.insert(free);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_var* var;
|
||||
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||
type_ptr branch_type = mgr.new_type();
|
||||
|
||||
for(auto& branch : branches) {
|
||||
type_env_ptr new_env = type_scope(env);
|
||||
branch->pat->typecheck(case_type, mgr, new_env);
|
||||
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
|
||||
mgr.unify(curr_branch_type, branch_type, branch->expr->loc);
|
||||
}
|
||||
|
||||
input_type = mgr.resolve(case_type, var);
|
||||
type_app* app_type;
|
||||
if(!(app_type = dynamic_cast<type_app*>(input_type.get())) ||
|
||||
!dynamic_cast<type_data*>(app_type->constructor.get())) {
|
||||
throw type_error("attempting case analysis of non-data type");
|
||||
}
|
||||
|
||||
return branch_type;
|
||||
}
|
||||
|
||||
void ast_case::translate(global_scope& scope) {
|
||||
of->translate(scope);
|
||||
for(auto& branch : branches) {
|
||||
branch->expr->translate(scope);
|
||||
}
|
||||
}
|
||||
|
||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
||||
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
|
||||
|
||||
of->compile(env, into);
|
||||
into.push_back(instruction_ptr(new instruction_eval()));
|
||||
|
||||
instruction_jump* jump_instruction = new instruction_jump();
|
||||
into.push_back(instruction_ptr(jump_instruction));
|
||||
for(auto& branch : branches) {
|
||||
std::vector<instruction_ptr> branch_instructions;
|
||||
pattern_var* vpat;
|
||||
pattern_constr* cpat;
|
||||
|
||||
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||
|
||||
for(auto& constr_pair : type->constructors) {
|
||||
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||
jump_instruction->tag_mappings.end())
|
||||
break;
|
||||
|
||||
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||
jump_instruction->branches.size();
|
||||
}
|
||||
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||
env_ptr new_env = env;
|
||||
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||
new_env = env_ptr(new env_var(*it, new_env));
|
||||
}
|
||||
|
||||
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
||||
cpat->params.size())));
|
||||
branch->expr->compile(new_env, branch_instructions);
|
||||
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||
cpat->params.size())));
|
||||
|
||||
int new_tag = type->constructors[cpat->constr].tag;
|
||||
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||
jump_instruction->tag_mappings.end())
|
||||
throw type_error("technically not a type error: duplicate pattern");
|
||||
|
||||
jump_instruction->tag_mappings[new_tag] =
|
||||
jump_instruction->branches.size();
|
||||
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||
}
|
||||
}
|
||||
|
||||
for(auto& constr_pair : type->constructors) {
|
||||
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||
jump_instruction->tag_mappings.end())
|
||||
throw type_error("non-total pattern");
|
||||
}
|
||||
}
|
||||
|
||||
void ast_let::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "LET: " << std::endl;
|
||||
in->print(indent + 1, to);
|
||||
}
|
||||
|
||||
void ast_let::find_free(std::set<std::string>& into) {
|
||||
definitions.find_free(into);
|
||||
std::set<std::string> all_free;
|
||||
in->find_free(all_free);
|
||||
for(auto& free_var : all_free) {
|
||||
if(definitions.defs_defn.find(free_var) == definitions.defs_defn.end())
|
||||
into.insert(free_var);
|
||||
}
|
||||
}
|
||||
|
||||
type_ptr ast_let::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
definitions.typecheck(mgr, env);
|
||||
return in->typecheck(mgr, definitions.env);
|
||||
}
|
||||
|
||||
void ast_let::translate(global_scope& scope) {
|
||||
for(auto& def : definitions.defs_data) {
|
||||
def.second->into_globals(scope);
|
||||
}
|
||||
for(auto& def : definitions.defs_defn) {
|
||||
size_t original_params = def.second->params.size();
|
||||
std::string original_name = def.second->name;
|
||||
auto& global_definition = def.second->into_global(scope);
|
||||
size_t captured = global_definition.params.size() - original_params;
|
||||
|
||||
type_env_ptr mangled_env = type_scope(env);
|
||||
mangled_env->bind(def.first, env->lookup(def.first), visibility::global);
|
||||
mangled_env->set_mangled_name(def.first, global_definition.name);
|
||||
|
||||
ast_ptr global_app(new ast_lid(original_name));
|
||||
global_app->env = mangled_env;
|
||||
for(auto& param : global_definition.params) {
|
||||
if(!(captured--)) break;
|
||||
ast_ptr new_arg(new ast_lid(param));
|
||||
new_arg->env = env;
|
||||
global_app = ast_ptr(new ast_app(std::move(global_app), std::move(new_arg)));
|
||||
global_app->env = env;
|
||||
}
|
||||
translated_definitions.push_back({ def.first, std::move(global_app) });
|
||||
}
|
||||
in->translate(scope);
|
||||
}
|
||||
|
||||
void ast_let::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(new instruction_alloc(translated_definitions.size())));
|
||||
env_ptr new_env = env;
|
||||
for(auto& def : translated_definitions) {
|
||||
new_env = env_ptr(new env_var(def.first, std::move(new_env)));
|
||||
}
|
||||
int offset = translated_definitions.size() - 1;
|
||||
for(auto& def : translated_definitions) {
|
||||
def.second->compile(new_env, into);
|
||||
into.push_back(instruction_ptr(new instruction_update(offset--)));
|
||||
}
|
||||
in->compile(new_env, into);
|
||||
into.push_back(instruction_ptr(new instruction_slide(translated_definitions.size())));
|
||||
}
|
||||
|
||||
void ast_lambda::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "LAMBDA";
|
||||
for(auto& param : params) {
|
||||
to << " " << param;
|
||||
}
|
||||
to << std::endl;
|
||||
body->print(indent+1, to);
|
||||
}
|
||||
|
||||
void ast_lambda::find_free(std::set<std::string>& into) {
|
||||
body->find_free(free_variables);
|
||||
for(auto& param : params) {
|
||||
free_variables.erase(param);
|
||||
}
|
||||
into.insert(free_variables.begin(), free_variables.end());
|
||||
}
|
||||
|
||||
type_ptr ast_lambda::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
var_env = type_scope(env);
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr full_type = return_type;
|
||||
|
||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||
type_ptr param_type = mgr.new_type();
|
||||
var_env->bind(*it, param_type);
|
||||
full_type = type_ptr(new type_arr(std::move(param_type), full_type));
|
||||
}
|
||||
|
||||
mgr.unify(return_type, body->typecheck(mgr, var_env), body->loc);
|
||||
return full_type;
|
||||
}
|
||||
|
||||
void ast_lambda::translate(global_scope& scope) {
|
||||
std::vector<std::string> function_params;
|
||||
for(auto& free_variable : free_variables) {
|
||||
if(env->is_global(free_variable)) continue;
|
||||
function_params.push_back(free_variable);
|
||||
}
|
||||
size_t captured_count = function_params.size();
|
||||
function_params.insert(function_params.end(), params.begin(), params.end());
|
||||
|
||||
auto& new_function = scope.add_function("lambda", std::move(function_params), std::move(body));
|
||||
type_env_ptr mangled_env = type_scope(env);
|
||||
mangled_env->bind("lambda", type_scheme_ptr(nullptr), visibility::global);
|
||||
mangled_env->set_mangled_name("lambda", new_function.name);
|
||||
ast_ptr new_application = ast_ptr(new ast_lid("lambda"));
|
||||
new_application->env = mangled_env;
|
||||
|
||||
for(auto& param : new_function.params) {
|
||||
if(!(captured_count--)) break;
|
||||
ast_ptr new_arg = ast_ptr(new ast_lid(param));
|
||||
new_arg->env = env;
|
||||
new_application = ast_ptr(new ast_app(std::move(new_application), std::move(new_arg)));
|
||||
new_application->env = env;
|
||||
}
|
||||
translated = std::move(new_application);
|
||||
}
|
||||
|
||||
void ast_lambda::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
translated->compile(env, into);
|
||||
}
|
||||
|
||||
void pattern_var::print(std::ostream& to) const {
|
||||
to << var;
|
||||
}
|
||||
|
||||
void pattern_var::find_variables(std::set<std::string>& into) const {
|
||||
into.insert(var);
|
||||
}
|
||||
|
||||
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||
env->bind(var, t);
|
||||
}
|
||||
|
||||
void pattern_constr::print(std::ostream& to) const {
|
||||
to << constr;
|
||||
for(auto& param : params) {
|
||||
to << " " << param;
|
||||
}
|
||||
}
|
||||
|
||||
void pattern_constr::find_variables(std::set<std::string>& into) const {
|
||||
into.insert(params.begin(), params.end());
|
||||
}
|
||||
|
||||
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
|
||||
if(!constructor_type_scheme) {
|
||||
throw type_error("pattern using unknown constructor " + constr, loc);
|
||||
}
|
||||
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
|
||||
|
||||
for(auto& param : params) {
|
||||
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||
if(!arr) throw type_error("too many parameters in constructor pattern", loc);
|
||||
|
||||
env->bind(param, arr->left);
|
||||
constructor_type = arr->right;
|
||||
}
|
||||
|
||||
mgr.unify(t, constructor_type, loc);
|
||||
}
|
||||
195
code/compiler/13/ast.hpp
Normal file
195
code/compiler/13/ast.hpp
Normal file
@@ -0,0 +1,195 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "binop.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "env.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "location.hh"
|
||||
#include "global_scope.hpp"
|
||||
|
||||
struct ast {
|
||||
type_env_ptr env;
|
||||
yy::location loc;
|
||||
|
||||
ast(yy::location l) : env(nullptr), loc(std::move(l)) {}
|
||||
virtual ~ast() = default;
|
||||
|
||||
virtual void print(int indent, std::ostream& to) const = 0;
|
||||
virtual void find_free(std::set<std::string>& into) = 0;
|
||||
virtual type_ptr typecheck(type_mgr& mgr, type_env_ptr& env) = 0;
|
||||
virtual void translate(global_scope& scope) = 0;
|
||||
virtual void compile(const env_ptr& env,
|
||||
std::vector<instruction_ptr>& into) const = 0;
|
||||
};
|
||||
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
|
||||
struct pattern {
|
||||
yy::location loc;
|
||||
|
||||
pattern(yy::location l) : loc(std::move(l)) {}
|
||||
virtual ~pattern() = default;
|
||||
|
||||
virtual void print(std::ostream& to) const = 0;
|
||||
virtual void find_variables(std::set<std::string>& into) const = 0;
|
||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
|
||||
};
|
||||
|
||||
using pattern_ptr = std::unique_ptr<pattern>;
|
||||
|
||||
struct branch {
|
||||
pattern_ptr pat;
|
||||
ast_ptr expr;
|
||||
|
||||
branch(pattern_ptr p, ast_ptr a)
|
||||
: pat(std::move(p)), expr(std::move(a)) {}
|
||||
};
|
||||
|
||||
using branch_ptr = std::unique_ptr<branch>;
|
||||
|
||||
struct ast_int : public ast {
|
||||
int value;
|
||||
|
||||
explicit ast_int(int v, yy::location l = yy::location())
|
||||
: ast(std::move(l)), value(v) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_lid : public ast {
|
||||
std::string id;
|
||||
|
||||
explicit ast_lid(std::string i, yy::location l = yy::location())
|
||||
: ast(std::move(l)), id(std::move(i)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_uid : public ast {
|
||||
std::string id;
|
||||
|
||||
explicit ast_uid(std::string i, yy::location l = yy::location())
|
||||
: ast(std::move(l)), id(std::move(i)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_binop : public ast {
|
||||
binop op;
|
||||
ast_ptr left;
|
||||
ast_ptr right;
|
||||
|
||||
ast_binop(binop o, ast_ptr l, ast_ptr r, yy::location lc = yy::location())
|
||||
: ast(std::move(lc)), op(o), left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_app : public ast {
|
||||
ast_ptr left;
|
||||
ast_ptr right;
|
||||
|
||||
ast_app(ast_ptr l, ast_ptr r, yy::location lc = yy::location())
|
||||
: ast(std::move(lc)), left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_case : public ast {
|
||||
ast_ptr of;
|
||||
type_ptr input_type;
|
||||
std::vector<branch_ptr> branches;
|
||||
|
||||
ast_case(ast_ptr o, std::vector<branch_ptr> b, yy::location l = yy::location())
|
||||
: ast(std::move(l)), of(std::move(o)), branches(std::move(b)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_let : public ast {
|
||||
using basic_definition = std::pair<std::string, ast_ptr>;
|
||||
|
||||
definition_group definitions;
|
||||
ast_ptr in;
|
||||
|
||||
std::vector<basic_definition> translated_definitions;
|
||||
|
||||
ast_let(definition_group g, ast_ptr i, yy::location l = yy::location())
|
||||
: ast(std::move(l)), definitions(std::move(g)), in(std::move(i)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_lambda : public ast {
|
||||
std::vector<std::string> params;
|
||||
ast_ptr body;
|
||||
|
||||
type_env_ptr var_env;
|
||||
|
||||
std::set<std::string> free_variables;
|
||||
ast_ptr translated;
|
||||
|
||||
ast_lambda(std::vector<std::string> ps, ast_ptr b, yy::location l = yy::location())
|
||||
: ast(std::move(l)), params(std::move(ps)), body(std::move(b)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct pattern_var : public pattern {
|
||||
std::string var;
|
||||
|
||||
pattern_var(std::string v, yy::location l = yy::location())
|
||||
: pattern(std::move(l)), var(std::move(v)) {}
|
||||
|
||||
void print(std::ostream &to) const;
|
||||
void find_variables(std::set<std::string>& into) const;
|
||||
void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
||||
};
|
||||
|
||||
struct pattern_constr : public pattern {
|
||||
std::string constr;
|
||||
std::vector<std::string> params;
|
||||
|
||||
pattern_constr(std::string c, std::vector<std::string> p, yy::location l = yy::location())
|
||||
: pattern(std::move(l)), constr(std::move(c)), params(std::move(p)) {}
|
||||
|
||||
void print(std::ostream &to) const;
|
||||
void find_variables(std::set<std::string>& into) const;
|
||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
||||
};
|
||||
21
code/compiler/13/binop.cpp
Normal file
21
code/compiler/13/binop.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
#include "binop.hpp"
|
||||
|
||||
std::string op_name(binop op) {
|
||||
switch(op) {
|
||||
case PLUS: return "+";
|
||||
case MINUS: return "-";
|
||||
case TIMES: return "*";
|
||||
case DIVIDE: return "/";
|
||||
}
|
||||
return "??";
|
||||
}
|
||||
|
||||
std::string op_action(binop op) {
|
||||
switch(op) {
|
||||
case PLUS: return "plus";
|
||||
case MINUS: return "minus";
|
||||
case TIMES: return "times";
|
||||
case DIVIDE: return "divide";
|
||||
}
|
||||
return "??";
|
||||
}
|
||||
17
code/compiler/13/binop.hpp
Normal file
17
code/compiler/13/binop.hpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
enum binop {
|
||||
PLUS,
|
||||
MINUS,
|
||||
TIMES,
|
||||
DIVIDE
|
||||
};
|
||||
|
||||
constexpr binop all_binops[] = {
|
||||
PLUS, MINUS, TIMES, DIVIDE
|
||||
};
|
||||
|
||||
std::string op_name(binop op);
|
||||
std::string op_action(binop op);
|
||||
153
code/compiler/13/compiler.cpp
Normal file
153
code/compiler/13/compiler.cpp
Normal file
@@ -0,0 +1,153 @@
|
||||
#include "compiler.hpp"
|
||||
#include "binop.hpp"
|
||||
#include "error.hpp"
|
||||
#include "global_scope.hpp"
|
||||
#include "parse_driver.hpp"
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
void compiler::add_default_types() {
|
||||
global_env->bind_type("Int", type_ptr(new type_base("Int")));
|
||||
}
|
||||
|
||||
void compiler::add_binop_type(binop op, type_ptr type) {
|
||||
auto name = mng.new_mangled_name(op_action(op));
|
||||
global_env->bind(op_name(op), std::move(type), visibility::global);
|
||||
global_env->set_mangled_name(op_name(op), name);
|
||||
}
|
||||
|
||||
void compiler::add_default_function_types() {
|
||||
type_ptr int_type = global_env->lookup_type("Int");
|
||||
assert(int_type != nullptr);
|
||||
type_ptr int_type_app = type_ptr(new type_app(int_type));
|
||||
|
||||
type_ptr closed_int_op_type(
|
||||
new type_arr(int_type_app, type_ptr(new type_arr(int_type_app, int_type_app))));
|
||||
|
||||
constexpr binop closed_ops[] = { PLUS, MINUS, TIMES, DIVIDE };
|
||||
for(auto& op : closed_ops) add_binop_type(op, closed_int_op_type);
|
||||
}
|
||||
|
||||
void compiler::parse() {
|
||||
if(!driver())
|
||||
throw compiler_error("failed to open file");
|
||||
}
|
||||
|
||||
void compiler::typecheck() {
|
||||
std::set<std::string> free_variables;
|
||||
global_defs.find_free(free_variables);
|
||||
global_defs.typecheck(type_m, global_env);
|
||||
}
|
||||
|
||||
void compiler::translate() {
|
||||
for(auto& data : global_defs.defs_data) {
|
||||
data.second->into_globals(global_scp);
|
||||
}
|
||||
for(auto& defn : global_defs.defs_defn) {
|
||||
auto& function = defn.second->into_global(global_scp);
|
||||
defn.second->env->set_mangled_name(defn.first, function.name);
|
||||
}
|
||||
}
|
||||
|
||||
void compiler::compile() {
|
||||
global_scp.compile();
|
||||
}
|
||||
|
||||
void compiler::create_llvm_binop(binop op) {
|
||||
auto new_function =
|
||||
ctx.create_custom_function(global_env->get_mangled_name(op_name(op)), 2);
|
||||
std::vector<instruction_ptr> instructions;
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_binop(op)));
|
||||
instructions.push_back(instruction_ptr(new instruction_update(2)));
|
||||
instructions.push_back(instruction_ptr(new instruction_pop(2)));
|
||||
ctx.get_builder().SetInsertPoint(&new_function->getEntryBlock());
|
||||
for(auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, new_function);
|
||||
}
|
||||
ctx.get_builder().CreateRetVoid();
|
||||
}
|
||||
|
||||
void compiler::generate_llvm() {
|
||||
for(auto op : all_binops) {
|
||||
create_llvm_binop(op);
|
||||
}
|
||||
|
||||
global_scp.generate_llvm(ctx);
|
||||
}
|
||||
|
||||
void compiler::output_llvm(const std::string& into) {
|
||||
std::string targetTriple = llvm::sys::getDefaultTargetTriple();
|
||||
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmParser();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
|
||||
std::string error;
|
||||
const llvm::Target* target =
|
||||
llvm::TargetRegistry::lookupTarget(targetTriple, error);
|
||||
if (!target) {
|
||||
std::cerr << error << std::endl;
|
||||
} else {
|
||||
std::string cpu = "generic";
|
||||
std::string features = "";
|
||||
llvm::TargetOptions options;
|
||||
std::unique_ptr<llvm::TargetMachine> targetMachine(
|
||||
target->createTargetMachine(targetTriple, cpu, features,
|
||||
options, llvm::Optional<llvm::Reloc::Model>()));
|
||||
|
||||
ctx.get_module().setDataLayout(targetMachine->createDataLayout());
|
||||
ctx.get_module().setTargetTriple(targetTriple);
|
||||
|
||||
std::error_code ec;
|
||||
llvm::raw_fd_ostream file(into, ec, llvm::sys::fs::F_None);
|
||||
if (ec) {
|
||||
throw compiler_error("failed to open object file for writing");
|
||||
} else {
|
||||
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
||||
llvm::legacy::PassManager pm;
|
||||
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
||||
throw compiler_error("failed to add passes to pass manager");
|
||||
} else {
|
||||
pm.run(ctx.get_module());
|
||||
file.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
compiler::compiler(const std::string& filename)
|
||||
: file_m(), global_defs(), driver(file_m, global_defs, filename),
|
||||
global_env(new type_env), type_m(), mng(), global_scp(mng), ctx() {
|
||||
add_default_types();
|
||||
add_default_function_types();
|
||||
}
|
||||
|
||||
void compiler::operator()(const std::string& into) {
|
||||
parse();
|
||||
typecheck();
|
||||
translate();
|
||||
compile();
|
||||
generate_llvm();
|
||||
output_llvm(into);
|
||||
}
|
||||
|
||||
file_mgr& compiler::get_file_manager() {
|
||||
return file_m;
|
||||
}
|
||||
|
||||
type_mgr& compiler::get_type_manager() {
|
||||
return type_m;
|
||||
}
|
||||
37
code/compiler/13/compiler.hpp
Normal file
37
code/compiler/13/compiler.hpp
Normal file
@@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
#include "binop.hpp"
|
||||
#include "parse_driver.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "type.hpp"
|
||||
#include "global_scope.hpp"
|
||||
#include "mangler.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
|
||||
class compiler {
|
||||
private:
|
||||
file_mgr file_m;
|
||||
definition_group global_defs;
|
||||
parse_driver driver;
|
||||
type_env_ptr global_env;
|
||||
type_mgr type_m;
|
||||
mangler mng;
|
||||
global_scope global_scp;
|
||||
llvm_context ctx;
|
||||
|
||||
void add_default_types();
|
||||
void add_binop_type(binop op, type_ptr type);
|
||||
void add_default_function_types();
|
||||
void parse();
|
||||
void typecheck();
|
||||
void translate();
|
||||
void compile();
|
||||
void create_llvm_binop(binop op);
|
||||
void generate_llvm();
|
||||
void output_llvm(const std::string& into);
|
||||
public:
|
||||
compiler(const std::string& filename);
|
||||
void operator()(const std::string& into);
|
||||
file_mgr& get_file_manager();
|
||||
type_mgr& get_type_manager();
|
||||
};
|
||||
148
code/compiler/13/definition.cpp
Normal file
148
code/compiler/13/definition.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
#include "definition.hpp"
|
||||
#include <cassert>
|
||||
#include "error.hpp"
|
||||
#include "ast.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "graph.hpp"
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Type.h>
|
||||
|
||||
void definition_defn::find_free() {
|
||||
body->find_free(free_variables);
|
||||
for(auto& param : params) {
|
||||
free_variables.erase(param);
|
||||
}
|
||||
}
|
||||
|
||||
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v) {
|
||||
this->env = env;
|
||||
var_env = type_scope(env);
|
||||
return_type = mgr.new_type();
|
||||
full_type = return_type;
|
||||
|
||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||
type_ptr param_type = mgr.new_type();
|
||||
full_type = type_ptr(new type_arr(param_type, full_type));
|
||||
var_env->bind(*it, param_type);
|
||||
}
|
||||
env->bind(name, full_type, v);
|
||||
}
|
||||
|
||||
void definition_defn::typecheck(type_mgr& mgr) {
|
||||
type_ptr body_type = body->typecheck(mgr, var_env);
|
||||
mgr.unify(return_type, body_type);
|
||||
}
|
||||
|
||||
|
||||
global_function& definition_defn::into_global(global_scope& scope) {
|
||||
std::vector<std::string> all_params;
|
||||
for(auto& free : free_variables) {
|
||||
if(env->is_global(free)) continue;
|
||||
all_params.push_back(free);
|
||||
}
|
||||
all_params.insert(all_params.end(), params.begin(), params.end());
|
||||
body->translate(scope);
|
||||
return scope.add_function(name, std::move(all_params), std::move(body));
|
||||
}
|
||||
|
||||
void definition_data::insert_types(type_env_ptr& env) {
|
||||
this->env = env;
|
||||
env->bind_type(name, type_ptr(new type_data(name, vars.size())));
|
||||
}
|
||||
|
||||
void definition_data::insert_constructors() const {
|
||||
type_ptr this_type_ptr = env->lookup_type(name);
|
||||
type_data* this_type = static_cast<type_data*>(this_type_ptr.get());
|
||||
int next_tag = 0;
|
||||
|
||||
std::set<std::string> var_set;
|
||||
type_app* return_app = new type_app(std::move(this_type_ptr));
|
||||
type_ptr return_type(return_app);
|
||||
for(auto& var : vars) {
|
||||
if(var_set.find(var) != var_set.end())
|
||||
throw compiler_error(
|
||||
"type variable " + var +
|
||||
" used twice in data type definition.", loc);
|
||||
var_set.insert(var);
|
||||
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||
}
|
||||
|
||||
for(auto& constructor : constructors) {
|
||||
constructor->tag = next_tag;
|
||||
this_type->constructors[constructor->name] = { next_tag++ };
|
||||
|
||||
type_ptr full_type = return_type;
|
||||
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||
type_ptr type = (*it)->to_type(var_set, env);
|
||||
full_type = type_ptr(new type_arr(type, full_type));
|
||||
}
|
||||
|
||||
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
|
||||
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
|
||||
env->bind(constructor->name, full_scheme, visibility::global);
|
||||
}
|
||||
}
|
||||
|
||||
void definition_data::into_globals(global_scope& scope) {
|
||||
for(auto& constructor : constructors) {
|
||||
global_constructor& c = scope.add_constructor(
|
||||
constructor->name, constructor->tag, constructor->types.size());
|
||||
env->set_mangled_name(constructor->name, c.name);
|
||||
}
|
||||
}
|
||||
|
||||
void definition_group::find_free(std::set<std::string>& into) {
|
||||
for(auto& def_pair : defs_defn) {
|
||||
def_pair.second->find_free();
|
||||
for(auto& free_var : def_pair.second->free_variables) {
|
||||
if(defs_defn.find(free_var) == defs_defn.end()) {
|
||||
into.insert(free_var);
|
||||
} else {
|
||||
def_pair.second->nearby_variables.insert(free_var);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void definition_group::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = type_scope(env);
|
||||
|
||||
for(auto& def_data : defs_data) {
|
||||
def_data.second->insert_types(this->env);
|
||||
}
|
||||
for(auto& def_data : defs_data) {
|
||||
def_data.second->insert_constructors();
|
||||
}
|
||||
|
||||
function_graph dependency_graph;
|
||||
|
||||
for(auto& def_defn : defs_defn) {
|
||||
def_defn.second->find_free();
|
||||
dependency_graph.add_function(def_defn.second->name);
|
||||
|
||||
for(auto& dependency : def_defn.second->nearby_variables) {
|
||||
assert(defs_defn.find(dependency) != defs_defn.end());
|
||||
dependency_graph.add_edge(def_defn.second->name, dependency);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<group_ptr> groups = dependency_graph.compute_order();
|
||||
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
|
||||
auto& group = *it;
|
||||
for(auto& def_defnn_name : group->members) {
|
||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||
def_defn->insert_types(mgr, this->env, vis);
|
||||
}
|
||||
for(auto& def_defnn_name : group->members) {
|
||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||
def_defn->typecheck(mgr);
|
||||
}
|
||||
for(auto& def_defnn_name : group->members) {
|
||||
this->env->generalize(def_defnn_name, *group, mgr);
|
||||
}
|
||||
}
|
||||
}
|
||||
91
code/compiler/13/definition.hpp
Normal file
91
code/compiler/13/definition.hpp
Normal file
@@ -0,0 +1,91 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include "parsed_type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "location.hh"
|
||||
#include "global_scope.hpp"
|
||||
|
||||
struct ast;
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
|
||||
struct constructor {
|
||||
std::string name;
|
||||
std::vector<parsed_type_ptr> types;
|
||||
int8_t tag;
|
||||
|
||||
constructor(std::string n, std::vector<parsed_type_ptr> ts)
|
||||
: name(std::move(n)), types(std::move(ts)) {}
|
||||
};
|
||||
|
||||
using constructor_ptr = std::unique_ptr<constructor>;
|
||||
|
||||
struct definition_defn {
|
||||
std::string name;
|
||||
std::vector<std::string> params;
|
||||
ast_ptr body;
|
||||
yy::location loc;
|
||||
|
||||
type_env_ptr env;
|
||||
type_env_ptr var_env;
|
||||
std::set<std::string> free_variables;
|
||||
std::set<std::string> nearby_variables;
|
||||
type_ptr full_type;
|
||||
type_ptr return_type;
|
||||
|
||||
definition_defn(
|
||||
std::string n,
|
||||
std::vector<std::string> p,
|
||||
ast_ptr b,
|
||||
yy::location l = yy::location())
|
||||
: name(std::move(n)), params(std::move(p)), body(std::move(b)), loc(std::move(l)) {
|
||||
|
||||
}
|
||||
|
||||
void find_free();
|
||||
void insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
|
||||
void typecheck(type_mgr& mgr);
|
||||
|
||||
global_function& into_global(global_scope& scope);
|
||||
};
|
||||
|
||||
using definition_defn_ptr = std::unique_ptr<definition_defn>;
|
||||
|
||||
struct definition_data {
|
||||
std::string name;
|
||||
std::vector<std::string> vars;
|
||||
std::vector<constructor_ptr> constructors;
|
||||
yy::location loc;
|
||||
|
||||
type_env_ptr env;
|
||||
|
||||
definition_data(
|
||||
std::string n,
|
||||
std::vector<std::string> vs,
|
||||
std::vector<constructor_ptr> cs,
|
||||
yy::location l = yy::location())
|
||||
: name(std::move(n)), vars(std::move(vs)), constructors(std::move(cs)), loc(std::move(l)) {}
|
||||
|
||||
void insert_types(type_env_ptr& env);
|
||||
void insert_constructors() const;
|
||||
|
||||
void into_globals(global_scope& scope);
|
||||
};
|
||||
|
||||
using definition_data_ptr = std::unique_ptr<definition_data>;
|
||||
|
||||
struct definition_group {
|
||||
std::map<std::string, definition_data_ptr> defs_data;
|
||||
std::map<std::string, definition_defn_ptr> defs_defn;
|
||||
visibility vis;
|
||||
type_env_ptr env;
|
||||
|
||||
definition_group(visibility v = visibility::local) : vis(v) {}
|
||||
|
||||
void find_free(std::set<std::string>& into);
|
||||
void typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
};
|
||||
24
code/compiler/13/env.cpp
Normal file
24
code/compiler/13/env.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
#include "env.hpp"
|
||||
#include <cassert>
|
||||
|
||||
int env_var::get_offset(const std::string& name) const {
|
||||
if(name == this->name) return 0;
|
||||
assert(parent != nullptr);
|
||||
return parent->get_offset(name) + 1;
|
||||
}
|
||||
|
||||
bool env_var::has_variable(const std::string& name) const {
|
||||
if(name == this->name) return true;
|
||||
if(parent) return parent->has_variable(name);
|
||||
return false;
|
||||
}
|
||||
|
||||
int env_offset::get_offset(const std::string& name) const {
|
||||
assert(parent != nullptr);
|
||||
return parent->get_offset(name) + offset;
|
||||
}
|
||||
|
||||
bool env_offset::has_variable(const std::string& name) const {
|
||||
if(parent) return parent->has_variable(name);
|
||||
return false;
|
||||
}
|
||||
39
code/compiler/13/env.hpp
Normal file
39
code/compiler/13/env.hpp
Normal file
@@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
class env {
|
||||
public:
|
||||
virtual ~env() = default;
|
||||
|
||||
virtual int get_offset(const std::string& name) const = 0;
|
||||
virtual bool has_variable(const std::string& name) const = 0;
|
||||
};
|
||||
|
||||
using env_ptr = std::shared_ptr<env>;
|
||||
|
||||
class env_var : public env {
|
||||
private:
|
||||
std::string name;
|
||||
env_ptr parent;
|
||||
|
||||
public:
|
||||
env_var(std::string n, env_ptr p)
|
||||
: name(std::move(n)), parent(std::move(p)) {}
|
||||
|
||||
int get_offset(const std::string& name) const;
|
||||
bool has_variable(const std::string& name) const;
|
||||
};
|
||||
|
||||
class env_offset : public env {
|
||||
private:
|
||||
int offset;
|
||||
env_ptr parent;
|
||||
|
||||
public:
|
||||
env_offset(int o, env_ptr p)
|
||||
: offset(o), parent(std::move(p)) {}
|
||||
|
||||
int get_offset(const std::string& name) const;
|
||||
bool has_variable(const std::string& name) const;
|
||||
};
|
||||
41
code/compiler/13/error.cpp
Normal file
41
code/compiler/13/error.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "error.hpp"
|
||||
|
||||
const char* compiler_error::what() const noexcept {
|
||||
return "an error occured while compiling the program";
|
||||
}
|
||||
|
||||
void compiler_error::print_about(std::ostream& to) {
|
||||
to << what() << ": ";
|
||||
to << description << std::endl;
|
||||
}
|
||||
|
||||
void compiler_error::print_location(std::ostream& to, file_mgr& fm, bool highlight) {
|
||||
if(!loc) return;
|
||||
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
||||
fm.print_location(to, *loc, highlight);
|
||||
}
|
||||
|
||||
void compiler_error::pretty_print(std::ostream& to, file_mgr& fm) {
|
||||
print_about(to);
|
||||
print_location(to, fm);
|
||||
}
|
||||
|
||||
const char* type_error::what() const noexcept {
|
||||
return "an error occured while checking the types of the program";
|
||||
}
|
||||
|
||||
void type_error::pretty_print(std::ostream& to, file_mgr& fm) {
|
||||
print_about(to);
|
||||
print_location(to, fm, true);
|
||||
}
|
||||
|
||||
void unification_error::pretty_print(std::ostream& to, file_mgr& fm, type_mgr& mgr) {
|
||||
type_error::pretty_print(to, fm);
|
||||
to << "the expected type was:" << std::endl;
|
||||
to << " \033[34m";
|
||||
left->print(mgr, to);
|
||||
to << std::endl << "\033[0mwhile the actual type was:" << std::endl;
|
||||
to << " \033[32m";
|
||||
right->print(mgr, to);
|
||||
to << "\033[0m" << std::endl;
|
||||
}
|
||||
49
code/compiler/13/error.hpp
Normal file
49
code/compiler/13/error.hpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
#include <exception>
|
||||
#include <optional>
|
||||
#include "type.hpp"
|
||||
#include "location.hh"
|
||||
#include "parse_driver.hpp"
|
||||
|
||||
using maybe_location = std::optional<yy::location>;
|
||||
|
||||
class compiler_error : std::exception {
|
||||
private:
|
||||
std::string description;
|
||||
maybe_location loc;
|
||||
|
||||
public:
|
||||
compiler_error(std::string d, maybe_location l = std::nullopt)
|
||||
: description(std::move(d)), loc(std::move(l)) {}
|
||||
|
||||
const char* what() const noexcept override;
|
||||
|
||||
void print_about(std::ostream& to);
|
||||
void print_location(std::ostream& to, file_mgr& fm, bool highlight = false);
|
||||
|
||||
void pretty_print(std::ostream& to, file_mgr& fm);
|
||||
};
|
||||
|
||||
class type_error : compiler_error {
|
||||
private:
|
||||
|
||||
public:
|
||||
type_error(std::string d, maybe_location l = std::nullopt)
|
||||
: compiler_error(std::move(d), std::move(l)) {}
|
||||
|
||||
const char* what() const noexcept override;
|
||||
void pretty_print(std::ostream& to, file_mgr& fm);
|
||||
};
|
||||
|
||||
class unification_error : public type_error {
|
||||
private:
|
||||
type_ptr left;
|
||||
type_ptr right;
|
||||
|
||||
public:
|
||||
unification_error(type_ptr l, type_ptr r, maybe_location loc = std::nullopt)
|
||||
: left(std::move(l)), right(std::move(r)),
|
||||
type_error("failed to unify types", std::move(loc)) {}
|
||||
|
||||
void pretty_print(std::ostream& to, file_mgr& fm, type_mgr& mgr);
|
||||
};
|
||||
2
code/compiler/13/examples/bad1.txt
Normal file
2
code/compiler/13/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
data Bool = { True, False }
|
||||
defn main = { 3 + True }
|
||||
1
code/compiler/13/examples/bad2.txt
Normal file
1
code/compiler/13/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
|
||||
defn main = { 1 2 3 4 5 }
|
||||
8
code/compiler/13/examples/bad3.txt
Normal file
8
code/compiler/13/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x y z -> { x }
|
||||
}
|
||||
}
|
||||
6
code/compiler/13/examples/errors/double_catchall.txt
Normal file
6
code/compiler/13/examples/errors/double_catchall.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
n -> { 2 }
|
||||
n -> { 1 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
data Pair a a = { MkPair a a }
|
||||
7
code/compiler/13/examples/errors/exhausted_patterns.txt
Normal file
7
code/compiler/13/examples/errors/exhausted_patterns.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
True -> { 1 }
|
||||
False -> { 0 }
|
||||
n -> { 2 }
|
||||
}
|
||||
}
|
||||
5
code/compiler/13/examples/errors/incomplete_patterns.txt
Normal file
5
code/compiler/13/examples/errors/incomplete_patterns.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
True -> { 1 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
defn add x y = { x + y }
|
||||
|
||||
defn main = {
|
||||
case add of {
|
||||
n -> { 1 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
n -> { 2 }
|
||||
True -> { 1 }
|
||||
False -> { 0 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x -> { x }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x y z -> { x }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
NotBool -> { 1 }
|
||||
True -> { 2 }
|
||||
}
|
||||
}
|
||||
1
code/compiler/13/examples/errors/type_redefinition.txt
Normal file
1
code/compiler/13/examples/errors/type_redefinition.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Bool = { True, False }
|
||||
3
code/compiler/13/examples/errors/unknown_lid.txt
Normal file
3
code/compiler/13/examples/errors/unknown_lid.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
defn main = {
|
||||
weird 1
|
||||
}
|
||||
1
code/compiler/13/examples/errors/unknown_type.txt
Normal file
1
code/compiler/13/examples/errors/unknown_type.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Wrapper = { Wrap Weird }
|
||||
1
code/compiler/13/examples/errors/unknown_type_param.txt
Normal file
1
code/compiler/13/examples/errors/unknown_type_param.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Wrapper = { Wrap a }
|
||||
3
code/compiler/13/examples/errors/unknown_uid.txt
Normal file
3
code/compiler/13/examples/errors/unknown_uid.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
defn main = {
|
||||
Weird 1
|
||||
}
|
||||
1
code/compiler/13/examples/errors/wrong_type_kind.txt
Normal file
1
code/compiler/13/examples/errors/wrong_type_kind.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Wrapper = { Wrap (Int Bool) }
|
||||
17
code/compiler/13/examples/fixpoint.txt
Normal file
17
code/compiler/13/examples/fixpoint.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn fix f = { let { defn x = { f x } } in { x } }
|
||||
defn fixpointOnes fo = { Cons 1 fo }
|
||||
defn sumTwo l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> {
|
||||
x + case xs of {
|
||||
Nil -> { 0 }
|
||||
Cons y ys -> { y }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn main = { sumTwo (fix fixpointOnes) }
|
||||
8
code/compiler/13/examples/if.txt
Normal file
8
code/compiler/13/examples/if.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
data Bool = { True, False }
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
defn main = { if (if True False True) 11 3 }
|
||||
19
code/compiler/13/examples/lambda.txt
Normal file
19
code/compiler/13/examples/lambda.txt
Normal file
@@ -0,0 +1,19 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn sum l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> { x + sum xs}
|
||||
}
|
||||
}
|
||||
|
||||
defn map f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { Cons (f x) (map f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
sum (map \x -> { x * x } (map (\x -> { x + x }) (Cons 1 (Cons 2 (Cons 3 Nil)))))
|
||||
}
|
||||
47
code/compiler/13/examples/letin.txt
Normal file
47
code/compiler/13/examples/letin.txt
Normal file
@@ -0,0 +1,47 @@
|
||||
data Bool = { True, False }
|
||||
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn mergeUntil l r p = {
|
||||
let {
|
||||
defn mergeLeft nl nr = {
|
||||
case nl of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { if (p x) (Cons x (mergeRight xs nr)) Nil }
|
||||
}
|
||||
}
|
||||
defn mergeRight nl nr = {
|
||||
case nr of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { if (p x) (Cons x (mergeLeft nl xs)) Nil }
|
||||
}
|
||||
}
|
||||
} in {
|
||||
mergeLeft l r
|
||||
}
|
||||
}
|
||||
|
||||
defn const x y = { x }
|
||||
|
||||
defn sum l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> { x + sum xs }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
let {
|
||||
defn firstList = { Cons 1 (Cons 3 (Cons 5 Nil)) }
|
||||
defn secondList = { Cons 2 (Cons 4 (Cons 6 Nil)) }
|
||||
} in {
|
||||
sum (mergeUntil firstList secondList (const True))
|
||||
}
|
||||
}
|
||||
32
code/compiler/13/examples/list.txt
Normal file
32
code/compiler/13/examples/list.txt
Normal file
@@ -0,0 +1,32 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn map f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { Cons (f x) (map f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn foldl f b l = {
|
||||
case l of {
|
||||
Nil -> { b }
|
||||
Cons x xs -> { foldl f (f b x) xs }
|
||||
}
|
||||
}
|
||||
|
||||
defn foldr f b l = {
|
||||
case l of {
|
||||
Nil -> { b }
|
||||
Cons x xs -> { f x (foldr f b xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn list = { Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))) }
|
||||
|
||||
defn add x y = { x + y }
|
||||
defn sum l = { foldr add 0 l }
|
||||
|
||||
defn skipAdd x y = { y + 1 }
|
||||
defn length l = { foldr skipAdd 0 l }
|
||||
|
||||
defn main = { sum list + length list }
|
||||
25
code/compiler/13/examples/mutual_recursion.txt
Normal file
25
code/compiler/13/examples/mutual_recursion.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
data Bool = { True, False }
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn oddEven l e = {
|
||||
case l of {
|
||||
Nil -> { e }
|
||||
Cons x xs -> { evenOdd xs e }
|
||||
}
|
||||
}
|
||||
|
||||
defn evenOdd l e = {
|
||||
case l of {
|
||||
Nil -> { e }
|
||||
Cons x xs -> { oddEven xs e }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = { if (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) True) (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) 1) 3 }
|
||||
23
code/compiler/13/examples/packed.txt
Normal file
23
code/compiler/13/examples/packed.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
data Pair a b = { Pair a b }
|
||||
|
||||
defn packer = {
|
||||
let {
|
||||
data Packed a = { Packed a }
|
||||
defn pack a = { Packed a }
|
||||
defn unpack p = {
|
||||
case p of {
|
||||
Packed a -> { a }
|
||||
}
|
||||
}
|
||||
} in {
|
||||
Pair pack unpack
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
case packer of {
|
||||
Pair pack unpack -> {
|
||||
unpack (pack 3)
|
||||
}
|
||||
}
|
||||
}
|
||||
17
code/compiler/13/examples/pair.txt
Normal file
17
code/compiler/13/examples/pair.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
data Pair a b = { MkPair a b }
|
||||
|
||||
defn fst p = {
|
||||
case p of {
|
||||
MkPair a b -> { a }
|
||||
}
|
||||
}
|
||||
|
||||
defn snd p = {
|
||||
case p of {
|
||||
MkPair a b -> { b }
|
||||
}
|
||||
}
|
||||
|
||||
defn pair = { MkPair 1 (MkPair 2 3) }
|
||||
|
||||
defn main = { fst pair + snd (snd pair) }
|
||||
122
code/compiler/13/examples/primes.txt
Normal file
122
code/compiler/13/examples/primes.txt
Normal file
@@ -0,0 +1,122 @@
|
||||
data List = { Nil, Cons Nat List }
|
||||
data Bool = { True, False }
|
||||
data Nat = { O, S Nat }
|
||||
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn toInt n = {
|
||||
case n of {
|
||||
O -> { 0 }
|
||||
S np -> { 1 + toInt np }
|
||||
}
|
||||
}
|
||||
|
||||
defn lte n m = {
|
||||
case m of {
|
||||
O -> {
|
||||
case n of {
|
||||
O -> { True }
|
||||
S np -> { False }
|
||||
}
|
||||
}
|
||||
S mp -> {
|
||||
case n of {
|
||||
O -> { True }
|
||||
S np -> { lte np mp }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn minus n m = {
|
||||
case m of {
|
||||
O -> { n }
|
||||
S mp -> {
|
||||
case n of {
|
||||
O -> { O }
|
||||
S np -> {
|
||||
minus np mp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn mod n m = {
|
||||
if (lte m n) (mod (minus n m) m) n
|
||||
}
|
||||
|
||||
defn notDivisibleBy n m = {
|
||||
case (mod m n) of {
|
||||
O -> { False }
|
||||
S mp -> { True }
|
||||
}
|
||||
}
|
||||
|
||||
defn filter f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { if (f x) (Cons x (filter f xs)) (filter f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn map f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { Cons (f x) (map f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn nats = {
|
||||
Cons (S (S O)) (map S nats)
|
||||
}
|
||||
|
||||
defn primesRec l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
|
||||
}
|
||||
}
|
||||
|
||||
defn primes = {
|
||||
primesRec nats
|
||||
}
|
||||
|
||||
defn take n l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> {
|
||||
case n of {
|
||||
O -> { Nil }
|
||||
S np -> { Cons x (take np xs) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { O }
|
||||
Cons x xs -> { x }
|
||||
}
|
||||
}
|
||||
|
||||
defn reverseAcc a l = {
|
||||
case l of {
|
||||
Nil -> { a }
|
||||
Cons x xs -> { reverseAcc (Cons x a) xs }
|
||||
}
|
||||
}
|
||||
|
||||
defn reverse l = {
|
||||
reverseAcc Nil l
|
||||
}
|
||||
|
||||
defn main = {
|
||||
toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
|
||||
}
|
||||
31
code/compiler/13/examples/runtime1.c
Normal file
31
code/compiler/13/examples/runtime1.c
Normal file
@@ -0,0 +1,31 @@
|
||||
#include "../runtime.h"
|
||||
|
||||
void f_add(struct stack* s) {
|
||||
struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
|
||||
struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
|
||||
stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
|
||||
}
|
||||
|
||||
void f_main(struct stack* s) {
|
||||
// PushInt 320
|
||||
stack_push(s, (struct node_base*) alloc_num(320));
|
||||
|
||||
// PushInt 6
|
||||
stack_push(s, (struct node_base*) alloc_num(6));
|
||||
|
||||
// PushGlobal f_add (the function for +)
|
||||
stack_push(s, (struct node_base*) alloc_global(f_add, 2));
|
||||
|
||||
struct node_base* left;
|
||||
struct node_base* right;
|
||||
|
||||
// MkApp
|
||||
left = stack_pop(s);
|
||||
right = stack_pop(s);
|
||||
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||
|
||||
// MkApp
|
||||
left = stack_pop(s);
|
||||
right = stack_pop(s);
|
||||
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||
}
|
||||
2
code/compiler/13/examples/works1.txt
Normal file
2
code/compiler/13/examples/works1.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
defn main = { sum 320 6 }
|
||||
defn sum x y = { x + y }
|
||||
3
code/compiler/13/examples/works2.txt
Normal file
3
code/compiler/13/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
defn add x y = { x + y }
|
||||
defn double x = { add x x }
|
||||
defn main = { double 163 }
|
||||
9
code/compiler/13/examples/works3.txt
Normal file
9
code/compiler/13/examples/works3.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
data Bool = { True, False }
|
||||
defn length l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> { 1 + length xs }
|
||||
}
|
||||
}
|
||||
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) + length (Cons True (Cons False (Cons True Nil))) }
|
||||
16
code/compiler/13/examples/works4.txt
Normal file
16
code/compiler/13/examples/works4.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn add x y = { x + y }
|
||||
defn mul x y = { x * y }
|
||||
|
||||
defn foldr f b l = {
|
||||
case l of {
|
||||
Nil -> { b }
|
||||
Cons x xs -> { f x (foldr f b xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
|
||||
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
|
||||
}
|
||||
17
code/compiler/13/examples/works5.txt
Normal file
17
code/compiler/13/examples/works5.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn sumZip l m = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> {
|
||||
case m of {
|
||||
Nil -> { 0 }
|
||||
Cons y ys -> { x + y + sumZip xs ys }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn ones = { Cons 1 ones }
|
||||
|
||||
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||
76
code/compiler/13/global_scope.cpp
Normal file
76
code/compiler/13/global_scope.cpp
Normal file
@@ -0,0 +1,76 @@
|
||||
#include "global_scope.hpp"
|
||||
#include "ast.hpp"
|
||||
|
||||
void global_function::compile() {
|
||||
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||
new_env = env_ptr(new env_var(*it, new_env));
|
||||
}
|
||||
body->compile(new_env, instructions);
|
||||
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||
}
|
||||
|
||||
void global_function::declare_llvm(llvm_context& ctx) {
|
||||
generated_function = ctx.create_custom_function(name, params.size());
|
||||
}
|
||||
|
||||
void global_function::generate_llvm(llvm_context& ctx) {
|
||||
ctx.get_builder().SetInsertPoint(&generated_function->getEntryBlock());
|
||||
for(auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, generated_function);
|
||||
}
|
||||
ctx.get_builder().CreateRetVoid();
|
||||
}
|
||||
|
||||
void global_constructor::generate_llvm(llvm_context& ctx) {
|
||||
auto new_function =
|
||||
ctx.create_custom_function(name, arity);
|
||||
std::vector<instruction_ptr> instructions;
|
||||
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
|
||||
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||
ctx.get_builder().SetInsertPoint(&new_function->getEntryBlock());
|
||||
for (auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, new_function);
|
||||
}
|
||||
ctx.get_builder().CreateRetVoid();
|
||||
}
|
||||
|
||||
global_function& global_scope::add_function(
|
||||
const std::string& n,
|
||||
std::vector<std::string> ps,
|
||||
ast_ptr b) {
|
||||
auto name = mng->new_mangled_name(n);
|
||||
global_function* new_function =
|
||||
new global_function(std::move(name), std::move(ps), std::move(b));
|
||||
functions.push_back(global_function_ptr(new_function));
|
||||
return *new_function;
|
||||
}
|
||||
|
||||
global_constructor& global_scope::add_constructor(
|
||||
const std::string& n,
|
||||
int8_t t,
|
||||
size_t a) {
|
||||
auto name = mng->new_mangled_name(n);
|
||||
global_constructor* new_constructor = new global_constructor(name, t, a);
|
||||
constructors.push_back(global_constructor_ptr(new_constructor));
|
||||
return *new_constructor;
|
||||
}
|
||||
|
||||
void global_scope::compile() {
|
||||
for(auto& function : functions) {
|
||||
function->compile();
|
||||
}
|
||||
}
|
||||
|
||||
void global_scope::generate_llvm(llvm_context& ctx) {
|
||||
for(auto& constructor : constructors) {
|
||||
constructor->generate_llvm(ctx);
|
||||
}
|
||||
for(auto& function : functions) {
|
||||
function->declare_llvm(ctx);
|
||||
}
|
||||
for(auto& function : functions) {
|
||||
function->generate_llvm(ctx);
|
||||
}
|
||||
}
|
||||
60
code/compiler/13/global_scope.hpp
Normal file
60
code/compiler/13/global_scope.hpp
Normal file
@@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <llvm/IR/Function.h>
|
||||
#include "instruction.hpp"
|
||||
#include "mangler.hpp"
|
||||
|
||||
struct ast;
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
|
||||
struct global_function {
|
||||
std::string name;
|
||||
std::vector<std::string> params;
|
||||
ast_ptr body;
|
||||
|
||||
std::vector<instruction_ptr> instructions;
|
||||
llvm::Function* generated_function;
|
||||
|
||||
global_function(std::string n, std::vector<std::string> ps, ast_ptr b)
|
||||
: name(std::move(n)), params(std::move(ps)), body(std::move(b)) {}
|
||||
|
||||
void compile();
|
||||
void declare_llvm(llvm_context& ctx);
|
||||
void generate_llvm(llvm_context& ctx);
|
||||
};
|
||||
|
||||
using global_function_ptr = std::unique_ptr<global_function>;
|
||||
|
||||
struct global_constructor {
|
||||
std::string name;
|
||||
int8_t tag;
|
||||
size_t arity;
|
||||
|
||||
global_constructor(std::string n, int8_t t, size_t a)
|
||||
: name(std::move(n)), tag(t), arity(a) {}
|
||||
|
||||
void generate_llvm(llvm_context& ctx);
|
||||
};
|
||||
|
||||
using global_constructor_ptr = std::unique_ptr<global_constructor>;
|
||||
|
||||
class global_scope {
|
||||
private:
|
||||
std::vector<global_function_ptr> functions;
|
||||
std::vector<global_constructor_ptr> constructors;
|
||||
mangler* mng;
|
||||
|
||||
public:
|
||||
global_scope(mangler& m) : mng(&m) {}
|
||||
|
||||
global_function& add_function(
|
||||
const std::string& n,
|
||||
std::vector<std::string> ps,
|
||||
ast_ptr b);
|
||||
global_constructor& add_constructor(const std::string& n, int8_t t, size_t a);
|
||||
|
||||
void compile();
|
||||
void generate_llvm(llvm_context& ctx);
|
||||
};
|
||||
114
code/compiler/13/graph.cpp
Normal file
114
code/compiler/13/graph.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
#include "graph.hpp"
|
||||
|
||||
std::set<function_graph::edge> function_graph::compute_transitive_edges() {
|
||||
std::set<edge> transitive_edges;
|
||||
transitive_edges.insert(edges.begin(), edges.end());
|
||||
for(auto& connector : adjacency_lists) {
|
||||
for(auto& from : adjacency_lists) {
|
||||
edge to_connector { from.first, connector.first };
|
||||
for(auto& to : adjacency_lists) {
|
||||
edge full_jump { from.first, to.first };
|
||||
if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;
|
||||
|
||||
edge from_connector { connector.first, to.first };
|
||||
if(transitive_edges.find(to_connector) != transitive_edges.end() &&
|
||||
transitive_edges.find(from_connector) != transitive_edges.end())
|
||||
transitive_edges.insert(std::move(full_jump));
|
||||
}
|
||||
}
|
||||
}
|
||||
return transitive_edges;
|
||||
}
|
||||
|
||||
void function_graph::create_groups(
|
||||
const std::set<edge>& transitive_edges,
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
group_id id_counter = 0;
|
||||
for(auto& vertex : adjacency_lists) {
|
||||
if(group_ids.find(vertex.first) != group_ids.end())
|
||||
continue;
|
||||
data_ptr new_group(new group_data);
|
||||
new_group->functions.insert(vertex.first);
|
||||
group_data_map[id_counter] = new_group;
|
||||
group_ids[vertex.first] = id_counter;
|
||||
for(auto& other_vertex : adjacency_lists) {
|
||||
if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
|
||||
transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
|
||||
group_ids[other_vertex.first] = id_counter;
|
||||
new_group->functions.insert(other_vertex.first);
|
||||
}
|
||||
}
|
||||
id_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
void function_graph::create_edges(
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
std::set<std::pair<group_id, group_id>> group_edges;
|
||||
for(auto& vertex : adjacency_lists) {
|
||||
auto vertex_id = group_ids[vertex.first];
|
||||
auto& vertex_data = group_data_map[vertex_id];
|
||||
for(auto& other_vertex : vertex.second) {
|
||||
auto other_id = group_ids[other_vertex];
|
||||
if(vertex_id == other_id) continue;
|
||||
if(group_edges.find({vertex_id, other_id}) != group_edges.end())
|
||||
continue;
|
||||
group_edges.insert({vertex_id, other_id});
|
||||
vertex_data->adjacency_list.insert(other_id);
|
||||
group_data_map[other_id]->indegree++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<group_ptr> function_graph::generate_order(
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
std::queue<group_id> id_queue;
|
||||
std::vector<group_ptr> output;
|
||||
for(auto& group : group_data_map) {
|
||||
if(group.second->indegree == 0) id_queue.push(group.first);
|
||||
}
|
||||
|
||||
while(!id_queue.empty()) {
|
||||
auto new_id = id_queue.front();
|
||||
auto& group_data = group_data_map[new_id];
|
||||
group_ptr output_group(new group);
|
||||
output_group->members = std::move(group_data->functions);
|
||||
id_queue.pop();
|
||||
|
||||
for(auto& adjacent_group : group_data->adjacency_list) {
|
||||
if(--group_data_map[adjacent_group]->indegree == 0)
|
||||
id_queue.push(adjacent_group);
|
||||
}
|
||||
|
||||
output.push_back(std::move(output_group));
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
std::set<function>& function_graph::add_function(const function& f) {
|
||||
auto adjacency_list_it = adjacency_lists.find(f);
|
||||
if(adjacency_list_it != adjacency_lists.end()) {
|
||||
return adjacency_list_it->second;
|
||||
} else {
|
||||
return adjacency_lists[f] = { };
|
||||
}
|
||||
}
|
||||
|
||||
void function_graph::add_edge(const function& from, const function& to) {
|
||||
add_function(from).insert(to);
|
||||
edges.insert({ from, to });
|
||||
}
|
||||
|
||||
std::vector<group_ptr> function_graph::compute_order() {
|
||||
std::set<edge> transitive_edges = compute_transitive_edges();
|
||||
std::map<function, group_id> group_ids;
|
||||
std::map<group_id, data_ptr> group_data_map;
|
||||
|
||||
create_groups(transitive_edges, group_ids, group_data_map);
|
||||
create_edges(group_ids, group_data_map);
|
||||
return generate_order(group_ids, group_data_map);
|
||||
}
|
||||
54
code/compiler/13/graph.hpp
Normal file
54
code/compiler/13/graph.hpp
Normal file
@@ -0,0 +1,54 @@
|
||||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
using function = std::string;
|
||||
|
||||
struct group {
|
||||
std::set<function> members;
|
||||
};
|
||||
|
||||
using group_ptr = std::unique_ptr<group>;
|
||||
|
||||
class function_graph {
|
||||
private:
|
||||
using group_id = size_t;
|
||||
|
||||
struct group_data {
|
||||
std::set<function> functions;
|
||||
std::set<group_id> adjacency_list;
|
||||
size_t indegree;
|
||||
|
||||
group_data() : indegree(0) {}
|
||||
};
|
||||
|
||||
using data_ptr = std::shared_ptr<group_data>;
|
||||
using edge = std::pair<function, function>;
|
||||
using group_edge = std::pair<group_id, group_id>;
|
||||
|
||||
std::map<function, std::set<function>> adjacency_lists;
|
||||
std::set<edge> edges;
|
||||
|
||||
std::set<edge> compute_transitive_edges();
|
||||
void create_groups(
|
||||
const std::set<edge>&,
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
void create_edges(
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
std::vector<group_ptr> generate_order(
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
|
||||
public:
|
||||
std::set<function>& add_function(const function& f);
|
||||
void add_edge(const function& from, const function& to);
|
||||
std::vector<group_ptr> compute_order();
|
||||
};
|
||||
177
code/compiler/13/instruction.cpp
Normal file
177
code/compiler/13/instruction.cpp
Normal file
@@ -0,0 +1,177 @@
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include <llvm/IR/BasicBlock.h>
|
||||
#include <llvm/IR/Function.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static void print_indent(int n, std::ostream& to) {
|
||||
while(n--) to << " ";
|
||||
}
|
||||
|
||||
void instruction_pushint::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "PushInt(" << value << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_push(f, ctx.create_num(f, ctx.create_i32(value)));
|
||||
}
|
||||
|
||||
void instruction_pushglobal::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "PushGlobal(" << name << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto& global_f = ctx.get_custom_function(name);
|
||||
auto arity = ctx.create_i32(global_f.arity);
|
||||
ctx.create_push(f, ctx.create_global(f, global_f.function, arity));
|
||||
}
|
||||
|
||||
void instruction_push::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Push(" << offset << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset)));
|
||||
}
|
||||
|
||||
void instruction_pop::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Pop(" << count << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_popn(f, ctx.create_size(count));
|
||||
}
|
||||
|
||||
void instruction_mkapp::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "MkApp()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto left = ctx.create_pop(f);
|
||||
auto right = ctx.create_pop(f);
|
||||
ctx.create_push(f, ctx.create_app(f, left, right));
|
||||
}
|
||||
|
||||
void instruction_update::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Update(" << offset << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_update(f, ctx.create_size(offset));
|
||||
}
|
||||
|
||||
void instruction_pack::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Pack(" << tag << ", " << size << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag));
|
||||
}
|
||||
|
||||
void instruction_split::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Split()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_split(f, ctx.create_size(size));
|
||||
}
|
||||
|
||||
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Jump(" << std::endl;
|
||||
for(auto& instruction_set : branches) {
|
||||
for(auto& instruction : instruction_set) {
|
||||
instruction->print(indent + 2, to);
|
||||
}
|
||||
to << std::endl;
|
||||
}
|
||||
print_indent(indent, to);
|
||||
to << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto top_node = ctx.create_peek(f, ctx.create_size(0));
|
||||
auto tag = ctx.unwrap_data_tag(top_node);
|
||||
auto safety_block = ctx.create_basic_block("safety", f);
|
||||
auto switch_op = ctx.get_builder().CreateSwitch(tag, safety_block, tag_mappings.size());
|
||||
std::vector<BasicBlock*> blocks;
|
||||
|
||||
for(auto& branch : branches) {
|
||||
auto branch_block = ctx.create_basic_block("branch", f);
|
||||
ctx.get_builder().SetInsertPoint(branch_block);
|
||||
for(auto& instruction : branch) {
|
||||
instruction->gen_llvm(ctx, f);
|
||||
}
|
||||
ctx.get_builder().CreateBr(safety_block);
|
||||
blocks.push_back(branch_block);
|
||||
}
|
||||
|
||||
for(auto& mapping : tag_mappings) {
|
||||
switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
|
||||
}
|
||||
|
||||
ctx.get_builder().SetInsertPoint(safety_block);
|
||||
}
|
||||
|
||||
void instruction_slide::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Slide(" << offset << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_slide(f, ctx.create_size(offset));
|
||||
}
|
||||
|
||||
void instruction_binop::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "BinOp(" << op_action(op) << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto left_int = ctx.unwrap_num(ctx.create_pop(f));
|
||||
auto right_int = ctx.unwrap_num(ctx.create_pop(f));
|
||||
llvm::Value* result;
|
||||
switch(op) {
|
||||
case PLUS: result = ctx.get_builder().CreateAdd(left_int, right_int); break;
|
||||
case MINUS: result = ctx.get_builder().CreateSub(left_int, right_int); break;
|
||||
case TIMES: result = ctx.get_builder().CreateMul(left_int, right_int); break;
|
||||
case DIVIDE: result = ctx.get_builder().CreateSDiv(left_int, right_int); break;
|
||||
}
|
||||
ctx.create_push(f, ctx.create_num(f, result));
|
||||
}
|
||||
|
||||
void instruction_eval::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Eval()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_unwind(f);
|
||||
}
|
||||
|
||||
void instruction_alloc::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Alloc(" << amount << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_alloc(f, ctx.create_size(amount));
|
||||
}
|
||||
|
||||
void instruction_unwind::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Unwind()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
// Nothing
|
||||
}
|
||||
142
code/compiler/13/instruction.hpp
Normal file
142
code/compiler/13/instruction.hpp
Normal file
@@ -0,0 +1,142 @@
|
||||
#pragma once
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
#include "binop.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
|
||||
struct instruction {
|
||||
virtual ~instruction() = default;
|
||||
|
||||
virtual void print(int indent, std::ostream& to) const = 0;
|
||||
virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
|
||||
};
|
||||
|
||||
using instruction_ptr = std::unique_ptr<instruction>;
|
||||
|
||||
struct instruction_pushint : public instruction {
|
||||
int value;
|
||||
|
||||
instruction_pushint(int v)
|
||||
: value(v) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_pushglobal : public instruction {
|
||||
std::string name;
|
||||
|
||||
instruction_pushglobal(std::string n)
|
||||
: name(std::move(n)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_push : public instruction {
|
||||
int offset;
|
||||
|
||||
instruction_push(int o)
|
||||
: offset(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_pop : public instruction {
|
||||
int count;
|
||||
|
||||
instruction_pop(int c)
|
||||
: count(c) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_mkapp : public instruction {
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_update : public instruction {
|
||||
int offset;
|
||||
|
||||
instruction_update(int o)
|
||||
: offset(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_pack : public instruction {
|
||||
int tag;
|
||||
int size;
|
||||
|
||||
instruction_pack(int t, int s)
|
||||
: tag(t), size(s) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_split : public instruction {
|
||||
int size;
|
||||
|
||||
instruction_split(int s)
|
||||
: size(s) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_jump : public instruction {
|
||||
std::vector<std::vector<instruction_ptr>> branches;
|
||||
std::map<int, int> tag_mappings;
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_slide : public instruction {
|
||||
int offset;
|
||||
|
||||
instruction_slide(int o)
|
||||
: offset(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_binop : public instruction {
|
||||
binop op;
|
||||
|
||||
instruction_binop(binop o)
|
||||
: op(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_eval : public instruction {
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_alloc : public instruction {
|
||||
int amount;
|
||||
|
||||
instruction_alloc(int a)
|
||||
: amount(a) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_unwind : public instruction {
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
294
code/compiler/13/llvm_context.cpp
Normal file
294
code/compiler/13/llvm_context.cpp
Normal file
@@ -0,0 +1,294 @@
|
||||
#include "llvm_context.hpp"
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
void llvm_context::create_types() {
|
||||
stack_type = StructType::create(ctx, "stack");
|
||||
gmachine_type = StructType::create(ctx, "gmachine");
|
||||
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||
gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
|
||||
tag_type = IntegerType::getInt8Ty(ctx);
|
||||
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||
struct_types["node_num"] = StructType::create(ctx, "node_num");
|
||||
struct_types["node_global"] = StructType::create(ctx, "node_global");
|
||||
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||
function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);
|
||||
|
||||
gmachine_type->setBody(
|
||||
stack_ptr_type,
|
||||
node_ptr_type,
|
||||
IntegerType::getInt64Ty(ctx),
|
||||
IntegerType::getInt64Ty(ctx)
|
||||
);
|
||||
struct_types.at("node_base")->setBody(
|
||||
IntegerType::getInt32Ty(ctx),
|
||||
IntegerType::getInt8Ty(ctx),
|
||||
node_ptr_type
|
||||
);
|
||||
struct_types.at("node_app")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
node_ptr_type,
|
||||
node_ptr_type
|
||||
);
|
||||
struct_types.at("node_num")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
IntegerType::getInt32Ty(ctx)
|
||||
);
|
||||
struct_types.at("node_global")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
|
||||
);
|
||||
struct_types.at("node_ind")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
node_ptr_type
|
||||
);
|
||||
struct_types.at("node_data")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
IntegerType::getInt8Ty(ctx),
|
||||
PointerType::getUnqual(node_ptr_type)
|
||||
);
|
||||
}
|
||||
|
||||
void llvm_context::create_functions() {
|
||||
auto void_type = Type::getVoidTy(ctx);
|
||||
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
|
||||
functions["stack_init"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_init",
|
||||
&module
|
||||
);
|
||||
functions["stack_free"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_free",
|
||||
&module
|
||||
);
|
||||
functions["stack_push"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_pop"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_pop",
|
||||
&module
|
||||
);
|
||||
functions["stack_peek"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_peek",
|
||||
&module
|
||||
);
|
||||
functions["stack_popn"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_popn",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_slide"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_slide",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_update"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_update",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_alloc"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_alloc",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_pack"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_pack",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_split"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_split",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_track"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_track",
|
||||
&module
|
||||
);
|
||||
|
||||
auto int32_type = IntegerType::getInt32Ty(ctx);
|
||||
functions["alloc_app"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_app",
|
||||
&module
|
||||
);
|
||||
functions["alloc_num"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { int32_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_num",
|
||||
&module
|
||||
);
|
||||
functions["alloc_global"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_global",
|
||||
&module
|
||||
);
|
||||
functions["alloc_ind"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_ind",
|
||||
&module
|
||||
);
|
||||
|
||||
functions["unwind"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"unwind",
|
||||
&module
|
||||
);
|
||||
}
|
||||
|
||||
IRBuilder<>& llvm_context::get_builder() {
|
||||
return builder;
|
||||
}
|
||||
|
||||
Module& llvm_context::get_module() {
|
||||
return module;
|
||||
}
|
||||
|
||||
BasicBlock* llvm_context::create_basic_block(const std::string& name, llvm::Function* f) {
|
||||
return BasicBlock::Create(ctx, name, f);
|
||||
}
|
||||
|
||||
ConstantInt* llvm_context::create_i8(int8_t i) {
|
||||
return ConstantInt::get(ctx, APInt(8, i));
|
||||
}
|
||||
ConstantInt* llvm_context::create_i32(int32_t i) {
|
||||
return ConstantInt::get(ctx, APInt(32, i));
|
||||
}
|
||||
ConstantInt* llvm_context::create_size(size_t i) {
|
||||
return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i));
|
||||
}
|
||||
|
||||
Value* llvm_context::create_pop(Function* f) {
|
||||
auto pop_f = functions.at("stack_pop");
|
||||
return builder.CreateCall(pop_f, { unwrap_gmachine_stack_ptr(f->arg_begin()) });
|
||||
}
|
||||
Value* llvm_context::create_peek(Function* f, Value* off) {
|
||||
auto peek_f = functions.at("stack_peek");
|
||||
return builder.CreateCall(peek_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
|
||||
}
|
||||
void llvm_context::create_push(Function* f, Value* v) {
|
||||
auto push_f = functions.at("stack_push");
|
||||
builder.CreateCall(push_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), v });
|
||||
}
|
||||
void llvm_context::create_popn(Function* f, Value* off) {
|
||||
auto popn_f = functions.at("stack_popn");
|
||||
builder.CreateCall(popn_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
|
||||
}
|
||||
void llvm_context::create_update(Function* f, Value* off) {
|
||||
auto update_f = functions.at("gmachine_update");
|
||||
builder.CreateCall(update_f, { f->arg_begin(), off });
|
||||
}
|
||||
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
|
||||
auto pack_f = functions.at("gmachine_pack");
|
||||
builder.CreateCall(pack_f, { f->arg_begin(), c, t });
|
||||
}
|
||||
void llvm_context::create_split(Function* f, Value* c) {
|
||||
auto split_f = functions.at("gmachine_split");
|
||||
builder.CreateCall(split_f, { f->arg_begin(), c });
|
||||
}
|
||||
void llvm_context::create_slide(Function* f, Value* off) {
|
||||
auto slide_f = functions.at("gmachine_slide");
|
||||
builder.CreateCall(slide_f, { f->arg_begin(), off });
|
||||
}
|
||||
void llvm_context::create_alloc(Function* f, Value* n) {
|
||||
auto alloc_f = functions.at("gmachine_alloc");
|
||||
builder.CreateCall(alloc_f, { f->arg_begin(), n });
|
||||
}
|
||||
Value* llvm_context::create_track(Function* f, Value* v) {
|
||||
auto track_f = functions.at("gmachine_track");
|
||||
return builder.CreateCall(track_f, { f->arg_begin(), v });
|
||||
}
|
||||
|
||||
void llvm_context::create_unwind(Function* f) {
|
||||
auto unwind_f = functions.at("unwind");
|
||||
builder.CreateCall(unwind_f, { f->args().begin() });
|
||||
}
|
||||
|
||||
Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
|
||||
auto offset_0 = create_i32(0);
|
||||
return builder.CreateGEP(g, { offset_0, offset_0 });
|
||||
}
|
||||
|
||||
Value* llvm_context::unwrap_num(Value* v) {
|
||||
auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
|
||||
auto cast = builder.CreatePointerCast(v, num_ptr_type);
|
||||
auto offset_0 = create_i32(0);
|
||||
auto offset_1 = create_i32(1);
|
||||
auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
|
||||
return builder.CreateLoad(int_ptr);
|
||||
}
|
||||
Value* llvm_context::create_num(Function* f, Value* v) {
|
||||
auto alloc_num_f = functions.at("alloc_num");
|
||||
auto alloc_num_call = builder.CreateCall(alloc_num_f, { v });
|
||||
return create_track(f, alloc_num_call);
|
||||
}
|
||||
|
||||
Value* llvm_context::unwrap_data_tag(Value* v) {
|
||||
auto data_ptr_type = PointerType::getUnqual(struct_types.at("node_data"));
|
||||
auto cast = builder.CreatePointerCast(v, data_ptr_type);
|
||||
auto offset_0 = create_i32(0);
|
||||
auto offset_1 = create_i32(1);
|
||||
auto tag_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
|
||||
return builder.CreateLoad(tag_ptr);
|
||||
}
|
||||
|
||||
Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
|
||||
auto alloc_global_f = functions.at("alloc_global");
|
||||
auto alloc_global_call = builder.CreateCall(alloc_global_f, { gf, a });
|
||||
return create_track(f, alloc_global_call);
|
||||
}
|
||||
|
||||
Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
|
||||
auto alloc_app_f = functions.at("alloc_app");
|
||||
auto alloc_app_call = builder.CreateCall(alloc_app_f, { l, r });
|
||||
return create_track(f, alloc_app_call);
|
||||
}
|
||||
|
||||
llvm::Function* llvm_context::create_custom_function(const std::string& name, int32_t arity) {
|
||||
auto void_type = llvm::Type::getVoidTy(ctx);
|
||||
auto new_function = llvm::Function::Create(
|
||||
function_type,
|
||||
llvm::Function::LinkageTypes::ExternalLinkage,
|
||||
"f_" + name,
|
||||
&module
|
||||
);
|
||||
auto start_block = llvm::BasicBlock::Create(ctx, "entry", new_function);
|
||||
|
||||
auto new_custom_f = custom_function_ptr(new custom_function());
|
||||
new_custom_f->arity = arity;
|
||||
new_custom_f->function = new_function;
|
||||
custom_functions["f_" + name] = std::move(new_custom_f);
|
||||
|
||||
return new_function;
|
||||
}
|
||||
|
||||
llvm_context::custom_function& llvm_context::get_custom_function(const std::string& name) {
|
||||
return *custom_functions.at("f_" + name);
|
||||
}
|
||||
81
code/compiler/13/llvm_context.hpp
Normal file
81
code/compiler/13/llvm_context.hpp
Normal file
@@ -0,0 +1,81 @@
|
||||
#pragma once
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Value.h>
|
||||
#include <map>
|
||||
|
||||
class llvm_context {
|
||||
public:
|
||||
struct custom_function {
|
||||
llvm::Function* function;
|
||||
int32_t arity;
|
||||
};
|
||||
|
||||
using custom_function_ptr = std::unique_ptr<custom_function>;
|
||||
|
||||
private:
|
||||
llvm::LLVMContext ctx;
|
||||
llvm::IRBuilder<> builder;
|
||||
llvm::Module module;
|
||||
|
||||
std::map<std::string, custom_function_ptr> custom_functions;
|
||||
std::map<std::string, llvm::Function*> functions;
|
||||
std::map<std::string, llvm::StructType*> struct_types;
|
||||
|
||||
llvm::StructType* stack_type;
|
||||
llvm::StructType* gmachine_type;
|
||||
llvm::PointerType* stack_ptr_type;
|
||||
llvm::PointerType* gmachine_ptr_type;
|
||||
llvm::PointerType* node_ptr_type;
|
||||
llvm::IntegerType* tag_type;
|
||||
llvm::FunctionType* function_type;
|
||||
|
||||
void create_types();
|
||||
void create_functions();
|
||||
|
||||
public:
|
||||
llvm_context()
|
||||
: builder(ctx), module("bloglang", ctx) {
|
||||
create_types();
|
||||
create_functions();
|
||||
}
|
||||
|
||||
llvm::IRBuilder<>& get_builder();
|
||||
llvm::Module& get_module();
|
||||
|
||||
llvm::BasicBlock* create_basic_block(const std::string& name, llvm::Function* f);
|
||||
|
||||
llvm::ConstantInt* create_i8(int8_t);
|
||||
llvm::ConstantInt* create_i32(int32_t);
|
||||
llvm::ConstantInt* create_size(size_t);
|
||||
|
||||
llvm::Value* create_pop(llvm::Function*);
|
||||
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
|
||||
void create_push(llvm::Function*, llvm::Value*);
|
||||
void create_popn(llvm::Function*, llvm::Value*);
|
||||
void create_update(llvm::Function*, llvm::Value*);
|
||||
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
void create_split(llvm::Function*, llvm::Value*);
|
||||
void create_slide(llvm::Function*, llvm::Value*);
|
||||
void create_alloc(llvm::Function*, llvm::Value*);
|
||||
llvm::Value* create_track(llvm::Function*, llvm::Value*);
|
||||
|
||||
void create_unwind(llvm::Function*);
|
||||
|
||||
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
|
||||
|
||||
llvm::Value* unwrap_num(llvm::Value*);
|
||||
llvm::Value* create_num(llvm::Function*, llvm::Value*);
|
||||
|
||||
llvm::Value* unwrap_data_tag(llvm::Value*);
|
||||
|
||||
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
|
||||
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
|
||||
llvm::Function* create_custom_function(const std::string& name, int32_t arity);
|
||||
custom_function& get_custom_function(const std::string& name);
|
||||
};
|
||||
27
code/compiler/13/main.cpp
Normal file
27
code/compiler/13/main.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
#include "ast.hpp"
|
||||
#include <iostream>
|
||||
#include "parser.hpp"
|
||||
#include "compiler.hpp"
|
||||
#include "error.hpp"
|
||||
|
||||
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
||||
std::cerr << "An error occured: " << msg << std::endl;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if(argc != 2) {
|
||||
std::cerr << "please enter a file to compile." << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
compiler cmp(argv[1]);
|
||||
|
||||
try {
|
||||
cmp("program.o");
|
||||
} catch(unification_error& err) {
|
||||
err.pretty_print(std::cerr, cmp.get_file_manager(), cmp.get_type_manager());
|
||||
} catch(type_error& err) {
|
||||
err.pretty_print(std::cerr, cmp.get_file_manager());
|
||||
} catch (compiler_error& err) {
|
||||
err.pretty_print(std::cerr, cmp.get_file_manager());
|
||||
}
|
||||
}
|
||||
17
code/compiler/13/mangler.cpp
Normal file
17
code/compiler/13/mangler.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "mangler.hpp"
|
||||
|
||||
std::string mangler::new_mangled_name(const std::string& n) {
|
||||
auto occurence_it = occurence_count.find(n);
|
||||
int occurence = 0;
|
||||
if(occurence_it != occurence_count.end()) {
|
||||
occurence = occurence_it->second + 1;
|
||||
}
|
||||
occurence_count[n] = occurence;
|
||||
|
||||
std::string final_name = n;
|
||||
if (occurence != 0) {
|
||||
final_name += "_";
|
||||
final_name += std::to_string(occurence);
|
||||
}
|
||||
return final_name;
|
||||
}
|
||||
11
code/compiler/13/mangler.hpp
Normal file
11
code/compiler/13/mangler.hpp
Normal file
@@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
class mangler {
|
||||
private:
|
||||
std::map<std::string, int> occurence_count;
|
||||
|
||||
public:
|
||||
std::string new_mangled_name(const std::string& str);
|
||||
};
|
||||
72
code/compiler/13/parse_driver.cpp
Normal file
72
code/compiler/13/parse_driver.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
#include "parse_driver.hpp"
|
||||
#include "scanner.hpp"
|
||||
#include <sstream>
|
||||
|
||||
file_mgr::file_mgr() : file_offset(0) {
|
||||
line_offsets.push_back(0);
|
||||
}
|
||||
|
||||
void file_mgr::write(const char* buf, size_t len) {
|
||||
string_stream.write(buf, len);
|
||||
file_offset += len;
|
||||
}
|
||||
|
||||
void file_mgr::mark_line() {
|
||||
line_offsets.push_back(file_offset);
|
||||
}
|
||||
|
||||
void file_mgr::finalize() {
|
||||
file_contents = string_stream.str();
|
||||
}
|
||||
|
||||
size_t file_mgr::get_index(int line, int column) const {
|
||||
assert(line > 0 && line <= line_offsets.size());
|
||||
return line_offsets.at(line-1) + column - 1;
|
||||
}
|
||||
|
||||
size_t file_mgr::get_line_end(int line) const {
|
||||
if(line == line_offsets.size()) return file_contents.size();
|
||||
return get_index(line+1, 1);
|
||||
}
|
||||
|
||||
void file_mgr::print_location(
|
||||
std::ostream& stream,
|
||||
const yy::location& loc,
|
||||
bool highlight) const {
|
||||
size_t print_start = get_index(loc.begin.line, 1);
|
||||
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
||||
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
||||
size_t print_end = get_line_end(loc.end.line);
|
||||
const char* content = file_contents.c_str();
|
||||
stream.write(content + print_start, highlight_start - print_start);
|
||||
if(highlight) stream << "\033[4;31m";
|
||||
stream.write(content + highlight_start, highlight_end - highlight_start);
|
||||
if(highlight) stream << "\033[0m";
|
||||
stream.write(content + highlight_end, print_end - highlight_end);
|
||||
}
|
||||
|
||||
bool parse_driver::operator()() {
|
||||
FILE* stream = fopen(file_name.c_str(), "r");
|
||||
if(!stream) return false;
|
||||
yyscan_t scanner;
|
||||
yylex_init(&scanner);
|
||||
yyset_in(stream, scanner);
|
||||
yy::parser parser(scanner, *this);
|
||||
parser();
|
||||
yylex_destroy(scanner);
|
||||
fclose(stream);
|
||||
file_m->finalize();
|
||||
return true;
|
||||
}
|
||||
|
||||
yy::location& parse_driver::get_current_location() {
|
||||
return location;
|
||||
}
|
||||
|
||||
file_mgr& parse_driver::get_file_manager() const {
|
||||
return *file_m;
|
||||
}
|
||||
|
||||
definition_group& parse_driver::get_global_defs() const {
|
||||
return *global_defs;
|
||||
}
|
||||
58
code/compiler/13/parse_driver.hpp
Normal file
58
code/compiler/13/parse_driver.hpp
Normal file
@@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include "definition.hpp"
|
||||
#include "location.hh"
|
||||
#include "parser.hpp"
|
||||
|
||||
struct parse_driver;
|
||||
|
||||
void scanner_init(parse_driver* d, yyscan_t* scanner);
|
||||
void scanner_destroy(yyscan_t* scanner);
|
||||
|
||||
class file_mgr {
|
||||
private:
|
||||
std::ostringstream string_stream;
|
||||
std::string file_contents;
|
||||
|
||||
size_t file_offset;
|
||||
std::vector<size_t> line_offsets;
|
||||
public:
|
||||
file_mgr();
|
||||
|
||||
void write(const char* buffer, size_t len);
|
||||
void mark_line();
|
||||
void finalize();
|
||||
|
||||
size_t get_index(int line, int column) const;
|
||||
size_t get_line_end(int line) const;
|
||||
void print_location(
|
||||
std::ostream& stream,
|
||||
const yy::location& loc,
|
||||
bool highlight = true) const;
|
||||
};
|
||||
|
||||
class parse_driver {
|
||||
private:
|
||||
std::string file_name;
|
||||
yy::location location;
|
||||
definition_group* global_defs;
|
||||
file_mgr* file_m;
|
||||
|
||||
public:
|
||||
parse_driver(
|
||||
file_mgr& mgr,
|
||||
definition_group& defs,
|
||||
const std::string& file)
|
||||
: file_name(file), file_m(&mgr), global_defs(&defs) {}
|
||||
|
||||
bool operator()();
|
||||
yy::location& get_current_location();
|
||||
file_mgr& get_file_manager() const;
|
||||
definition_group& get_global_defs() const;
|
||||
};
|
||||
|
||||
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
||||
|
||||
YY_DECL;
|
||||
48
code/compiler/13/parsed_type.cpp
Normal file
48
code/compiler/13/parsed_type.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
#include "parsed_type.hpp"
|
||||
#include <sstream>
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "error.hpp"
|
||||
|
||||
type_ptr parsed_type_app::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& e) const {
|
||||
auto parent_type = e.lookup_type(name);
|
||||
if(parent_type == nullptr)
|
||||
throw type_error("no such type or type constructor " + name);
|
||||
type_base* base_type;
|
||||
if(!(base_type = dynamic_cast<type_base*>(parent_type.get())))
|
||||
throw type_error("invalid type " + name);
|
||||
if(base_type->arity != arguments.size()) {
|
||||
std::ostringstream error_stream;
|
||||
error_stream << "invalid application of type ";
|
||||
error_stream << name;
|
||||
error_stream << " (" << base_type->arity << " argument(s) expected, ";
|
||||
error_stream << "but " << arguments.size() << " provided)";
|
||||
throw type_error(error_stream.str());
|
||||
}
|
||||
|
||||
type_app* new_app = new type_app(std::move(parent_type));
|
||||
type_ptr to_return(new_app);
|
||||
for(auto& arg : arguments) {
|
||||
new_app->arguments.push_back(arg->to_type(vars, e));
|
||||
}
|
||||
return to_return;
|
||||
}
|
||||
|
||||
type_ptr parsed_type_var::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& e) const {
|
||||
if(vars.find(var) == vars.end())
|
||||
throw type_error("the type variable " + var + " was not explicitly declared.");
|
||||
return type_ptr(new type_var(var));
|
||||
}
|
||||
|
||||
|
||||
type_ptr parsed_type_arr::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& env) const {
|
||||
auto new_left = left->to_type(vars, env);
|
||||
auto new_right = right->to_type(vars, env);
|
||||
return type_ptr(new type_arr(std::move(new_left), std::move(new_right)));
|
||||
}
|
||||
43
code/compiler/13/parsed_type.hpp
Normal file
43
code/compiler/13/parsed_type.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "type_env.hpp"
|
||||
|
||||
struct parsed_type {
|
||||
virtual type_ptr to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& env) const = 0;
|
||||
};
|
||||
|
||||
using parsed_type_ptr = std::unique_ptr<parsed_type>;
|
||||
|
||||
struct parsed_type_app : parsed_type {
|
||||
std::string name;
|
||||
std::vector<parsed_type_ptr> arguments;
|
||||
|
||||
parsed_type_app(
|
||||
std::string n,
|
||||
std::vector<parsed_type_ptr> as)
|
||||
: name(std::move(n)), arguments(std::move(as)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
|
||||
struct parsed_type_var : parsed_type {
|
||||
std::string var;
|
||||
|
||||
parsed_type_var(std::string v) : var(std::move(v)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
|
||||
struct parsed_type_arr : parsed_type {
|
||||
parsed_type_ptr left;
|
||||
parsed_type_ptr right;
|
||||
|
||||
parsed_type_arr(parsed_type_ptr l, parsed_type_ptr r)
|
||||
: left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
180
code/compiler/13/parser.y
Normal file
180
code/compiler/13/parser.y
Normal file
@@ -0,0 +1,180 @@
|
||||
%code requires {
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "ast.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "parser.hpp"
|
||||
#include "parsed_type.hpp"
|
||||
|
||||
class parse_driver;
|
||||
using yyscan_t = void*;
|
||||
}
|
||||
|
||||
%param { yyscan_t scanner }
|
||||
%param { parse_driver& drv }
|
||||
|
||||
%code {
|
||||
#include "parse_driver.hpp"
|
||||
}
|
||||
|
||||
%token BACKSLASH
|
||||
%token PLUS
|
||||
%token TIMES
|
||||
%token MINUS
|
||||
%token DIVIDE
|
||||
%token <int> INT
|
||||
%token DEFN
|
||||
%token DATA
|
||||
%token CASE
|
||||
%token OF
|
||||
%token LET
|
||||
%token IN
|
||||
%token OCURLY
|
||||
%token CCURLY
|
||||
%token OPAREN
|
||||
%token CPAREN
|
||||
%token COMMA
|
||||
%token ARROW
|
||||
%token EQUAL
|
||||
%token <std::string> LID
|
||||
%token <std::string> UID
|
||||
|
||||
%language "c++"
|
||||
%define api.value.type variant
|
||||
%define api.token.constructor
|
||||
|
||||
%locations
|
||||
|
||||
%type <std::vector<std::string>> lowercaseParams
|
||||
%type <std::vector<branch_ptr>> branches
|
||||
%type <std::vector<constructor_ptr>> constructors
|
||||
%type <std::vector<parsed_type_ptr>> typeList
|
||||
%type <definition_group> definitions
|
||||
%type <parsed_type_ptr> type nonArrowType typeListElement
|
||||
%type <ast_ptr> aAdd aMul case let lambda app appBase
|
||||
%type <definition_data_ptr> data
|
||||
%type <definition_defn_ptr> defn
|
||||
%type <branch_ptr> branch
|
||||
%type <pattern_ptr> pattern
|
||||
%type <constructor_ptr> constructor
|
||||
|
||||
%start program
|
||||
|
||||
%%
|
||||
|
||||
program
|
||||
: definitions { $1.vis = visibility::global; std::swap(drv.get_global_defs(), $1); }
|
||||
;
|
||||
|
||||
definitions
|
||||
: definitions defn { $$ = std::move($1); auto name = $2->name; $$.defs_defn[name] = std::move($2); }
|
||||
| definitions data { $$ = std::move($1); auto name = $2->name; $$.defs_data[name] = std::move($2); }
|
||||
| %empty { $$ = definition_group(); }
|
||||
;
|
||||
|
||||
defn
|
||||
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||
{ $$ = definition_defn_ptr(
|
||||
new definition_defn(std::move($2), std::move($3), std::move($6), @$)); }
|
||||
;
|
||||
|
||||
lowercaseParams
|
||||
: %empty { $$ = std::vector<std::string>(); }
|
||||
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
|
||||
aAdd
|
||||
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3), @$)); }
|
||||
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3), @$)); }
|
||||
| aMul { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
aMul
|
||||
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3), @$)); }
|
||||
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3), @$)); }
|
||||
| app { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
app
|
||||
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2), @$)); }
|
||||
| appBase { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
appBase
|
||||
: INT { $$ = ast_ptr(new ast_int($1, @$)); }
|
||||
| LID { $$ = ast_ptr(new ast_lid(std::move($1), @$)); }
|
||||
| UID { $$ = ast_ptr(new ast_uid(std::move($1), @$)); }
|
||||
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||
| case { $$ = std::move($1); }
|
||||
| let { $$ = std::move($1); }
|
||||
| lambda { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
let
|
||||
: LET OCURLY definitions CCURLY IN OCURLY aAdd CCURLY
|
||||
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7), @$)); }
|
||||
;
|
||||
|
||||
lambda
|
||||
: BACKSLASH lowercaseParams ARROW OCURLY aAdd CCURLY
|
||||
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5), @$)); }
|
||||
;
|
||||
|
||||
case
|
||||
: CASE aAdd OF OCURLY branches CCURLY
|
||||
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5), @$)); }
|
||||
;
|
||||
|
||||
branches
|
||||
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
|
||||
;
|
||||
|
||||
branch
|
||||
: pattern ARROW OCURLY aAdd CCURLY
|
||||
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||
;
|
||||
|
||||
pattern
|
||||
: LID { $$ = pattern_ptr(new pattern_var(std::move($1), @$)); }
|
||||
| UID lowercaseParams
|
||||
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2), @$)); }
|
||||
;
|
||||
|
||||
data
|
||||
: DATA UID lowercaseParams EQUAL OCURLY constructors CCURLY
|
||||
{ $$ = definition_data_ptr(new definition_data(std::move($2), std::move($3), std::move($6), @$)); }
|
||||
;
|
||||
|
||||
constructors
|
||||
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
|
||||
| constructor
|
||||
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
|
||||
;
|
||||
|
||||
constructor
|
||||
: UID typeList
|
||||
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
|
||||
;
|
||||
|
||||
type
|
||||
: nonArrowType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
|
||||
| nonArrowType { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
nonArrowType
|
||||
: UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
|
||||
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||
| OPAREN type CPAREN { $$ = std::move($2); }
|
||||
;
|
||||
|
||||
typeListElement
|
||||
: OPAREN type CPAREN { $$ = std::move($2); }
|
||||
| UID { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), {})); }
|
||||
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||
;
|
||||
|
||||
typeList
|
||||
: %empty { $$ = std::vector<parsed_type_ptr>(); }
|
||||
| typeList typeListElement { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
269
code/compiler/13/runtime.c
Normal file
269
code/compiler/13/runtime.c
Normal file
@@ -0,0 +1,269 @@
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <memory.h>
|
||||
#include <stdio.h>
|
||||
#include "runtime.h"
|
||||
|
||||
struct node_base* alloc_node() {
|
||||
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||
new_node->gc_next = NULL;
|
||||
new_node->gc_reachable = 0;
|
||||
assert(new_node != NULL);
|
||||
return new_node;
|
||||
}
|
||||
|
||||
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
||||
struct node_app* node = (struct node_app*) alloc_node();
|
||||
node->base.tag = NODE_APP;
|
||||
node->left = l;
|
||||
node->right = r;
|
||||
return node;
|
||||
}
|
||||
|
||||
struct node_num* alloc_num(int32_t n) {
|
||||
struct node_num* node = (struct node_num*) alloc_node();
|
||||
node->base.tag = NODE_NUM;
|
||||
node->value = n;
|
||||
return node;
|
||||
}
|
||||
|
||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
||||
struct node_global* node = (struct node_global*) alloc_node();
|
||||
node->base.tag = NODE_GLOBAL;
|
||||
node->arity = a;
|
||||
node->function = f;
|
||||
return node;
|
||||
}
|
||||
|
||||
struct node_ind* alloc_ind(struct node_base* n) {
|
||||
struct node_ind* node = (struct node_ind*) alloc_node();
|
||||
node->base.tag = NODE_IND;
|
||||
node->next = n;
|
||||
return node;
|
||||
}
|
||||
|
||||
void free_node_direct(struct node_base* n) {
|
||||
if(n->tag == NODE_DATA) {
|
||||
free(((struct node_data*) n)->array);
|
||||
}
|
||||
}
|
||||
|
||||
void gc_visit_node(struct node_base* n) {
|
||||
if(n->gc_reachable) return;
|
||||
n->gc_reachable = 1;
|
||||
|
||||
if(n->tag == NODE_APP) {
|
||||
struct node_app* app = (struct node_app*) n;
|
||||
gc_visit_node(app->left);
|
||||
gc_visit_node(app->right);
|
||||
} if(n->tag == NODE_IND) {
|
||||
struct node_ind* ind = (struct node_ind*) n;
|
||||
gc_visit_node(ind->next);
|
||||
} if(n->tag == NODE_DATA) {
|
||||
struct node_data* data = (struct node_data*) n;
|
||||
struct node_base** to_visit = data->array;
|
||||
while(*to_visit) {
|
||||
gc_visit_node(*to_visit);
|
||||
to_visit++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void stack_init(struct stack* s) {
|
||||
s->size = 4;
|
||||
s->count = 0;
|
||||
s->data = malloc(sizeof(*s->data) * s->size);
|
||||
assert(s->data != NULL);
|
||||
}
|
||||
|
||||
void stack_free(struct stack* s) {
|
||||
free(s->data);
|
||||
}
|
||||
|
||||
void stack_push(struct stack* s, struct node_base* n) {
|
||||
while(s->count >= s->size) {
|
||||
s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
|
||||
assert(s->data != NULL);
|
||||
}
|
||||
s->data[s->count++] = n;
|
||||
}
|
||||
|
||||
struct node_base* stack_pop(struct stack* s) {
|
||||
assert(s->count > 0);
|
||||
return s->data[--s->count];
|
||||
}
|
||||
|
||||
struct node_base* stack_peek(struct stack* s, size_t o) {
|
||||
assert(s->count > o);
|
||||
return s->data[s->count - o - 1];
|
||||
}
|
||||
|
||||
void stack_popn(struct stack* s, size_t n) {
|
||||
assert(s->count >= n);
|
||||
s->count -= n;
|
||||
}
|
||||
|
||||
void gmachine_init(struct gmachine* g) {
|
||||
stack_init(&g->stack);
|
||||
g->gc_nodes = NULL;
|
||||
g->gc_node_count = 0;
|
||||
g->gc_node_threshold = 128;
|
||||
}
|
||||
|
||||
void gmachine_free(struct gmachine* g) {
|
||||
stack_free(&g->stack);
|
||||
struct node_base* to_free = g->gc_nodes;
|
||||
struct node_base* next;
|
||||
|
||||
while(to_free) {
|
||||
next = to_free->gc_next;
|
||||
free_node_direct(to_free);
|
||||
free(to_free);
|
||||
to_free = next;
|
||||
}
|
||||
}
|
||||
|
||||
void gmachine_slide(struct gmachine* g, size_t n) {
|
||||
assert(g->stack.count > n);
|
||||
g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
|
||||
g->stack.count -= n;
|
||||
}
|
||||
|
||||
void gmachine_update(struct gmachine* g, size_t o) {
|
||||
assert(g->stack.count > o + 1);
|
||||
struct node_ind* ind =
|
||||
(struct node_ind*) g->stack.data[g->stack.count - o - 2];
|
||||
ind->base.tag = NODE_IND;
|
||||
ind->next = g->stack.data[g->stack.count -= 1];
|
||||
}
|
||||
|
||||
void gmachine_alloc(struct gmachine* g, size_t o) {
|
||||
while(o--) {
|
||||
stack_push(&g->stack,
|
||||
gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
|
||||
}
|
||||
}
|
||||
|
||||
void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
|
||||
assert(g->stack.count >= n);
|
||||
|
||||
struct node_base** data = malloc(sizeof(*data) * (n + 1));
|
||||
assert(data != NULL);
|
||||
memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
|
||||
data[n] = NULL;
|
||||
|
||||
struct node_data* new_node = (struct node_data*) alloc_node();
|
||||
new_node->array = data;
|
||||
new_node->base.tag = NODE_DATA;
|
||||
new_node->tag = t;
|
||||
|
||||
stack_popn(&g->stack, n);
|
||||
stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
|
||||
}
|
||||
|
||||
void gmachine_split(struct gmachine* g, size_t n) {
|
||||
struct node_data* node = (struct node_data*) stack_pop(&g->stack);
|
||||
for(size_t i = 0; i < n; i++) {
|
||||
stack_push(&g->stack, node->array[i]);
|
||||
}
|
||||
}
|
||||
|
||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
||||
g->gc_node_count++;
|
||||
b->gc_next = g->gc_nodes;
|
||||
g->gc_nodes = b;
|
||||
|
||||
if(g->gc_node_count >= g->gc_node_threshold) {
|
||||
uint64_t nodes_before = g->gc_node_count;
|
||||
gc_visit_node(b);
|
||||
gmachine_gc(g);
|
||||
g->gc_node_threshold = g->gc_node_count * 2;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
void gmachine_gc(struct gmachine* g) {
|
||||
for(size_t i = 0; i < g->stack.count; i++) {
|
||||
gc_visit_node(g->stack.data[i]);
|
||||
}
|
||||
|
||||
struct node_base** head_ptr = &g->gc_nodes;
|
||||
while(*head_ptr) {
|
||||
if((*head_ptr)->gc_reachable) {
|
||||
(*head_ptr)->gc_reachable = 0;
|
||||
head_ptr = &(*head_ptr)->gc_next;
|
||||
} else {
|
||||
struct node_base* to_free = *head_ptr;
|
||||
*head_ptr = to_free->gc_next;
|
||||
free_node_direct(to_free);
|
||||
free(to_free);
|
||||
g->gc_node_count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unwind(struct gmachine* g) {
|
||||
struct stack* s = &g->stack;
|
||||
|
||||
while(1) {
|
||||
struct node_base* peek = stack_peek(s, 0);
|
||||
if(peek->tag == NODE_APP) {
|
||||
struct node_app* n = (struct node_app*) peek;
|
||||
stack_push(s, n->left);
|
||||
} else if(peek->tag == NODE_GLOBAL) {
|
||||
struct node_global* n = (struct node_global*) peek;
|
||||
assert(s->count > n->arity);
|
||||
|
||||
for(size_t i = 1; i <= n->arity; i++) {
|
||||
s->data[s->count - i]
|
||||
= ((struct node_app*) s->data[s->count - i - 1])->right;
|
||||
}
|
||||
|
||||
n->function(g);
|
||||
} else if(peek->tag == NODE_IND) {
|
||||
struct node_ind* n = (struct node_ind*) peek;
|
||||
stack_pop(s);
|
||||
stack_push(s, n->next);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern void f_main(struct gmachine* s);
|
||||
|
||||
void print_node(struct node_base* n) {
|
||||
if(n->tag == NODE_APP) {
|
||||
struct node_app* app = (struct node_app*) n;
|
||||
print_node(app->left);
|
||||
putchar(' ');
|
||||
print_node(app->right);
|
||||
} else if(n->tag == NODE_DATA) {
|
||||
printf("(Packed)");
|
||||
} else if(n->tag == NODE_GLOBAL) {
|
||||
struct node_global* global = (struct node_global*) n;
|
||||
printf("(Global: %p)", global->function);
|
||||
} else if(n->tag == NODE_IND) {
|
||||
print_node(((struct node_ind*) n)->next);
|
||||
} else if(n->tag == NODE_NUM) {
|
||||
struct node_num* num = (struct node_num*) n;
|
||||
printf("%d", num->value);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
struct gmachine gmachine;
|
||||
struct node_global* first_node = alloc_global(f_main, 0);
|
||||
struct node_base* result;
|
||||
|
||||
gmachine_init(&gmachine);
|
||||
gmachine_track(&gmachine, (struct node_base*) first_node);
|
||||
stack_push(&gmachine.stack, (struct node_base*) first_node);
|
||||
unwind(&gmachine);
|
||||
result = stack_pop(&gmachine.stack);
|
||||
printf("Result: ");
|
||||
print_node(result);
|
||||
putchar('\n');
|
||||
gmachine_free(&gmachine);
|
||||
}
|
||||
84
code/compiler/13/runtime.h
Normal file
84
code/compiler/13/runtime.h
Normal file
@@ -0,0 +1,84 @@
|
||||
#pragma once
|
||||
#include <stdlib.h>
|
||||
|
||||
struct gmachine;
|
||||
|
||||
enum node_tag {
|
||||
NODE_APP,
|
||||
NODE_NUM,
|
||||
NODE_GLOBAL,
|
||||
NODE_IND,
|
||||
NODE_DATA
|
||||
};
|
||||
|
||||
struct node_base {
|
||||
enum node_tag tag;
|
||||
int8_t gc_reachable;
|
||||
struct node_base* gc_next;
|
||||
};
|
||||
|
||||
struct node_app {
|
||||
struct node_base base;
|
||||
struct node_base* left;
|
||||
struct node_base* right;
|
||||
};
|
||||
|
||||
struct node_num {
|
||||
struct node_base base;
|
||||
int32_t value;
|
||||
};
|
||||
|
||||
struct node_global {
|
||||
struct node_base base;
|
||||
int32_t arity;
|
||||
void (*function)(struct gmachine*);
|
||||
};
|
||||
|
||||
struct node_ind {
|
||||
struct node_base base;
|
||||
struct node_base* next;
|
||||
};
|
||||
|
||||
struct node_data {
|
||||
struct node_base base;
|
||||
int8_t tag;
|
||||
struct node_base** array;
|
||||
};
|
||||
|
||||
struct node_base* alloc_node();
|
||||
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
|
||||
struct node_num* alloc_num(int32_t n);
|
||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
|
||||
struct node_ind* alloc_ind(struct node_base* n);
|
||||
void free_node_direct(struct node_base*);
|
||||
void gc_visit_node(struct node_base*);
|
||||
|
||||
struct stack {
|
||||
size_t size;
|
||||
size_t count;
|
||||
struct node_base** data;
|
||||
};
|
||||
|
||||
void stack_init(struct stack* s);
|
||||
void stack_free(struct stack* s);
|
||||
void stack_push(struct stack* s, struct node_base* n);
|
||||
struct node_base* stack_pop(struct stack* s);
|
||||
struct node_base* stack_peek(struct stack* s, size_t o);
|
||||
void stack_popn(struct stack* s, size_t n);
|
||||
|
||||
struct gmachine {
|
||||
struct stack stack;
|
||||
struct node_base* gc_nodes;
|
||||
int64_t gc_node_count;
|
||||
int64_t gc_node_threshold;
|
||||
};
|
||||
|
||||
void gmachine_init(struct gmachine* g);
|
||||
void gmachine_free(struct gmachine* g);
|
||||
void gmachine_slide(struct gmachine* g, size_t n);
|
||||
void gmachine_update(struct gmachine* g, size_t o);
|
||||
void gmachine_alloc(struct gmachine* g, size_t o);
|
||||
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
|
||||
void gmachine_split(struct gmachine* g, size_t n);
|
||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
|
||||
void gmachine_gc(struct gmachine* g);
|
||||
45
code/compiler/13/scanner.l
Normal file
45
code/compiler/13/scanner.l
Normal file
@@ -0,0 +1,45 @@
|
||||
%option noyywrap
|
||||
%option reentrant
|
||||
%option header-file="scanner.hpp"
|
||||
|
||||
%{
|
||||
#include <iostream>
|
||||
#include "ast.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "parse_driver.hpp"
|
||||
#include "parser.hpp"
|
||||
|
||||
#define YY_USER_ACTION \
|
||||
drv.get_file_manager().write(yytext, yyleng); \
|
||||
LOC.step(); LOC.columns(yyleng);
|
||||
#define LOC drv.get_current_location()
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
\n { drv.get_current_location().lines(); drv.get_file_manager().mark_line(); }
|
||||
[ ]+ {}
|
||||
\\ { return yy::parser::make_BACKSLASH(LOC); }
|
||||
\+ { return yy::parser::make_PLUS(LOC); }
|
||||
\* { return yy::parser::make_TIMES(LOC); }
|
||||
- { return yy::parser::make_MINUS(LOC); }
|
||||
\/ { return yy::parser::make_DIVIDE(LOC); }
|
||||
[0-9]+ { return yy::parser::make_INT(atoi(yytext), LOC); }
|
||||
defn { return yy::parser::make_DEFN(LOC); }
|
||||
data { return yy::parser::make_DATA(LOC); }
|
||||
case { return yy::parser::make_CASE(LOC); }
|
||||
of { return yy::parser::make_OF(LOC); }
|
||||
let { return yy::parser::make_LET(LOC); }
|
||||
in { return yy::parser::make_IN(LOC); }
|
||||
\{ { return yy::parser::make_OCURLY(LOC); }
|
||||
\} { return yy::parser::make_CCURLY(LOC); }
|
||||
\( { return yy::parser::make_OPAREN(LOC); }
|
||||
\) { return yy::parser::make_CPAREN(LOC); }
|
||||
, { return yy::parser::make_COMMA(LOC); }
|
||||
-> { return yy::parser::make_ARROW(LOC); }
|
||||
= { return yy::parser::make_EQUAL(LOC); }
|
||||
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext), LOC); }
|
||||
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext), LOC); }
|
||||
<<EOF>> { return yy::parser::make_YYEOF(LOC); }
|
||||
|
||||
%%
|
||||
23
code/compiler/13/test.cpp
Normal file
23
code/compiler/13/test.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include "graph.hpp"
|
||||
|
||||
int main() {
|
||||
function_graph graph;
|
||||
graph.add_edge("f", "g");
|
||||
graph.add_edge("g", "h");
|
||||
graph.add_edge("h", "f");
|
||||
|
||||
graph.add_edge("i", "j");
|
||||
graph.add_edge("j", "i");
|
||||
|
||||
graph.add_edge("j", "f");
|
||||
|
||||
graph.add_edge("x", "f");
|
||||
graph.add_edge("x", "i");
|
||||
|
||||
for(auto& group : graph.compute_order()) {
|
||||
std::cout << "Group: " << std::endl;
|
||||
for(auto& member : group->members) {
|
||||
std::cout << member << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
213
code/compiler/13/type.cpp
Normal file
213
code/compiler/13/type.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
#include "type.hpp"
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "error.hpp"
|
||||
|
||||
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
if(forall.size() != 0) {
|
||||
to << "forall ";
|
||||
for(auto& var : forall) {
|
||||
to << var << " ";
|
||||
}
|
||||
to << ". ";
|
||||
}
|
||||
monotype->print(mgr, to);
|
||||
}
|
||||
|
||||
type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
||||
if(forall.size() == 0) return monotype;
|
||||
std::map<std::string, type_ptr> subst;
|
||||
for(auto& var : forall) {
|
||||
subst[var] = mgr.new_type();
|
||||
}
|
||||
return mgr.substitute(subst, monotype);
|
||||
}
|
||||
|
||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
auto type = mgr.lookup(name);
|
||||
if(type) {
|
||||
type->print(mgr, to);
|
||||
} else {
|
||||
to << name;
|
||||
}
|
||||
}
|
||||
|
||||
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
to << name;
|
||||
}
|
||||
|
||||
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
type_var* var;
|
||||
bool print_parenths = dynamic_cast<type_arr*>(mgr.resolve(left, var).get()) != nullptr;
|
||||
if(print_parenths) to << "(";
|
||||
left->print(mgr, to);
|
||||
if(print_parenths) to << ")";
|
||||
to << " -> ";
|
||||
right->print(mgr, to);
|
||||
}
|
||||
|
||||
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
constructor->print(mgr, to);
|
||||
to << "*";
|
||||
for(auto& arg : arguments) {
|
||||
to << " ";
|
||||
arg->print(mgr, to);
|
||||
}
|
||||
}
|
||||
|
||||
std::string type_mgr::new_type_name() {
|
||||
int temp = last_id++;
|
||||
std::string str = "";
|
||||
|
||||
while(temp != -1) {
|
||||
str += (char) ('a' + (temp % 26));
|
||||
temp = temp / 26 - 1;
|
||||
}
|
||||
|
||||
std::reverse(str.begin(), str.end());
|
||||
return str;
|
||||
}
|
||||
|
||||
type_ptr type_mgr::new_type() {
|
||||
return type_ptr(new type_var(new_type_name()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::new_arrow_type() {
|
||||
return type_ptr(new type_arr(new_type(), new_type()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::lookup(const std::string& var) const {
|
||||
auto types_it = types.find(var);
|
||||
if(types_it != types.end()) return types_it->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
||||
type_var* cast;
|
||||
|
||||
var = nullptr;
|
||||
while((cast = dynamic_cast<type_var*>(t.get()))) {
|
||||
auto it = types.find(cast->name);
|
||||
|
||||
if(it == types.end()) {
|
||||
var = cast;
|
||||
break;
|
||||
}
|
||||
t = it->second;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void type_mgr::unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc) {
|
||||
type_var *lvar, *rvar;
|
||||
type_arr *larr, *rarr;
|
||||
type_base *lid, *rid;
|
||||
type_app *lapp, *rapp;
|
||||
|
||||
l = resolve(l, lvar);
|
||||
r = resolve(r, rvar);
|
||||
|
||||
if(lvar) {
|
||||
bind(lvar->name, r);
|
||||
return;
|
||||
} else if(rvar) {
|
||||
bind(rvar->name, l);
|
||||
return;
|
||||
} else if((larr = dynamic_cast<type_arr*>(l.get())) &&
|
||||
(rarr = dynamic_cast<type_arr*>(r.get()))) {
|
||||
unify(larr->left, rarr->left, loc);
|
||||
unify(larr->right, rarr->right, loc);
|
||||
return;
|
||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||
if(lid->name == rid->name &&
|
||||
lid->arity == rid->arity)
|
||||
return;
|
||||
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
||||
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
||||
unify(lapp->constructor, rapp->constructor, loc);
|
||||
auto left_it = lapp->arguments.begin();
|
||||
auto right_it = rapp->arguments.begin();
|
||||
while(left_it != lapp->arguments.end() &&
|
||||
right_it != rapp->arguments.end()) {
|
||||
unify(*left_it, *right_it, loc);
|
||||
left_it++, right_it++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
throw unification_error(l, r, loc);
|
||||
}
|
||||
|
||||
type_ptr type_mgr::substitute(const std::map<std::string, type_ptr>& subst, const type_ptr& t) const {
|
||||
type_ptr temp = t;
|
||||
while(type_var* var = dynamic_cast<type_var*>(temp.get())) {
|
||||
auto subst_it = subst.find(var->name);
|
||||
if(subst_it != subst.end()) return subst_it->second;
|
||||
auto var_it = types.find(var->name);
|
||||
if(var_it == types.end()) return t;
|
||||
temp = var_it->second;
|
||||
}
|
||||
|
||||
if(type_arr* arr = dynamic_cast<type_arr*>(temp.get())) {
|
||||
auto left_result = substitute(subst, arr->left);
|
||||
auto right_result = substitute(subst, arr->right);
|
||||
if(left_result == arr->left && right_result == arr->right) return t;
|
||||
return type_ptr(new type_arr(left_result, right_result));
|
||||
} else if(type_app* app = dynamic_cast<type_app*>(temp.get())) {
|
||||
auto constructor_result = substitute(subst, app->constructor);
|
||||
bool arg_changed = false;
|
||||
std::vector<type_ptr> new_args;
|
||||
for(auto& arg : app->arguments) {
|
||||
auto arg_result = substitute(subst, arg);
|
||||
arg_changed |= arg_result != arg;
|
||||
new_args.push_back(std::move(arg_result));
|
||||
}
|
||||
|
||||
if(constructor_result == app->constructor && !arg_changed) return t;
|
||||
type_app* new_app = new type_app(std::move(constructor_result));
|
||||
std::swap(new_app->arguments, new_args);
|
||||
return type_ptr(new_app);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||
|
||||
if(other && other->name == s) return;
|
||||
types[s] = t;
|
||||
}
|
||||
|
||||
void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
|
||||
type_var* var;
|
||||
type_ptr resolved = resolve(t, var);
|
||||
|
||||
if(var) {
|
||||
into.insert(var->name);
|
||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) {
|
||||
find_free(arr->left, into);
|
||||
find_free(arr->right, into);
|
||||
} else if(type_app* app = dynamic_cast<type_app*>(resolved.get())) {
|
||||
find_free(app->constructor, into);
|
||||
for(auto& arg : app->arguments) find_free(arg, into);
|
||||
}
|
||||
}
|
||||
|
||||
void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const {
|
||||
std::set<std::string> monotype_free;
|
||||
type_mgr limited_mgr;
|
||||
for(auto& binding : types) {
|
||||
auto existing_position = std::find(t->forall.begin(), t->forall.end(), binding.first);
|
||||
if(existing_position != t->forall.end()) continue;
|
||||
limited_mgr.types[binding.first] = binding.second;
|
||||
}
|
||||
limited_mgr.find_free(t->monotype, monotype_free);
|
||||
for(auto& not_free : t->forall) {
|
||||
monotype_free.erase(not_free);
|
||||
}
|
||||
into.insert(monotype_free.begin(), monotype_free.end());
|
||||
}
|
||||
101
code/compiler/13/type.hpp
Normal file
101
code/compiler/13/type.hpp
Normal file
@@ -0,0 +1,101 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <optional>
|
||||
#include "location.hh"
|
||||
|
||||
class type_mgr;
|
||||
|
||||
struct type {
|
||||
virtual ~type() = default;
|
||||
|
||||
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||
};
|
||||
|
||||
using type_ptr = std::shared_ptr<type>;
|
||||
|
||||
struct type_scheme {
|
||||
std::vector<std::string> forall;
|
||||
type_ptr monotype;
|
||||
|
||||
type_scheme(type_ptr type) : forall(), monotype(std::move(type)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
type_ptr instantiate(type_mgr& mgr) const;
|
||||
};
|
||||
|
||||
using type_scheme_ptr = std::shared_ptr<type_scheme>;
|
||||
|
||||
struct type_var : public type {
|
||||
std::string name;
|
||||
|
||||
type_var(std::string n)
|
||||
: name(std::move(n)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_base : public type {
|
||||
std::string name;
|
||||
int32_t arity;
|
||||
|
||||
type_base(std::string n, int32_t a = 0)
|
||||
: name(std::move(n)), arity(a) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_data : public type_base {
|
||||
struct constructor {
|
||||
int tag;
|
||||
};
|
||||
|
||||
std::map<std::string, constructor> constructors;
|
||||
|
||||
type_data(std::string n, int32_t a = 0)
|
||||
: type_base(std::move(n), a) {}
|
||||
};
|
||||
|
||||
struct type_arr : public type {
|
||||
type_ptr left;
|
||||
type_ptr right;
|
||||
|
||||
type_arr(type_ptr l, type_ptr r)
|
||||
: left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_app : public type {
|
||||
type_ptr constructor;
|
||||
std::vector<type_ptr> arguments;
|
||||
|
||||
type_app(type_ptr c)
|
||||
: constructor(std::move(c)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
class type_mgr {
|
||||
private:
|
||||
int last_id = 0;
|
||||
std::map<std::string, type_ptr> types;
|
||||
|
||||
public:
|
||||
std::string new_type_name();
|
||||
type_ptr new_type();
|
||||
type_ptr new_arrow_type();
|
||||
|
||||
void unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc = std::nullopt);
|
||||
type_ptr substitute(
|
||||
const std::map<std::string, type_ptr>& subst,
|
||||
const type_ptr& t) const;
|
||||
type_ptr lookup(const std::string& var) const;
|
||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||
void bind(const std::string& s, type_ptr t);
|
||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
|
||||
};
|
||||
96
code/compiler/13/type_env.cpp
Normal file
96
code/compiler/13/type_env.cpp
Normal file
@@ -0,0 +1,96 @@
|
||||
#include "type_env.hpp"
|
||||
#include "type.hpp"
|
||||
#include "error.hpp"
|
||||
#include <cassert>
|
||||
|
||||
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
|
||||
if(parent != nullptr) parent->find_free(mgr, into);
|
||||
for(auto& binding : names) {
|
||||
mgr.find_free(binding.second.type, into);
|
||||
}
|
||||
}
|
||||
|
||||
void type_env::find_free_except(const type_mgr& mgr, const group& avoid,
|
||||
std::set<std::string>& into) const {
|
||||
if(parent != nullptr) parent->find_free(mgr, into);
|
||||
for(auto& binding : names) {
|
||||
if(avoid.members.find(binding.first) != avoid.members.end()) continue;
|
||||
mgr.find_free(binding.second.type, into);
|
||||
}
|
||||
}
|
||||
|
||||
type_scheme_ptr type_env::lookup(const std::string& name) const {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end()) return it->second.type;
|
||||
if(parent) return parent->lookup(name);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool type_env::is_global(const std::string& name) const {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end()) return it->second.vis == visibility::global;
|
||||
if(parent) return parent->is_global(name);
|
||||
return false;
|
||||
}
|
||||
|
||||
void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
|
||||
auto it = names.find(name);
|
||||
|
||||
// Can't set mangled name for non-existent variable.
|
||||
assert(it != names.end());
|
||||
// Local names shouldn't need mangling.
|
||||
assert(it->second.vis == visibility::global);
|
||||
|
||||
it->second.mangled_name = mangled;
|
||||
}
|
||||
|
||||
const std::string& type_env::get_mangled_name(const std::string& name) const {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end()) {
|
||||
assert(it->second.mangled_name);
|
||||
return *it->second.mangled_name;
|
||||
}
|
||||
assert(parent != nullptr);
|
||||
return parent->get_mangled_name(name);
|
||||
}
|
||||
|
||||
type_ptr type_env::lookup_type(const std::string& name) const {
|
||||
auto it = type_names.find(name);
|
||||
if(it != type_names.end()) return it->second;
|
||||
if(parent) return parent->lookup_type(name);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void type_env::bind(const std::string& name, type_ptr t, visibility v) {
|
||||
type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
|
||||
names[name] = variable_data(std::move(new_scheme), v, std::nullopt);
|
||||
}
|
||||
|
||||
void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
|
||||
names[name] = variable_data(std::move(t), v, "");
|
||||
}
|
||||
|
||||
void type_env::bind_type(const std::string& type_name, type_ptr t) {
|
||||
if(lookup_type(type_name) != nullptr)
|
||||
throw type_error("redefinition of type");
|
||||
type_names[type_name] = t;
|
||||
}
|
||||
|
||||
void type_env::generalize(const std::string& name, const group& grp, type_mgr& mgr) {
|
||||
auto names_it = names.find(name);
|
||||
assert(names_it != names.end());
|
||||
assert(names_it->second.type->forall.size() == 0);
|
||||
|
||||
std::set<std::string> free_in_type;
|
||||
std::set<std::string> free_in_env;
|
||||
mgr.find_free(names_it->second.type->monotype, free_in_type);
|
||||
find_free_except(mgr, grp, free_in_env);
|
||||
for(auto& free : free_in_type) {
|
||||
if(free_in_env.find(free) != free_in_env.end()) continue;
|
||||
names_it->second.type->forall.push_back(free);
|
||||
}
|
||||
}
|
||||
|
||||
type_env_ptr type_scope(type_env_ptr parent) {
|
||||
return type_env_ptr(new type_env(std::move(parent)));
|
||||
}
|
||||
52
code/compiler/13/type_env.hpp
Normal file
52
code/compiler/13/type_env.hpp
Normal file
@@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <optional>
|
||||
#include "graph.hpp"
|
||||
#include "type.hpp"
|
||||
|
||||
struct type_env;
|
||||
using type_env_ptr = std::shared_ptr<type_env>;
|
||||
|
||||
enum class visibility { global,local };
|
||||
|
||||
class type_env {
|
||||
private:
|
||||
struct variable_data {
|
||||
type_scheme_ptr type;
|
||||
visibility vis;
|
||||
std::optional<std::string> mangled_name;
|
||||
|
||||
variable_data()
|
||||
: variable_data(nullptr, visibility::local, std::nullopt) {}
|
||||
variable_data(type_scheme_ptr t, visibility v, std::optional<std::string> n)
|
||||
: type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
|
||||
};
|
||||
|
||||
type_env_ptr parent;
|
||||
std::map<std::string, variable_data> names;
|
||||
std::map<std::string, type_ptr> type_names;
|
||||
|
||||
public:
|
||||
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
||||
type_env() : type_env(nullptr) {}
|
||||
|
||||
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
||||
void find_free_except(const type_mgr& mgr, const group& avoid,
|
||||
std::set<std::string>& into) const;
|
||||
type_scheme_ptr lookup(const std::string& name) const;
|
||||
bool is_global(const std::string& name) const;
|
||||
void set_mangled_name(const std::string& name, const std::string& mangled);
|
||||
const std::string& get_mangled_name(const std::string& name) const;
|
||||
type_ptr lookup_type(const std::string& name) const;
|
||||
void bind(const std::string& name, type_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind(const std::string& name, type_scheme_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind_type(const std::string& type_name, type_ptr t);
|
||||
void generalize(const std::string& name, const group& grp, type_mgr& mgr);
|
||||
};
|
||||
|
||||
|
||||
type_env_ptr type_scope(type_env_ptr parent);
|
||||
102
code/typesafe-imperative/TypesafeImp.idr
Normal file
102
code/typesafe-imperative/TypesafeImp.idr
Normal file
@@ -0,0 +1,102 @@
|
||||
data Reg = A | B | R
|
||||
|
||||
data Ty = IntTy | BoolTy
|
||||
|
||||
TypeState : Type
|
||||
TypeState = (Ty, Ty, Ty)
|
||||
|
||||
getRegTy : Reg -> TypeState -> Ty
|
||||
getRegTy A (a, _, _) = a
|
||||
getRegTy B (_, b, _) = b
|
||||
getRegTy R (_, _, r) = r
|
||||
|
||||
setRegTy : Reg -> Ty -> TypeState -> TypeState
|
||||
setRegTy A a (_, b, r) = (a, b, r)
|
||||
setRegTy B b (a, _, r) = (a, b, r)
|
||||
setRegTy R r (a, b, _) = (a, b, r)
|
||||
|
||||
data Expr : TypeState -> Ty -> Type where
|
||||
Lit : Int -> Expr s IntTy
|
||||
Load : (r : Reg) -> Expr s (getRegTy r s)
|
||||
Add : Expr s IntTy -> Expr s IntTy -> Expr s IntTy
|
||||
Leq : Expr s IntTy -> Expr s IntTy -> Expr s BoolTy
|
||||
Not : Expr s BoolTy -> Expr s BoolTy
|
||||
|
||||
mutual
|
||||
data Stmt : TypeState -> TypeState -> TypeState -> Type where
|
||||
Store : (r : Reg) -> Expr s t -> Stmt l s (setRegTy r t s)
|
||||
If : Expr s BoolTy -> Prog l s n -> Prog l s n -> Stmt l s n
|
||||
Loop : Prog s s s -> Stmt l s s
|
||||
Break : Stmt s s s
|
||||
|
||||
data Prog : TypeState -> TypeState -> TypeState -> Type where
|
||||
Nil : Prog l s s
|
||||
(::) : Stmt l s n -> Prog l n m -> Prog l s m
|
||||
|
||||
initialState : TypeState
|
||||
initialState = (IntTy, IntTy, IntTy)
|
||||
|
||||
testProg : Prog Main.initialState Main.initialState Main.initialState
|
||||
testProg =
|
||||
[ Store A (Lit 1 `Leq` Lit 2)
|
||||
, If (Load A)
|
||||
[ Store A (Lit 1) ]
|
||||
[ Store A (Lit 2) ]
|
||||
, Store B (Lit 2)
|
||||
, Store R (Add (Load A) (Load B))
|
||||
]
|
||||
|
||||
prodProg : Prog Main.initialState Main.initialState Main.initialState
|
||||
prodProg =
|
||||
[ Store A (Lit 7)
|
||||
, Store B (Lit 9)
|
||||
, Store R (Lit 0)
|
||||
, Loop
|
||||
[ If (Load A `Leq` Lit 0)
|
||||
[ Break ]
|
||||
[ Store R (Load R `Add` Load B)
|
||||
, Store A (Load A `Add` Lit (-1))
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
repr : Ty -> Type
|
||||
repr IntTy = Int
|
||||
repr BoolTy = Bool
|
||||
|
||||
data State : TypeState -> Type where
|
||||
MkState : (repr a, repr b, repr c) -> State (a, b, c)
|
||||
|
||||
getReg : (r : Reg) -> State s -> repr (getRegTy r s)
|
||||
getReg A (MkState (a, _, _)) = a
|
||||
getReg B (MkState (_, b, _)) = b
|
||||
getReg R (MkState (_, _, r)) = r
|
||||
|
||||
setReg : (r : Reg) -> repr t -> State s -> State (setRegTy r t s)
|
||||
setReg A a (MkState (_, b, r)) = MkState (a, b, r)
|
||||
setReg B b (MkState (a, _, r)) = MkState (a, b, r)
|
||||
setReg R r (MkState (a, b, _)) = MkState (a, b, r)
|
||||
|
||||
expr : Expr s t -> State s -> repr t
|
||||
expr (Lit i) _ = i
|
||||
expr (Load r) s = getReg r s
|
||||
expr (Add l r) s = expr l s + expr r s
|
||||
expr (Leq l r) s = expr l s <= expr r s
|
||||
expr (Not e) s = not $ expr e s
|
||||
|
||||
mutual
|
||||
stmt : Stmt l s n -> State s -> Either (State l) (State n)
|
||||
stmt (Store r e) s = Right $ setReg r (expr e s) s
|
||||
stmt (If c t e) s = if expr c s then prog t s else prog e s
|
||||
stmt (Loop p) s =
|
||||
case prog p s >>= stmt (Loop p) of
|
||||
Right s => Right s
|
||||
Left s => Right s
|
||||
stmt Break s = Left s
|
||||
|
||||
prog : Prog l s n -> State s -> Either (State l) (State n)
|
||||
prog Nil s = Right s
|
||||
prog (st::p) s = stmt st s >>= prog p
|
||||
|
||||
run : Prog l s l -> State s -> State l
|
||||
run p s = either id id $ prog p s
|
||||
@@ -6,6 +6,15 @@ pygmentsCodeFences = true
|
||||
pygmentsUseClasses = true
|
||||
summaryLength = 20
|
||||
|
||||
[outputFormats]
|
||||
[outputFormats.Toml]
|
||||
name = "toml"
|
||||
mediaType = "application/toml"
|
||||
isHTML = false
|
||||
|
||||
[outputs]
|
||||
home = ["html","rss","toml"]
|
||||
|
||||
[markup]
|
||||
[markup.tableOfContents]
|
||||
endLevel = 4
|
||||
|
||||
350
content/blog/00_aoc_coq.md
Normal file
350
content/blog/00_aoc_coq.md
Normal file
@@ -0,0 +1,350 @@
|
||||
---
|
||||
title: "Advent of Code in Coq - Day 1"
|
||||
date: 2020-12-02T18:44:56-08:00
|
||||
tags: ["Advent of Code", "Coq"]
|
||||
---
|
||||
|
||||
The first puzzle of this year's [Advent of Code](https://adventofcode.com) was quite
|
||||
simple, which gave me a thought: "Hey, this feels within reach for me to formally verify!"
|
||||
At first, I wanted to formalize and prove the correctness of the [two-pointer solution](https://www.geeksforgeeks.org/two-pointers-technique/).
|
||||
However, I didn't have the time to mess around with the various properties of sorted
|
||||
lists and their traversals. So, I settled for the brute force solution. Despite
|
||||
the simplicity of its implementation, there is plenty to talk about when proving
|
||||
its correctness using Coq. Let's get right into it!
|
||||
|
||||
Before we start, in the interest of keeping the post self-contained, here's the (paraphrased)
|
||||
problem statement:
|
||||
|
||||
> Given an unsorted list of numbers, find two distinct numbers that add up to 2020.
|
||||
|
||||
With this in mind, we can move on to writing some Coq!
|
||||
|
||||
### Defining the Functions
|
||||
The first step to proving our code correct is to actually write the code! To start with,
|
||||
let's write a helper function that, given a number `x`, tries to find another number
|
||||
`y` such that `x + y = 2020`. In fact, rather than hardcoding the desired
|
||||
sum to `2020`, let's just use another argument called `total`. The code is quite simple:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 7 14 >}}
|
||||
|
||||
Here, `is` is the list of numbers that we want to search.
|
||||
We proceed by case analysis: if the list is empty, we can't
|
||||
find a match, so we return `None` (the Coq equivalent of Haskell's `Nothing`).
|
||||
On the other hand, if the list has at least one element `y`, we see if it adds
|
||||
up to `total`, and return `Some y` (equivalent to `Just y` in Haskell) if it does.
|
||||
If it doesn't, we continue our search into the rest of the list.
|
||||
|
||||
It's somewhat unusual, in my experience, to put the list argument first when writing
|
||||
functions in a language with [currying](https://wiki.haskell.org/Currying). However,
|
||||
it seems as though Coq's `simpl` tactic, which we will use later, works better
|
||||
for our purposes when the argument being case analyzed is given first.
|
||||
|
||||
We can now use `find_matching` to define our `find_sum` function, which solves part 1.
|
||||
Here's the code:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 16 24 >}}
|
||||
|
||||
For every `x` that we encounter in our input list `is`, we want to check if there's
|
||||
a matching number in the rest of the list. We only search the remainder of the list
|
||||
because we can't use `x` twice: the `x` and `y` we return that add up to `total`
|
||||
must be different elements. We use `find_matching` to try find a complementary number
|
||||
for `x`. If we don't find it, this `x` isn't it, so we recursively move on to `xs`.
|
||||
On the other hand, if we _do_ find a matching `y`, we're done! We return `(x,y)`,
|
||||
wrapped in `Some` to indicate that we found something useful.
|
||||
|
||||
What about that `(* Was buggy! *)` line? Well, it so happens that my initial
|
||||
implementation had a bug on this line, one that came up as I was proving
|
||||
the correctness of my function. When I wasn't able to prove a particular
|
||||
behavior in one of the cases, I realized something was wrong. In short,
|
||||
my proof actually helped me find and fix a bug!
|
||||
|
||||
This is all the code we'll need to get our solution. Next, let's talk about some
|
||||
properties of our two functions.
|
||||
|
||||
### Our First Lemma
|
||||
When we call `find_matching`, we want to be sure that if we get a number,
|
||||
it does indeed add up to our expected total. We can state it a little bit more
|
||||
formally as follows:
|
||||
|
||||
> For any numbers `k` and `x`, and for any list of number `is`,
|
||||
> if `find_matching is k x` returns a number `y`, then `x + y = k`.
|
||||
|
||||
And this is how we write it in Coq:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 26 27 >}}
|
||||
|
||||
The arrow, `->`, reads "implies". Other than that, I think this
|
||||
property reads pretty well. The proof, unfortunately, is a little bit more involved.
|
||||
Here are the first few lines:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 28 31 >}}
|
||||
|
||||
We start with the `intros is` tactic, which is akin to saying
|
||||
"consider a particular list of integers `is`". We do this without losing
|
||||
generality: by simply examining a concrete list, we've said nothing about
|
||||
what that list is like. We then proceed by induction on `is`.
|
||||
|
||||
To prove something by induction for a list, we need to prove two things:
|
||||
|
||||
* The __base case__. Whatever property we want to hold, it must
|
||||
hold for the empty list, which is the simplest possible list.
|
||||
In our case, this means `find_matching` searching an empty list.
|
||||
* The __inductive case__. Assuming that a property holds for any list
|
||||
`[b, c, ...]`, we want to show that the property also holds for
|
||||
the list `[a, b, c, ...]`. That is, the property must remain true if we
|
||||
prepend an element to a list for which this property holds.
|
||||
|
||||
These two things combined give us a proof for _all_ lists, which is exactly
|
||||
what we want! If you don't belive me, here's how it works. Suppose you want
|
||||
to prove that some property `P` holds for `[1,2,3,4]`. Given the base
|
||||
case, we know that `P []` holds. Next, by the inductive case, since
|
||||
`P []` holds, we can prepend `4` to the list, and the property will
|
||||
still hold. Thus, `P [4]`. Now that `P [4]` holds, we can again prepend
|
||||
an element to the list, this time a `3`, and conclude that `P [3,4]`.
|
||||
Repeating this twice more, we arrive at our desired fact: `P [1,2,3,4]`.
|
||||
|
||||
When we write `induction is`, Coq will generate two proof goals for us,
|
||||
one for the base case, and one for the inductive case. We will have to prove
|
||||
each of them separately. Since we have
|
||||
not yet introduced the variables `k`, `x`, and `y`, they remain
|
||||
inside a `forall` quantifier at that time. To be able to refer
|
||||
to them, we want to use `intros`. We want to do this in both the
|
||||
base and the inductive case. To quickly do this, we use Coq's `;`
|
||||
operator. When we write `a; b`, Coq runs the tactic `a`, and then
|
||||
runs the tactic `b` in every proof goal generated by `a`. This is
|
||||
exactly what we want.
|
||||
|
||||
There's one more variable inside our second `intros`: `Hev`.
|
||||
This variable refers to the hypothesis of our statement:
|
||||
that is, the part on the left of the `->`. To prove that `A`
|
||||
implies `B`, we assume that `A` holds, and try to argue `B` from there.
|
||||
Here is no different: when we use `intros Hev`, we say, "suppose that you have
|
||||
a proof that `find_matching` evaluates to `Some y`, called `Hev`". The thing
|
||||
on the right of `->` becomes our proof goal.
|
||||
|
||||
Now, it's time to look at the cases. To focus on one case at a time,
|
||||
we use `-`. The first case is our base case. Here's what Coq prints
|
||||
out at this time:
|
||||
|
||||
```
|
||||
k, x, y : nat
|
||||
Hev : find_matching nil k x = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
All the stuff above the `===` line are our hypotheses. We know
|
||||
that we have some `k`, `x`, and `y`, all of which are numbers.
|
||||
We also have the assumption that `find_matching` returns `Some y`.
|
||||
In the base case, `is` is just `[]`, and this is reflected in the
|
||||
type for `Hev`. To make this more clear, we'll simplify the call to `find_matching`
|
||||
in `Hev`, using `simpl in Hev`. Now, here's what Coq has to say about `Hev`:
|
||||
|
||||
```
|
||||
Hev : None = Some y
|
||||
```
|
||||
|
||||
Well, this doesn't make any sense. How can something be equal to nothing?
|
||||
We ask Coq this question using `inversion Hev`. Effectively, the question
|
||||
that `inversion` asks is: what are the possible ways we could have acquired `Hev`?
|
||||
Coq generates a proof goal for each of these possible ways. Alas, there are
|
||||
no ways to arrive at this contradictory assumption: the number of proof sub-goals
|
||||
is zero. This means we're done with the base case!
|
||||
|
||||
The inductive case is the meat of this proof. Here's the corresponding part
|
||||
of the Coq source file:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 32 36 >}}
|
||||
|
||||
This time, the proof state is more complicated:
|
||||
|
||||
```
|
||||
a : nat
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Hev : find_matching (a :: is) k x = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
Following the footsteps of our informal description of the inductive case,
|
||||
Coq has us prove our property for `(a :: is)`, or the list `is` to which
|
||||
`a` is being prepended. Like before, we assume that our property holds for `is`.
|
||||
This is represented in the __induction hypothesis__ `IHis`. It states that if
|
||||
`find_matching` finds a `y` in `is`, it must add up to `k`. However, `IHis`
|
||||
doesn't tell us anything about `a :: is`: that's our job. We also still have
|
||||
`Hev`, which is our assumption that `find_matching` finds a `y` in `(a :: is)`.
|
||||
Running `simpl in Hev` gives us the following:
|
||||
|
||||
```
|
||||
Hev : (if x + a =? k then Some a else find_matching is k x) = Some y
|
||||
```
|
||||
|
||||
The result of `find_matching` now depends on whether or not the new element `a`
|
||||
adds up to `k`. If it does, then `find_matching` will return `a`, which means
|
||||
that `y` is the same as `a`. If not, it must be that `find_matching` finds
|
||||
the `y` in the rest of the list, `is`. We're not sure which of the possibilities
|
||||
is the case. Fortunately, we don't need to be!
|
||||
If we can prove that the `y` that `find_matching` finds is correct regardless
|
||||
of whether `a` adds up to `k` or not, we're good to go! To do this,
|
||||
we perform case analysis using `destruct`.
|
||||
|
||||
Our particular use of `destruct` says: check any possible value for `x + a ?= k`,
|
||||
and create an equation `Heq` that tells us what that value is. `?=` returns a boolean
|
||||
value, and so `destruct` generates two new goals: one where the function returns `true`,
|
||||
and one where it returns `false`. We start with the former. Here's the proof state:
|
||||
|
||||
```
|
||||
a : nat
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Heq : (x + a =? k) = true
|
||||
Hev : Some a = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
There is a new hypothesis: `Heq`. It tells us that we're currently
|
||||
considering the case where `?=` evaluates to `true`. Also,
|
||||
`Hev` has been considerably simplified: now that we know the condition
|
||||
of the `if` expression, we can just replace it with the `then` branch.
|
||||
|
||||
Looking at `Hev`, we can see that our prediction was right: `a` is equal to `y`. After all,
|
||||
if they weren't, `Some a` wouldn't equal to `Some y`. To make Coq
|
||||
take this information into account, we use `injection`. This will create
|
||||
a new hypothesis, `a = y`. But if one is equal to the other, why don't we
|
||||
just use only one of these variables everywhere? We do exactly that by using
|
||||
`subst`, which replaces `a` with `y` everywhere in our proof.
|
||||
|
||||
The proof state is now:
|
||||
|
||||
```
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Heq : (x + y =? k) = true
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
We're close, but there's one more detail to keep in mind. Our goal, `x + y = k`,
|
||||
is the __proposition__ that `x + y` is equal to `k`. However, `Heq` tells us
|
||||
that the __function__ `?=` evaluates to `true`. These are fundamentally different.
|
||||
One talks about mathematical equality, while the other about some function `?=`
|
||||
defined somewhere in Coq's standard library. Who knows - maybe there's a bug in
|
||||
Coq's implementation! Fortunately, Coq comes with a proof that if two numbers
|
||||
are equal according to `?=`, they are mathematically equal. This proof is
|
||||
called `eqb_nat_eq`. We tell Coq to use this with `apply`. Our proof goal changes to:
|
||||
|
||||
```
|
||||
true = (x + y =? k)
|
||||
```
|
||||
|
||||
This is _almost_ like `Heq`, but flipped. Instead of manually flipping it and using `apply`
|
||||
with `Heq`, I let Coq do the rest of the work using `auto`.
|
||||
|
||||
Phew! All this for the `true` case of `?=`. Next, what happens if `x + a` does not equal `k`?
|
||||
Here's the proof state at this time:
|
||||
|
||||
```
|
||||
a : nat
|
||||
is : list nat
|
||||
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||
k, x, y : nat
|
||||
Heq : (x + a =? k) = false
|
||||
Hev : find_matching is k x = Some y
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
x + y = k
|
||||
```
|
||||
|
||||
Since `a` was not what it was looking for, `find_matching` moved on to `is`. But hey,
|
||||
we're in the inductive case! We are assuming that `find_matching` will work properly
|
||||
with the list `is`. Since `find_matching` found its `y` in `is`, this should be all we need!
|
||||
We use our induction hypothesis `IHis` with `apply`. `IHis` itself does not know that
|
||||
`find_matching` moved on to `is`, so it asks us to prove it. Fortunately, `Hev` tells us
|
||||
exactly that, so we use `assumption`, and the proof is complete! Quod erat demonstrandum, QED!
|
||||
|
||||
### The Rest of the Owl
|
||||
Here are a couple of other properties of `find_matching`. For brevity's sake, I will
|
||||
not go through their proofs step-by-step. I find that the best way to understand
|
||||
Coq proofs is to actually step through them in the IDE!
|
||||
|
||||
First on the list is `find_matching_skip`. Here's the type:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 38 39 >}}
|
||||
|
||||
It reads: if we correctly find a number in a small list `is`, we can find that same number
|
||||
even if another number is prepended to `is`. That makes sense: _adding_ a number to
|
||||
a list doesn't remove whatever we found in it! I used this lemma to prove another,
|
||||
`find_matching_works`:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 49 50 >}}
|
||||
|
||||
This reads, if there _is_ an element `y` in `is` that adds up to `k` with `x`, then
|
||||
`find_matching` will find it. This is an important property. After all, if it didn't
|
||||
hold, it would mean that `find_matching` would occasionally fail to find a matching
|
||||
number, even though it's there! We can't have that.
|
||||
|
||||
Finally, we want to specify what it means for `find_sum`, our solution function, to actually
|
||||
work. The naive definition would be:
|
||||
|
||||
> Given a list of integers, `find_sum` always finds a pair of numbers that add up to `k`.
|
||||
|
||||
Unfortunately, this is not true. What if, for instance, we give `find_sum` an empty list?
|
||||
There are no numbers from that list to find and add together. Even a non-empty list
|
||||
may not include such a pair! We need a way to characterize valid input lists. I claim
|
||||
that all lists from this Advent of Code puzzle are guaranteed to have two numbers that
|
||||
add up to our goal, and that these numbers are not equal to each other. In Coq,
|
||||
we state this as follows:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 4 5 >}}
|
||||
|
||||
This defines a new property, `has_pair t is` (read "`is` has a pair of numbers that add to `t`"),
|
||||
which means:
|
||||
|
||||
> There are two numbers `n1` and `n2` such that, they are not equal to each other (`n1<>n2`) __and__
|
||||
> the number `n1` is an element of `is` (`In n1 is`) __and__
|
||||
> the number `n2` is an element of `is` (`In n2 is`) __and__
|
||||
> the two numbers add up to `t` (`n1 + n2 = t`).
|
||||
|
||||
When making claims about the correctness of our algorithm, we will assume that this
|
||||
property holds. Finally, here's the theorem we want to prove:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 64 66 >}}
|
||||
|
||||
It reads, "for any total `k` and list `is`, if `is` has a pair of numbers that add to `k`,
|
||||
then `find_sum` will return a pair of numbers `x` and `y` that add to `k`".
|
||||
There's some nuance here. We hardly reference the `has_pair` property in this definition,
|
||||
and for good reason. Our `has_pair` hypothesis only says that there is _at least one_
|
||||
pair of numbers in `is` that meets our criteria. However, this pair need not be the only
|
||||
one, nor does it need to be the one returned by `find_sum`! However, if we have many pairs,
|
||||
we want to confirm that `find_sum` will find one of them. Finally, here is the proof.
|
||||
I will not be able to go through it in detail in this post, but I did comment it to
|
||||
make it easier to read:
|
||||
|
||||
{{< codelines "Coq" "aoc-coq/day1.v" 67 102 >}}
|
||||
|
||||
Coq seems happy with it, and so am I! The bug I mentioned earlier popped up on line 96.
|
||||
I had accidentally made `find_sum` return `None` if it couldn't find a complement
|
||||
for the `x` it encountered. This meant that it never recursed into the remaining
|
||||
list `xs`, and thus, the pair was never found at all! It this became impossible
|
||||
to prove that `find_some` will return `Some y`, and I had to double back
|
||||
and check my definitions.
|
||||
|
||||
I hope you enjoyed this post! If you're interested to learn more about Coq, I strongly recommend
|
||||
checking out [Software Foundations](https://softwarefoundations.cis.upenn.edu/), a series
|
||||
of books on Coq written as comments in a Coq source file! In particular, check out
|
||||
[Logical Foundations](https://softwarefoundations.cis.upenn.edu/lf-current/index.html)
|
||||
for an introduction to using Coq. Thanks for reading!
|
||||
@@ -145,4 +145,5 @@ Here are the posts that I've written so far for this series:
|
||||
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
|
||||
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
|
||||
* [Let/In and Lambdas]({{< relref "12_compiler_let_in_lambda/index.md" >}})
|
||||
* [Cleanup]({{< relref "13_compiler_cleanup/index.md" >}})
|
||||
|
||||
|
||||
@@ -984,5 +984,6 @@ Before either of those things, though, I think that I want to go through
|
||||
the compiler and perform another round of improvements, similarly to
|
||||
[part 4]({{< relref "04_compiler_improvements" >}}). It's hard to do a lot
|
||||
of refactoring while covering new content, since major changes need to
|
||||
be explained and presented for the post to make sense. I hope to see
|
||||
you in these future posts!
|
||||
be explained and presented for the post to make sense.
|
||||
I do this in [part 13]({{< relref "13_compiler_cleanup/index.md" >}}) - cleanup.
|
||||
I hope to see you there!
|
||||
|
||||
964
content/blog/13_compiler_cleanup/index.md
Normal file
964
content/blog/13_compiler_cleanup/index.md
Normal file
@@ -0,0 +1,964 @@
|
||||
---
|
||||
title: Compiling a Functional Language Using C++, Part 13 - Cleanup
|
||||
date: 2020-09-19T16:14:13-07:00
|
||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||
description: "In this post, we clean up our compiler."
|
||||
---
|
||||
|
||||
In [part 12]({{< relref "12_compiler_let_in_lambda" >}}), we added `let/in`
|
||||
and lambda expressions to our compiler. At the end of that post, I mentioned
|
||||
that before we move on to bigger and better things, I wanted to take a
|
||||
step back and clean up the compiler. Now is the time to do that.
|
||||
|
||||
In particular, I identified four things that could be improved
|
||||
or cleaned up:
|
||||
|
||||
* __Error handling__. We need to stop using `throw 0` and start
|
||||
using `assert`. We can also make our errors much more descriptive
|
||||
by including source locations in the output.
|
||||
* __Name mangling__. I don't think I got it quite right last
|
||||
time. Now is the time to clean it up.
|
||||
* __Code organization__. I think we can benefit from a top-level
|
||||
class, and a more clear "dependency order" between the various
|
||||
classes and structures we've defined.
|
||||
* __Code style__. In particular, I've been lazily using `struct`
|
||||
in a lot of places. That's not a good idea; it's better
|
||||
to use `class`, and only expose _some_ fields and methods
|
||||
to the rest of the code.
|
||||
|
||||
### Error Reporting and Handling
|
||||
The previous post was rather long, which led me to omit
|
||||
a rather important aspect of the compiler: proper error reporting.
|
||||
Once again our compiler has instances of `throw 0`, which is a cheap way
|
||||
of avoiding properly handling a runtime error. Before we move on,
|
||||
it's best to get rid of such blatantly lazy code.
|
||||
|
||||
Our existing exceptions (mostly type errors) can use some work, too.
|
||||
Even the most descriptive issues our compiler reports -- unification errors --
|
||||
don't include the crucial information of _where_ the error is. For large
|
||||
programs, this means having to painstakingly read through the entire file
|
||||
to try figure out which subexpression could possibly have an incorrect type.
|
||||
This is far from the ideal debugging experience.
|
||||
|
||||
Addressing all this is a multi-step change in itself. We want to:
|
||||
|
||||
* Replace all `throw 0` code with actual exceptions.
|
||||
* Replace some exceptions that shouldn't be possible for a user to trigger
|
||||
with assertions.
|
||||
* Keep track of source locations of each subexpression, so that we may
|
||||
be able to print it if it causes an error.
|
||||
* Be able to print out said source locations at will. This isn't
|
||||
a _necessity_, but virtually all "big" compilers do this. Instead
|
||||
of reporting that an error occurs on a particular line, we will
|
||||
actually print the line.
|
||||
|
||||
Let's start with gathering the actual location data.
|
||||
|
||||
#### Bison's Locations
|
||||
Bison actually has some rather nice support for location tracking. It can
|
||||
automatically assemble the "from" and "to" locations of a nonterminal
|
||||
from the locations of children, which would be very tedious to write
|
||||
by hand. We enable this feature using the following option:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 46 46 >}}
|
||||
|
||||
There's just one hitch, though. Sure, Bison can compute bigger
|
||||
locations from smaller ones, but it must get the smaller ones
|
||||
from somewhere. Since Bison operates on _tokens_, rather
|
||||
than _characters_, it effectively doesn't interact with the source
|
||||
text at all, and can't determine from which line or column a token
|
||||
originated. The task of determining the locations of input tokens
|
||||
is delegated to the tokenizer -- Flex, in our case. Flex, on the
|
||||
other hand, doesn't have a built-in mechanism for tracking
|
||||
locations. Fortunately, Bison provides a `yy::location` class that
|
||||
includes most of the needed functionality.
|
||||
|
||||
A `yy::location` consists of two source positions, `begin` and `end`,
|
||||
which themselves are represented using lines and columns. It
|
||||
also has the following methods:
|
||||
|
||||
* `yy::location::columns(int)` advances the `end` position by
|
||||
the given number of columns, while `begin` stays the same.
|
||||
If `begin` and `end` both point to the beginning of a token,
|
||||
then `columns(token_length)` will move `end` to the token's end,
|
||||
and thus make the whole `location` contain the token.
|
||||
* `yy::location::lines(int)` behaves similarly to `columns`,
|
||||
except that it advances `end` by the given number of lines,
|
||||
rather than columns. It also resets the columns counter to `1`.
|
||||
* `yy::location::step()` moves `begin` to where `end` is. This
|
||||
is useful for when we've finished processing a token, and want
|
||||
to move on to the next one.
|
||||
|
||||
For Flex specifically, `yyleng` has the length of the token
|
||||
currently being processed. Rather than adding the calls
|
||||
to `columns` and `step` to every rule, we can define the
|
||||
`YY_USER_ACTION` macro, which is run before each token
|
||||
is processed.
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 12 14 >}}
|
||||
|
||||
We'll see why we are using `LOC` instead of something like `location` soon;
|
||||
for now, you can treat `LOC` as if it were a global variable declared
|
||||
in the tokenizer. Before processing each token, we ensure that
|
||||
the `yy::location` has its `begin` and `end` at the same position,
|
||||
and then advance `end` by `yyleng` columns. This is
|
||||
{{< sidenote "right" "sufficient-note" "sufficient" >}}
|
||||
This doesn't hold for all languages. It may be possible for a language
|
||||
to have tokens that contain <code>\n</code>, in which case,
|
||||
rather than just using <code>yyleng</code>, we'd need to
|
||||
add special logic to iterate over the token and detect the line
|
||||
breaks.<br>
|
||||
<br>
|
||||
Also, this requires that the <code>end</code> of the previous token was
|
||||
correctly computed.
|
||||
{{< /sidenote >}}
|
||||
to make `LOC` represent our token's source position. For
|
||||
the moment, don't worry too much about `drv`; this is the
|
||||
parsing driver, and we will talk about it shortly.
|
||||
|
||||
So now we have a "global" variable `LOC` that gives
|
||||
us the source position of the current token. To get it
|
||||
to Bison, we have to pass it as an argument to each
|
||||
of the `make_TOKEN` calls. Here are a few sample lines
|
||||
that should give you the general idea:
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 40 43 >}}
|
||||
|
||||
That last line is actually new. Previously, we somehow
|
||||
got away without explicitly sending the end-of-file token to Bison.
|
||||
I suspect that this was due to some kind of implicit conversion
|
||||
of the Flex macro `YY_NULL` into a token; now that we have
|
||||
to pass a position to every token constructor, such an implicit
|
||||
conversion is probably impossible.
|
||||
|
||||
Now we have Bison computing source locations for each nonterminal.
|
||||
However, at the moment, we still aren't using them. To change that,
|
||||
we need to add a `yy::location` argument to each of our `ast` nodes,
|
||||
as well as to the `pattern` subclasses, `definition_defn` and
|
||||
`definition_data`. To avoid breaking all the code that creates
|
||||
AST nodes and definitions outside of the parser, we'll make this
|
||||
argument optional. Inside of `ast.hpp`, we define a new field as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.hpp" 16 16 >}}
|
||||
|
||||
Then, we add a constructor to `ast` as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.hpp" 18 18 >}}
|
||||
|
||||
Note that it's not optional here, since `ast` itself is an
|
||||
abstract class, and thus will never be constructed directly.
|
||||
It is in the subclasses of `ast` that we provide a default
|
||||
value. The change is rather mechanical, but here's an example
|
||||
from `ast_binop`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.hpp" 98 99 >}}
|
||||
|
||||
Finally, we tell Bison to pass the computed location
|
||||
data as an argument when constructing our data structures.
|
||||
This too is a mechanical change, and I think the following
|
||||
few lines demonstrate the general idea in sufficient
|
||||
detail:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 92 96 >}}
|
||||
|
||||
Here, the `@$` character is used to reference the current
|
||||
nonterminal's location data.
|
||||
|
||||
#### Line Offsets, File Input, and the Parsing Driver
|
||||
There are three more challenges with printing out the line
|
||||
of code where an error occurred. First of all, to
|
||||
print out a line of code, we need to have that line of code
|
||||
available to us. We do not currently meet this requirement:
|
||||
our compiler reads code form `stdin` (as is default for Flex),
|
||||
and `stdin` doesn't always support rewinding. This, in turn,
|
||||
means that once Flex has read a character from the input,
|
||||
it may not be possible to go back and retrieve that character
|
||||
again.
|
||||
|
||||
Second, even if we do have have the entire stream or buffer
|
||||
available to us, to retrieve an offset and length within
|
||||
that buffer from just a line and column number would be a lot
|
||||
of work. A naive approach would be to iterate through
|
||||
the input again, once more keeping track of lines and columns,
|
||||
and print the desired line once we reach it. However, this
|
||||
would lead us to redo a lot of work that our tokenizer
|
||||
is already doing.
|
||||
|
||||
Third, Flex's input mechanism, even if it it's configured
|
||||
not to read from `stdin`, uses a global file descriptor called
|
||||
`yyin`. However, we're better off minimizing global state (especially
|
||||
if we want to read, parse, and compile multiple files in
|
||||
the future). While we're configuring Flex's input mechanism,
|
||||
we may as well fix this, too.
|
||||
|
||||
There are several approaches to fixing the first issue. One possible
|
||||
way is to store the content of `stdin` into a temporary file. Then,
|
||||
it's possible to read from the file multiple times by using
|
||||
the C functions `fseek` and `rewind`. However, since we're
|
||||
working with files, why not just work directly with the files
|
||||
created by the user? Instead of reading from `stdin`, we may
|
||||
as well take in a path to a file via `argv`, and read from there.
|
||||
Also, instead of `fseek` and `rewind`, we can just read the file
|
||||
into memory, and access it like a normal character buffer. This
|
||||
does mean that we can stick with `stdin`, but it's more conventional
|
||||
to read source code from files, anyway.
|
||||
|
||||
To address the second issue, we can keep a mapping of line numbers
|
||||
to their locations in the source buffer. This is rather easy to
|
||||
maintain using an array: the first element of the array is 0,
|
||||
which is the beginning of the first line in any source file. From there,
|
||||
every time we encounter the character `\n`, we can push
|
||||
the current source location to the top, marking it as
|
||||
the beginning of another line. Where exactly we store this
|
||||
array is as yet unclear, since we're trying to avoid global variables.
|
||||
|
||||
Finally, to begin addressing the third issue, we can use Flex's `reentrant`
|
||||
option, which makes it so that all of the tokenizer's state is stored in an
|
||||
opaque `yyscan_t` structure, rather than in global variables. This way,
|
||||
we can configure `yyin` without setting a global variable, which is a step
|
||||
in the right direction. We'll work on this momentarily.
|
||||
|
||||
Our tokenizing and parsing stack has more global variables
|
||||
than just those specific to Flex. Among these variables is `global_defs`,
|
||||
which receives all the top-level function and data type definitions. We
|
||||
will also need some way of accessing the `yy::location` instance, and
|
||||
a way of storing our file input in memory. Fortunately, we're not
|
||||
the only ones to have ever come across the issue of creating non-global
|
||||
state: the Bison documentation has a
|
||||
[section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html)
|
||||
that describes a technique for manipulating
|
||||
state -- "parsing context", in their words. This technique involves the
|
||||
creation of a _parsing driver_.
|
||||
|
||||
The parsing driver is a class (or struct) that holds all the parse-related
|
||||
state. We can arrange for this class to be available to our tokenizing
|
||||
and parsing functions, which will allow us to use it pretty much like we'd
|
||||
use a global variable. This is the `drv` that we saw in `YY_USER_ACTION`.
|
||||
We can define it as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 36 54 >}}
|
||||
|
||||
There aren't many fields here. The `file_name` string represents
|
||||
the file that we'll be reading code from. The `location` field
|
||||
will be accessed by Flex via `get_current_location`. Bison will
|
||||
store the function and data type definitions it reads into `global_defs`
|
||||
via `get_global_defs`. Finally, `file_m` will be used to keep track
|
||||
of the content of the file we're reading, as well as the line offsets
|
||||
within that file. Notice that a couple of these fields are pointers
|
||||
that we take by reference in the constructor. The `parse_driver` doesn't
|
||||
_own_ the global definitions, nor the file manager. They exist outside
|
||||
of it, and will continue to be used in other ways the `parse_driver`
|
||||
does not need to know about. Also, the `LOC` variable in Flex is
|
||||
actually a call to `get_current_location`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 15 15 >}}
|
||||
|
||||
The methods of `parse_driver` are rather simple. The majority of
|
||||
them deals with giving access to the parser's members: the `yy::location`,
|
||||
the `definition_group`, and the `file_mgr`. The only exception
|
||||
to this is `operator()`, which we use to actually trigger the parsing process.
|
||||
We'll make this method return `true` if parsing succeeded, and `false`
|
||||
otherwise (if, say, the file we tried to read doesn't exist).
|
||||
Here's its implementation:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 48 60 >}}
|
||||
|
||||
We try open the user-specified file, and return `false` if we can't.
|
||||
After this, we start doing the setup specific to a reentrant
|
||||
Flex scanner. We declare a `yyscan_t` variable, which
|
||||
will contain all of Flex's state. Then, we initialize
|
||||
it using `yylex_init`. Finally, since we can no longer
|
||||
touch the `yyin` global variable (it doesn't exist),
|
||||
we have to resort to using a setter function provided by Flex
|
||||
to configure the tokenizer's input stream.
|
||||
|
||||
Next, we construct our Bison-generated parser. Note that
|
||||
unlike before, we have to pass in two arguments:
|
||||
`scanner` and `*this`, the latter being of type `parse_driver&`.
|
||||
We'll come back to how this works in a moment. With
|
||||
the scanner and parser initialized, we invoke `parser::operator()`,
|
||||
which actually runs the Flex- and Bison-generated code.
|
||||
To clean up, we run `yylex_destroy` and `fclose`. Finally,
|
||||
we call `file_mgr::finalize`, and return. But what
|
||||
_is_ `file_mgr`?
|
||||
|
||||
The `file_mgr` class does two things: it stores the part of the file
|
||||
that has already been read by Flex in memory, and it keeps track of
|
||||
where each line in our source file begins within the text. Here is its
|
||||
definition:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 14 34 >}}
|
||||
|
||||
In this class, the `string_stream` member is used to construct
|
||||
an `std::string` from the bits of text that Flex reads,
|
||||
processes, and feeds to the `file_mgr` using the `write` method.
|
||||
It's more efficient to use a string stream than to concatenate
|
||||
strings repeatedly. Once Flex is finished processing the file,
|
||||
the final contents of the `string_stream` are transferred into
|
||||
the `file_contents` string using the `finalize` method. The `offset`
|
||||
and `line_offsets` fields will be used as we described earlier: each time Flex
|
||||
encounters the `\n` character, the `offset` variable will pushed
|
||||
in top of the `line_offsets` vector, marking the beginning of
|
||||
the corresponding line. The methods of the class are as follows:
|
||||
|
||||
* `write` will be called from Flex, and will allow us to
|
||||
record the content of the file we're processing to the `string_stream`.
|
||||
We've already seen it used in the `YY_USER_ACTION` macro.
|
||||
* `mark_line` will also be called from Flex, and will mark the current
|
||||
`file_offset` as the beginning of a line by pushing it into `line_offsets`.
|
||||
* `finalize` will be called by the `parse_driver` when the parsing
|
||||
finishes. At this time, the `string_stream` should contain all of
|
||||
the input file, and this data is transferred to `file_contents`, as
|
||||
we mentioned above.
|
||||
* `get_index` and `get_line_end` will be used for converting
|
||||
`yy::location` instances to offsets within the source code buffer.
|
||||
* `print_location` will be used for printing errors.
|
||||
It will print the lines spanned by the given location, with the
|
||||
location itself colored and underlined if the last argument is `true`.
|
||||
This will make our errors easier on the eyes.
|
||||
|
||||
Let's take a look at their implementations. First, `write`.
|
||||
For the most part, this method is a proxy for the `write`
|
||||
method of our `string_stream`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 9 12 >}}
|
||||
|
||||
We do, however, also keep track of the `file_offset` variable
|
||||
here, which ensures we have up-to-date information
|
||||
regarding our position in the source file. The implementation
|
||||
of `mark_line` uses this information:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 14 16 >}}
|
||||
|
||||
The `finalize` method is trivial, and requires little additional
|
||||
discussion:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 18 20 >}}
|
||||
|
||||
Once we have the line offsets, `get_index` becomes very simple:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 22 25 >}}
|
||||
|
||||
Here, we use an assertion for the first time. Calling
|
||||
`get_index` with a negative or zero line doesn't make
|
||||
any sense, since Bison starts tracking line numbers
|
||||
at 1. Similarly, asking for a line for which we don't
|
||||
have a recorded offset is invalid. Both
|
||||
of these nonsensical calls to `get_index` cannot
|
||||
be caused by the user under normal circumstances,
|
||||
and indicate the method's misuse by the author of
|
||||
the compiler (us!). Thus, we terminate the program.
|
||||
|
||||
Finally, the implementation of `line_end` just finds the
|
||||
beginning of the next line. We stick to the C convention
|
||||
of marking 'end' indices exclusive (pointing just past
|
||||
the end of the array):
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 27 30 >}}
|
||||
|
||||
Since `line_offsets` has as many elements as there are lines,
|
||||
the last line number would be equal to the vector's size.
|
||||
When looking up the end of the last line, we can't look for
|
||||
the beginning of the next line, so instead we return the end of the file.
|
||||
|
||||
Next, the `print_location` method prints three sections
|
||||
of the source file. These are the text "before" the error,
|
||||
the error itself, and, finally, the text "after" the error.
|
||||
For example, if an error began on the fifth column of the third
|
||||
line, and ended on the eighth column of the fourth line, the
|
||||
"before" section would include the first four columns of the third
|
||||
line, and the "after" section would be the ninth column onward
|
||||
on the fourth line. Before and after the error itself,
|
||||
if the `highlight` argument is true,
|
||||
we sprinkle the ANSI escape codes to enable and disable
|
||||
special formatting, respectively. For now, the special
|
||||
formatting involves underlining the text and making it red.
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 32 46 >}}
|
||||
|
||||
Finally, to get the forward declarations for the `yy*` functions
|
||||
and types, we set the `header-file` option in Flex:
|
||||
|
||||
{{< codelines "C++" "compiler/13/scanner.l" 3 3 >}}
|
||||
|
||||
We also include this `scanner.hpp` file in our `parse_driver.cpp`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.cpp" 2 2 >}}
|
||||
|
||||
#### Adding the Driver to Flex and Bison
|
||||
Bison's C++ language template generates a class called
|
||||
`yy::parser`. We don't really want to modify this class
|
||||
in any way: not only is it generated code, but it's
|
||||
also rather complex. Instead, Bison provides us
|
||||
with a mechanism to pass more data in to the parser.
|
||||
This data is made available to all the actions
|
||||
that the parser runs. Better yet, Bison also attempts
|
||||
to pass this data on to the tokenizer, which in our
|
||||
case would mean that whatever data we provide Bison
|
||||
will also be available to Flex. This is how we'll
|
||||
allow the two components to access our new `parse_driver`
|
||||
class. This is also how we'll pass in the `yyscan_t`
|
||||
that Flex now needs to run its tokenizing code. To
|
||||
do all this, we use Bison's `%param` option. I'm
|
||||
going to include a few more lines from `parser.y`,
|
||||
since they contain the necessary `#include` directives
|
||||
and a required type definition:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parser.y" 1 18 >}}
|
||||
|
||||
The `%param` option effectively adds the parameter listed
|
||||
between the curly braces to the constructor of the generated
|
||||
`yy::parser`. We've already seen this in the implementation
|
||||
of our driver, where we passed `scanner` and `*this` as
|
||||
arguments when creating the parser. The parameters we declare are also passed to the
|
||||
`yylex` function, which is expected to accept them in the same order.
|
||||
|
||||
Since we're adding `parse_driver` as an argument we have to
|
||||
declare it. However, we can't include the `parse_driver` header
|
||||
right away because `parse_driver` itself includes the `parser` header:
|
||||
we'd end up with a circular dependency. Instead, we resort to
|
||||
forward-declaring the driver class, as well as the `yyscan_t`
|
||||
structure containing Flex's state.
|
||||
|
||||
Adding a parameter to Bison doesn't automatically affect
|
||||
Flex. To let Flex know that its `yylex` function must now accept
|
||||
the state and the parsing driver, we have to define the
|
||||
`YY_DECL` macro. We do this in `parse_driver.hpp`, since
|
||||
this forward declaration will be used by both Flex
|
||||
and Bison:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parse_driver.hpp" 56 58 >}}
|
||||
|
||||
#### Improving Exceptions
|
||||
Now, it's time to add location data (and a little bit more) to our
|
||||
exceptions. We want to make it possible for exceptions to include
|
||||
data about where the error occurred, and to print this data to the user.
|
||||
However, it's also possible for us to have exceptions that simply
|
||||
do not have that location data. Furthermore, we want to know
|
||||
whether or not an exception has an associated location; we'd
|
||||
rather not print an invalid or "default" location when an error
|
||||
occurs.
|
||||
|
||||
In the old days of programming, we could represent the absence
|
||||
of location data with a `nullptr`, or `NULL`. But not only
|
||||
does this approach expose us to all kind of `NULl`-safety
|
||||
bugs, but it also requires heap allocation! This doesn't
|
||||
make it sound all that appealing; instead, I think we should
|
||||
opt for using `std::optional`.
|
||||
|
||||
Though `std::optional` is standard (as may be obvious from its
|
||||
namespace), it's a rather recent addition to the C++ STL.
|
||||
In order to gain access to it, we need to ensure that our
|
||||
project is compiled using C++17. To this end, we add
|
||||
the following two lines to our CMakeLists.txt:
|
||||
|
||||
{{< codelines "CMake" "compiler/13/CMakeLists.txt" 5 6 >}}
|
||||
|
||||
Now, let's add a new base class for all of our compiler errors,
|
||||
unsurprisingly called `compiler_error`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.hpp" 10 26 >}}
|
||||
|
||||
We'll put some 'common' exception functionality
|
||||
into the `print_location` and `print_about` methods. If the error
|
||||
has an associated location, the former method will print that
|
||||
location to the screen. We don't always want to highlight
|
||||
the part of the code that caused the error: for instance,
|
||||
an invalid data type definition may span several lines,
|
||||
and coloring that whole section of text red would be
|
||||
too much. To address this, we add the `highlight`
|
||||
boolean argument, which can be used to switch the
|
||||
colors on and off. The `print_about` method
|
||||
will simply print the `what()` message of the exception,
|
||||
in addition to the "specific" error that occurred (stored
|
||||
in `description`). Here are the implementations of the
|
||||
functions:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 3 16 >}}
|
||||
|
||||
We will also add a `pretty_print` method to all of
|
||||
our exceptions. This method will handle
|
||||
all the exception-specific printing logic.
|
||||
For the generic compiler error, this means
|
||||
simply printing out the error text and the location:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 18 21 >}}
|
||||
|
||||
For `type_error`, this logic slightly changes,
|
||||
enabling colors when printing the location:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 27 30 >}}
|
||||
|
||||
Finally, for `unification_error`, we also include
|
||||
the code to print out the two types that our
|
||||
compiler could not unify:
|
||||
|
||||
{{< codelines "C++" "compiler/13/error.cpp" 32 41 >}}
|
||||
|
||||
There's a subtle change here. Compared to the previous
|
||||
type-printing code (which we had in `main`), what
|
||||
we wrote here deals with "expected" and "actual" types.
|
||||
The `left` type passed to the exception is printed
|
||||
first, and is treat like the "correct" type. The
|
||||
`right` type, on the other hand, is treated
|
||||
like the "wrong" type that should have been
|
||||
unifiable with `left`. This will affect the
|
||||
calling conventions of our unification code.
|
||||
|
||||
Now, we can go through and find all the places where
|
||||
we `throw 0`. One such place was in the data type
|
||||
definition code, where declaring the same type parameter
|
||||
twice is invalid. We replace the `0` with a
|
||||
`compiler_error`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 66 69 >}}
|
||||
|
||||
Not all `throw 0` statements should become exceptions.
|
||||
For example, here's code from the previous version of
|
||||
the compiler:
|
||||
|
||||
{{< codelines "C++" "compiler/12/definition.cpp" 123 127 >}}
|
||||
|
||||
If a definition `def_defn` has a dependency on a "nearby" (declared
|
||||
in the same group) definition called `dependency`, and if
|
||||
`dependency` does not exist within the same definition group,
|
||||
we throw an exception. But this error is impossible
|
||||
for a user to trigger: the only reason for a variable to appear
|
||||
in the `nearby_variables` vector is that it was previously
|
||||
found in the definition group. Here's the code that proves this
|
||||
(from the current version of the compiler):
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 102 106 >}}
|
||||
|
||||
Not being able to find the variable in the definition group
|
||||
is a compiler bug, and should never occur. So, instead
|
||||
of throwing an exception, we'll use an assertion:
|
||||
|
||||
{{< codelines "C++" "compiler/13/definition.cpp" 128 128 >}}
|
||||
|
||||
For more complicated error messages, we can use a `stringstream`.
|
||||
Here's an example from `parsed_type`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/parsed_type.cpp" 16 23 >}}
|
||||
|
||||
In general, this change is also rather mechanical. Before we
|
||||
move on, to maintain a balance between exceptions and assertions, here
|
||||
are a couple more assertions from `type_env`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.cpp" 81 82 >}}
|
||||
|
||||
Once again, it should not be possible for the compiler
|
||||
to try generalize the type of a variable that doesn't
|
||||
exist, and nor should generalization occur twice.
|
||||
|
||||
While we're on the topic of types, let's talk about
|
||||
`type_mgr::unify`. In practice, I suspect that a lot of
|
||||
errors in our compiler will originate from this method.
|
||||
However, at present, this method does not in any way
|
||||
track the locations of where a unification error occurred.
|
||||
To fix this, we add a new `loc` parameter to `unify`,
|
||||
which we make optional to allow for unification without
|
||||
a known location. Here's the declaration:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.hpp" 92 92 >}}
|
||||
|
||||
The change to the implementation is mechanical and repetitive,
|
||||
so instead of showing you the whole method, I'll settle for
|
||||
a couple of lines:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.cpp" 121 122 >}}
|
||||
|
||||
We want to make sure that a location provided to the
|
||||
top-level call to `unify` is also forwarded to the
|
||||
recursive calls, so we have to explicitly add it
|
||||
to the call.
|
||||
|
||||
We'll also have to update the 'main' code to call the
|
||||
`pretty_print` methods, but there's another big change
|
||||
that we're going to make before then. However, once that
|
||||
change is made, our errors will look a lot better.
|
||||
Here is what's printed out to the user when a type error
|
||||
occurs:
|
||||
|
||||
```
|
||||
an error occured while checking the types of the program: failed to unify types
|
||||
occuring on line 2:
|
||||
3 + False
|
||||
the expected type was:
|
||||
Int
|
||||
while the actual type was:
|
||||
Bool
|
||||
```
|
||||
|
||||
Here's an error that was previously a `throw 0` statement in our code:
|
||||
|
||||
```
|
||||
an error occured while compiling the program: type variable a used twice in data type definition.
|
||||
occuring on line 1:
|
||||
data Pair a a = { MkPair a a }
|
||||
```
|
||||
|
||||
Now, not only have we eliminated the lazy uses of `throw 0` in our
|
||||
code, but we've also improved the presentation of the errors
|
||||
to the user!
|
||||
|
||||
### Rethinking Name Mangling
|
||||
In the previous post, I said the following:
|
||||
|
||||
> One more thing. Let’s adopt the convention of storing mangled names into the compilation environment. This way, rather than looking up mangled names only for global functions, which would be a ‘gotcha’ for anyone working on the compiler, we will always use the mangled names during compilation.
|
||||
|
||||
Now that I've had some more time to think about it
|
||||
(and now that I've returned to the compiler after
|
||||
a brief hiatus), I think that this was not the right call.
|
||||
Mangled names make sense when translating to LLVM; we certainly
|
||||
don't want to declare two LLVM functions
|
||||
{{< sidenote "right" "mangling-note" "with the same name." >}}
|
||||
By the way, LLVM has its own name mangling functionality. If you
|
||||
declare two functions with the same name, they'll appear as
|
||||
<code>function</code> and <code>function.0</code>. Since LLVM
|
||||
uses the <code>Function*</code> C++ values to refer to functions,
|
||||
as long as we keep them seaprate on <em>our</em> end, things will
|
||||
work.<br>
|
||||
<br>
|
||||
However, in our compiler, name mangling occurs before LLVM is
|
||||
introduced, at translation time. We could create LLVM functions
|
||||
at that time, too, and associate them with variables. But then,
|
||||
our G-machine instructions will be coupled to LLVM, which
|
||||
would not be as clean.
|
||||
{{< /sidenote >}}
|
||||
But things are different for local variables. Our local variables
|
||||
are graphs on a stack, and are not actually compiled to LLVM
|
||||
definitions. It doesn't make sense to mangle their names, since
|
||||
their names aren't present anywhere in the final executable.
|
||||
It's not even "consistent" to mangle them, since global definitions
|
||||
are compiled directly to __PushGlobal__ instructions, while local
|
||||
variables are only referenced through the current `env`.
|
||||
So, I opted to reverse my decision. We will go back to
|
||||
placing variable names directly into `env_var`. Here's
|
||||
an example of this from `global_scope.cpp`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/global_scope.cpp" 6 8 >}}
|
||||
|
||||
Now that we've started using assertions, I also think it's worth
|
||||
to put our new invariant -- "only global definitions have mangled
|
||||
names" -- into code:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.cpp" 36 45 >}}
|
||||
|
||||
Furthermore, we'll _require_ that a global definition
|
||||
has a mangled name. This way, we can be more confident
|
||||
that a variable from a __PushGlobal__ instruction
|
||||
is referencing the right function. To achieve
|
||||
this, we change `get_mangled_name` to stop
|
||||
returning the input string if a mangled name was not
|
||||
found; doing so makes it impossible to check if a mangled
|
||||
name was explicitly defined. Instead,
|
||||
we add two assertions. First, if an environment scope doesn't
|
||||
contain a variable, then it _must_ have a parent.
|
||||
If it does contain variable, that variable _must_ have
|
||||
a mangled name. We end up with the following:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.cpp" 47 55 >}}
|
||||
|
||||
For this to work, we make one more change. Now that we've
|
||||
enabled C++17, we have access to `std::optional`. We
|
||||
can thus represent the presence or absence of mangled
|
||||
names using an optional field, rather than with the empty string `""`.
|
||||
I hear that C++ compilers have pretty good
|
||||
[empty string optimizations](https://www.youtube.com/watch?v=kPR8h4-qZdk),
|
||||
but nonetheless, I think it makes more sense semantically
|
||||
to use "absent" (`nullopt`) instead of "empty" (`""`).
|
||||
Here's the definition of `type_env::variable_data` now:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type_env.hpp" 16 25 >}}
|
||||
|
||||
Since looking up a mangled name for non-global variable
|
||||
{{< sidenote "right" "unrepresentable-note" "will now result in an assertion failure," >}}
|
||||
A very wise human at the very dawn of our species once said,
|
||||
"make illegal states unrepresentable". Their friends and family were a little
|
||||
busy making a fire, and didn't really understand what the heck they meant. Now,
|
||||
we kind of do.<br>
|
||||
<br>
|
||||
It's <em>possible</em> for our <code>type_env</code> to include a
|
||||
<code>variable_data</code> entry that is both global and has no mangled
|
||||
name. But it doesn't have to be this way. We could define two subclasses
|
||||
of <code>variable_data</code>, one global and one local,
|
||||
where only the global one has a <code>mangled_name</code>
|
||||
field. It would be impossible to reach this assertion failure then.
|
||||
{{< /sidenote >}} we have to change
|
||||
`ast_lid::compile` to only call `get_mangled_name` once
|
||||
it ensures that the variable being compiled is, in fact,
|
||||
global:
|
||||
|
||||
{{< codelines "C++" "compiler/13/ast.cpp" 58 63 >}}
|
||||
|
||||
Since all global functions now need to have mangled
|
||||
names, we run into a bit of a problem. What are
|
||||
the mangled names of `(+)`, `(-)`, and so on? We could
|
||||
continue to hardcode them as `plus`, `minus`, etc., but this can
|
||||
(and currently does!) lead to errors. Consider the following
|
||||
piece of code:
|
||||
|
||||
```
|
||||
defn plus x y = { x + y }
|
||||
defn main = { plus 320 6 }
|
||||
```
|
||||
|
||||
We've hardcoded the mangled name of `(+)` to be `plus`. However,
|
||||
`global_scope` doesn't know about this, so when the actual
|
||||
`plus` function gets translated, it also gets assigned the
|
||||
mangled name `plus`. The name is also overwritten in the
|
||||
`llvm_context`, which effectively means that `(+)` is
|
||||
now compiled to a call of the user-defined `plus` function.
|
||||
If we didn't overwrite the name, we would've run into an assertion
|
||||
failure in this scenario anyway. In short, this example illustrates
|
||||
an important point: mangling information needs to be available
|
||||
outside of a `global_scope`. We don't want to do this by having
|
||||
every function take in a `global_scope` to access the mangling
|
||||
information; instead, we'll store the mangling information in
|
||||
a new `mangler` class, which `global_scope` will take as an argument.
|
||||
The new class is very simple:
|
||||
|
||||
{{< codelines "C++" "compiler/13/mangler.hpp" 5 11 >}}
|
||||
|
||||
As with `parse_driver`, `global_scope` takes `mangler` by reference
|
||||
and stores a pointer:
|
||||
|
||||
{{< codelines "C++" "compiler/13/global_scope.hpp" 50 50 >}}
|
||||
|
||||
The implementation of `new_mangled_name` doesn't change, so I'm
|
||||
not going to show it here. With this new mangling information
|
||||
in hand, we can now correctly set the mangled names of binary
|
||||
operators:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.cpp" 22 27 >}}
|
||||
|
||||
Wait a moment, what's a `compiler`? Let's talk about that next.
|
||||
|
||||
### A Top-Level Class
|
||||
Now that we've moved name mangling out of `global_scope`, we have
|
||||
to put it somewhere. The same goes for global definition group
|
||||
and the file manager that are given to `parse_driver`. The two
|
||||
classes _make use_ of the other data, but they don't _own it_.
|
||||
That's why they take it by reference, and store it as a pointer.
|
||||
They're just temporarily allowed access.
|
||||
|
||||
So, what should be the owner of all of these disparate components?
|
||||
Thus far, that has been the `main` function, or the utility
|
||||
functions that it calls out to. However, this is sloppy:
|
||||
we have related data and operations on it, but we don't group
|
||||
them into an object. We can group all of the components of our
|
||||
compiler into a `compiler` object, and leave `main.cpp` with
|
||||
exception printing code.
|
||||
|
||||
The definition of the `compiler` class begins with all of the data
|
||||
structures that we use in the process of compilation:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.hpp" 12 20 >}}
|
||||
|
||||
There's a loose ordering to these fields. In C++, class members are
|
||||
initialized in the order they are declared; we therefore want to make
|
||||
sure that fields that are depended on by other fields are initialized first.
|
||||
Otherwise, I tried to keep the order consistent with the conceptual path
|
||||
of the code through the compiler.
|
||||
* Parsing happens first, so we begin with `parse_driver`, which needs a
|
||||
`file_manager` (to populate with line information) and a `definition_group`
|
||||
(to receive the global definitions from the parser).
|
||||
* We then proceed to typechecking, for which we use a global `type_env_ptr`
|
||||
(to define the built-in functions and constructors) and a `type_mgr` (to
|
||||
manage the assignments of type variables).
|
||||
* Once a program is typechecked, we transform it, eliminating local
|
||||
function definitions and lambda functions. This is done by storing
|
||||
newly-emitted global functions into the `global_scope`, which requires a
|
||||
`mangler` to generate new names for the target functions.
|
||||
* Finally, to generate LLVM IR, we need our `llvm_context` class.
|
||||
|
||||
The methods of the compiler are arranged similarly:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.hpp" 22 31 >}}
|
||||
|
||||
The methods go as follows:
|
||||
|
||||
* `add_default_types` adds the built-in types to the `global_env`.
|
||||
At this point, these types only include `Int`.
|
||||
* `add_binop_type` adds a single binary operator to the global
|
||||
type environment. We saw its implementation earlier: it deals
|
||||
with both binding a type, and setting a mangled name.
|
||||
* `add_default_types` adds the types for each binary operator.
|
||||
* `parse`, `typecheck`, `translate` and `compile` all do exactly
|
||||
what they say. In this case, compilation refers to creating G-machine
|
||||
instructions.
|
||||
* `create_llvm_binop` creates an internal function that forces the
|
||||
evaluation of its two arguments, and actually applies the given binary
|
||||
operator. Recall that the `(+)` in user code constructs a call to this
|
||||
function, but leaves it unevaluated until it's needed.
|
||||
* `generate_llvm` converts all the definitions in `global_scope`, which
|
||||
are at this point compiled into G-machine `instruction`s, into LLVM IR.
|
||||
* `output_llvm` contains all the code to actually generate an object
|
||||
file from the LLVM IR.
|
||||
|
||||
These functions are mostly taken from part 12's `main.cpp`, and adjusted
|
||||
to use the `compiler`'s members rather than local definitions or arguments.
|
||||
You should compare part 12's
|
||||
[`main.cpp`](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/12/main.cpp)
|
||||
file with the
|
||||
[`compiler.cpp`](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/13/compiler.cpp)
|
||||
file that we end up with at the end of this post.
|
||||
|
||||
Next, we have the compiler's constructor, and its `operator()`. The
|
||||
latter, analogously to our parsing driver, will trigger the compilation
|
||||
process. Their implementations are straightforward:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.cpp" 131 145 >}}
|
||||
|
||||
We also add a couple of methods to give external code access to
|
||||
some of the compiler's data structures. I omit their (trivial)
|
||||
implementations, but they have the following signatures:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.hpp" 35 36 >}}
|
||||
|
||||
With all the compilation code tucked into our new `compiler` class,
|
||||
`main` becomes very simple. We also finally get to use our exception
|
||||
pretty printing code:
|
||||
|
||||
{{< codelines "C++" "compiler/13/main.cpp" 11 27 >}}
|
||||
|
||||
With this, we complete our transition to a compiler object.
|
||||
All that's left is to clean up the code style.
|
||||
|
||||
### Keeping Things Private
|
||||
Hand-writing or generating hundreds of trivial getters and setters
|
||||
for the fields of a data class (which is standard in the world of Java) seems
|
||||
absurd to me. So, for most of this project, I stuck with
|
||||
`struct`s, rather than classes. But this is not a good policy
|
||||
to apply _everywhere_. I still think it makes sense to make
|
||||
data structures like `ast` and `type` public-by-default;
|
||||
however, I _don't_ think that way about classes like `type_mgr`,
|
||||
`llvm_context`, `type_env`, and `env`. All of these have information
|
||||
that we should never be accessing directly. Some guard this
|
||||
information with assertions. In short, it should be protected.
|
||||
|
||||
For most classes, the changes are mechanical. For instance, we
|
||||
can make `type_env` a class simply by changing its declaration,
|
||||
and marking all of its functions public. This requires a slight
|
||||
refactoring of a line that used its `parent` field. Here's
|
||||
what it used to be (in context):
|
||||
|
||||
{{< codelines "C++" "compiler/12/main.cpp" 57 60 >}}
|
||||
|
||||
And here's what it is now:
|
||||
|
||||
{{< codelines "C++" "compiler/13/compiler.cpp" 55 58 >}}
|
||||
|
||||
Rather than traversing the chain of environments from
|
||||
the body of the definition, we just use the definition's
|
||||
own `env_ptr`. This is cleaner and more explicit, and
|
||||
it helps us not use the private members of `type_env`!
|
||||
|
||||
The deal with `env` is about as simple. We just make
|
||||
it and its two descendants classes, and mark their
|
||||
methods and constructors public. The same
|
||||
goes for `global_scope`. To make `type_mgr`
|
||||
a class, we have to add a new method: `lookup`.
|
||||
Here's its implementation:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.cpp" 81 85 >}}
|
||||
|
||||
It's used in `type_var::print` as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/13/type.cpp" 28 35 >}}
|
||||
|
||||
We can't use `resolve` here because it takes (and returns)
|
||||
a `type_ptr`. If we make it _take_ a `type*`, it won't
|
||||
be able to return its argument if it's already resolved. If we
|
||||
allow it to _return_ `type*`, we won't have an owning
|
||||
reference. We also don't want to duplicate the
|
||||
method just for this one call. Notice, though, how similar
|
||||
`type_var::print`/`lookup` and `resolve` are in terms of execution.
|
||||
|
||||
The change for `llvm_context` requires a little more work.
|
||||
Right now, `ctx.builder` is used a _lot_ in `instruction.cpp`.
|
||||
Since we don't want to forward each of the LLVM builder methods,
|
||||
and since it feels weird to make `llvm_context` extend `llvm::IRBuilder`,
|
||||
we'll just provide a getter for the `builder` field. The
|
||||
same goes for `module`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/llvm_context.hpp" 46 47 >}}
|
||||
|
||||
Here's what some of the code from `instruction.cpp` looks like now:
|
||||
|
||||
{{< codelines "C++" "compiler/13/instruction.cpp" 144 145 >}}
|
||||
|
||||
Right now, the `ctx` field of the `llvm_context` (which contains
|
||||
the `llvm::LLVMContext`) is only externally used to create
|
||||
instances of `llvm::BasicBlock`. We'll add a proxy method
|
||||
for this functionality:
|
||||
|
||||
{{< codelines "C++" "compiler/13/llvm_context.cpp" 174 176 >}}
|
||||
|
||||
Finally, `instruction_pushglobal` needs to access the
|
||||
`llvm::Function` instances that we create in the process
|
||||
of compilation. We add a new `get_custom_function` method
|
||||
to support this, which automatically prefixes the function
|
||||
name with `f_`, much like `create_custom_function`:
|
||||
|
||||
{{< codelines "C++" "compiler/13/llvm_context.cpp" 292 294 >}}
|
||||
|
||||
I think that's enough. If we chose to turn more compiler
|
||||
data structures into classes, I think we would've quickly drowned
|
||||
in one-line getter and setter methods.
|
||||
|
||||
That's all for the cleanup! We've added locations and more errors
|
||||
to the compiler, stopped throwing `0` in favor of proper exceptions
|
||||
or assertions, made name mangling more reasonable, fixed a bug with
|
||||
accidentally shadowing default functions, organized our compilation
|
||||
process into a `compiler` class, and made more things into classes.
|
||||
In the next post, I hope to tackle __strings__ and __Input/Output__.
|
||||
I also think that implementing __modules__ would be a good idea,
|
||||
though at the moment I don't know too much on the subject. I hope
|
||||
you'll join me in my future writing!
|
||||
|
||||
### Appendix: Optimization
|
||||
When I started working on the compiler after the previous post,
|
||||
I went a little overboard. I started working on optimizing the generated programs,
|
||||
but eventually decided I wasn't doing a
|
||||
{{< sidenote "right" "good-note" "good enough" >}}
|
||||
I think authors should feel a certain degree of responsibility
|
||||
for the content they create. If I do something badly, somebody
|
||||
else trusts me and learns from it, who knows how much damage I've done.
|
||||
I try not to do damage.<br>
|
||||
<br>
|
||||
If anyone reads what I write, anyway!
|
||||
{{< /sidenote >}} job to present it to others,
|
||||
and scrapped that part of the compiler altogether. I'm not
|
||||
sure if I will try again in the near future. But,
|
||||
if you're curious about optimization, here are a few avenues
|
||||
I've explored or thought about:
|
||||
|
||||
* __Unboxing numbers__. Right now, numbers are allocated and garbage
|
||||
collected just like the rest of the graph nodes. This is far from ideal.
|
||||
We could use pointers to represent numbers, by tagging their most significant
|
||||
bits on 64-bit CPUs. Rather than allocating a node, the runtime will just
|
||||
cast a number to a pointer, tag it, and push it on the stack.
|
||||
* __Converting enumeration data types to numbers__. If no constructor
|
||||
of a data type takes any arguments, then the tag uniquely identifies
|
||||
each constructor. Combined with unboxed numbers, this can save unnecessary
|
||||
allocations and memory accesses.
|
||||
* __Special treatment for global constants__. It makes sense for
|
||||
global functions to be converted into LLVM functions, but the
|
||||
same is not the case for
|
||||
{{< sidenote "right" "constant-note" "constants." >}}
|
||||
Yeah, yeah, a constant is just a nullary function. Get
|
||||
out of here with your pedantry!
|
||||
{{< /sidenote >}} We can find a way to
|
||||
initialize global constants once, which would save some work. To
|
||||
make more constants suitable for this, we could employ
|
||||
[monomorphism restriction](https://wiki.haskell.org/Monomorphism_restriction).
|
||||
* __Optimizing stack operations.__ If you read through the LLVM IR
|
||||
we produce, you can see a lot of code that peeks at something twice,
|
||||
or pops-then-pushes the same value, or does other absurd things. LLVM
|
||||
isn't aware of the semantics of our stacks, but perhaps we could write an
|
||||
optimization pass to deal with some of the more blatant instances of
|
||||
this issue.
|
||||
|
||||
If you attempt any of these, let me know how it goes, please!
|
||||
311
content/blog/boolean_values.md
Normal file
311
content/blog/boolean_values.md
Normal file
@@ -0,0 +1,311 @@
|
||||
---
|
||||
title: "How Many Values Does a Boolean Have?"
|
||||
date: 2020-08-21T23:05:55-07:00
|
||||
tags: ["Java", "Haskell", "C and C++"]
|
||||
---
|
||||
|
||||
A friend of mine recently had an interview for a software
|
||||
engineering position. They later recounted to me the content
|
||||
of the technical questions that they had been asked. Some had
|
||||
been pretty standard:
|
||||
|
||||
* __"What's the difference between concurrency
|
||||
and parallelism?"__ -- a reasonable question given that Go was
|
||||
the company's language of choice.
|
||||
* __"What's the difference between a method and a function?"__ --
|
||||
a little more strange, in my opinion, since the difference
|
||||
is of little _practical_ use.
|
||||
|
||||
But then, they recounted a rather interesting question:
|
||||
|
||||
> How many values does a bool have?
|
||||
|
||||
Innocuous at first, isn't it? Probably a bit simpler, in fact,
|
||||
than the questions about methods and functions, concurrency
|
||||
and parallelism. It's plausible that a candidate
|
||||
has not done much concurrent or parallel programming in their
|
||||
life, or that they came from a language in which functions
|
||||
were rare and methods were ubiquitous. It's not plausible,
|
||||
on the other hand, that a candidate applying to a software
|
||||
engineering position has not encountered booleans.
|
||||
|
||||
If you're genuinely unsure about the answer to the question,
|
||||
I think there's no reason for me to mess with you. The
|
||||
simple answer to the question -- as far as I know -- is that a boolean
|
||||
has two values. They are `true` and `false` in Java, or `True` and `False`
|
||||
in Haskell, and `1` and `0` in C. A boolean value is either true or false.
|
||||
|
||||
So, what's there to think about? There are a few things, _ackshually_.
|
||||
Let's explore them, starting from the theoretical perspective.
|
||||
|
||||
### Types, Values, and Expressions
|
||||
Boolean, or `bool`, is a type. Broadly speaking, a type
|
||||
is a property of _something_ that defines what the _something_
|
||||
means and what you can do with it. That _something_ can be
|
||||
several things; for our purposes, it can either be an
|
||||
_expression_ in a programming language (like those in the form `fact(n)`)
|
||||
or a value in that same programming language (like `5`).
|
||||
|
||||
Dealing with values is rather simple. Most languages have finite numbers,
|
||||
usually with \\(2^{32}\\) values, which have type `int`,
|
||||
`i32`, or something in a similar vein. Most languages also have
|
||||
strings, of which there are as many as you have memory to contain,
|
||||
and which have the type `string`, `String`, or occasionally
|
||||
the more confusing `char*`. Most languages also have booleans,
|
||||
as we discussed above.
|
||||
|
||||
The deal with expressions is a more interesting. Presumably
|
||||
expressions evaluate to values, and the type of an expression
|
||||
is then the type of values it can yield. Consider the following
|
||||
snippet in C++:
|
||||
|
||||
```C
|
||||
int square(int x) {
|
||||
return x * x;
|
||||
}
|
||||
```
|
||||
|
||||
Here, the expression `x` is known to have type `int` from
|
||||
the type signature provided by the user. Multiplication
|
||||
of integers yields an integer, and so the type of `x*x` is also
|
||||
of type `int`. Since `square(x)` returns `x*x`, it is also
|
||||
of type `int`. So far, so good.
|
||||
|
||||
Okay, how about this:
|
||||
|
||||
```C++
|
||||
int meaningOfLife() {
|
||||
return meaningOfLife();
|
||||
}
|
||||
```
|
||||
|
||||
No, wait, doesn't say "stack overflow" just yet. That's no fun.
|
||||
And anyway, this is technically a tail call, so maybe our
|
||||
C++ compiler can avoid growing the stack. And indeed,
|
||||
flicking on the `-O2` flag in this [compiler explorer example](https://godbolt.org/z/9cv4nY),
|
||||
we can see that no stack growth is necessary: it's just
|
||||
an infinite loop. But `meaningOfLife` will never return a value. One could say
|
||||
this computation _diverges_.
|
||||
|
||||
Well, if it diverges, just throw the expression out of the window! That's
|
||||
no `int`! We only want _real_ `int`s!
|
||||
|
||||
And here, we can do that. But what about the following:
|
||||
|
||||
```C++
|
||||
inf_int collatz(inf_int x) {
|
||||
if(x == 1) return 1;
|
||||
if(x % 2 == 0) return collatz(x/2);
|
||||
return collatz(x * 3 + 1);
|
||||
}
|
||||
```
|
||||
|
||||
Notice that I've used the fictitious type
|
||||
`inf_int` to represent integers that can hold
|
||||
arbitrarily large integer values, not just the 32-bit ones.
|
||||
That is important for this example, and I'll explain why shortly.
|
||||
|
||||
The code in the example is a simulation of the process described
|
||||
in the [Collatz conjecture](https://en.wikipedia.org/wiki/Collatz_conjecture).
|
||||
Given an input number `x`, if the number is even, it's divided in half,
|
||||
and the process continues with the halved number. If, on the other
|
||||
hand, the number is odd, it's multiplied by 3, 1 is added to it,
|
||||
and the process continues with _that_ number. The only way for the
|
||||
process to terminate is for the computation to reach the value 1.
|
||||
|
||||
Why does this matter? Because as of right now, __nobody knows__
|
||||
whether or not the process terminates for all possible input numbers.
|
||||
We have a strong hunch that it does; we've checked a __lot__
|
||||
of numbers and found that the process terminates for them.
|
||||
This is why 32-bit integers are not truly sufficient for this example;
|
||||
we know empirically that the function will terminate for them.
|
||||
|
||||
But why does _this_ matter? Well, it matters because we don't know
|
||||
whether or not this function will diverge, and thus, we can't
|
||||
'throw it out of the window' like we wanted to with `meaningOfLife`!
|
||||
In general, it's _impossible to tell_ whether or not a program will
|
||||
terminate; that is the [halting problem](https://en.wikipedia.org/wiki/Halting_problem).
|
||||
So, what do we do?
|
||||
|
||||
It turns out to be convenient -- formally -- to treat the result of a diverging computation
|
||||
as its own value. This value is usually called 'bottom', and written as \\(\\bot\\).
|
||||
Since in most programming languages, you can write a nonterminating expression or
|
||||
function of any type, this 'bottom' is included in _all_ types. So in fact, the
|
||||
possible values of `unsigned int` are \\(\\bot, 0, 1, 2, ...\\) and so on.
|
||||
As you may have by now guessed, the same is true for a boolean: we have \\(\\bot\\), `true`, and `false`.
|
||||
|
||||
### Haskell and Bottom
|
||||
You may be thinking:
|
||||
|
||||
> Now he's done it; he's gone off the deep end with all that programming language
|
||||
theory. Tell me, Daniel, where the heck have you ever encountered \\(\\bot\\) in
|
||||
code? This question was for a software engineering interview, after all!
|
||||
|
||||
You're right; I haven't _specifically_ seen the symbol \\(\\bot\\) in my time
|
||||
programming. But I have frequently used an equivalent notation for the same idea:
|
||||
`undefined`. In fact, here's a possible definition of `undefined` in Haskell:
|
||||
|
||||
```
|
||||
undefined = undefined
|
||||
```
|
||||
|
||||
Just like `meaningOfLife`, this is a divergent computation! What's more is that
|
||||
the type of this computation is, in Haskell, `a`. More explicitly -- and retreating
|
||||
to more mathematical notation -- we can write this type as: \\(\\forall \\alpha . \\alpha\\).
|
||||
That is, for any type \\(\\alpha\\), `undefined` has that type! This means
|
||||
`undefined` can take on _any_ type, and so, we can write:
|
||||
|
||||
```Haskell
|
||||
myTrue :: Bool
|
||||
myTrue = True
|
||||
|
||||
myFalse :: Bool
|
||||
myFalse = False
|
||||
|
||||
myBool :: Bool
|
||||
myBool = undefined
|
||||
```
|
||||
|
||||
In Haskell, this is quite useful. For instance, if one's in the middle
|
||||
of writing a complicated function, and wants to check their work so far,
|
||||
they can put 'undefined' for the part of the function they haven't written.
|
||||
They can then compile their program; the typechecker will find any mistakes
|
||||
they've made so far, but, since the type of `undefined` can be _anything_,
|
||||
that part of the program will be accepted without second thought.
|
||||
|
||||
The language Idris extends this practice with the idea of typed holes,
|
||||
where you can leave fragments of your program unwritten, and ask the
|
||||
compiler what kind of _thing_ you need to write to fill that hole.
|
||||
|
||||
### Java and `null`
|
||||
Now you may be thinking:
|
||||
|
||||
> This whole deal with Haskell's `undefined` is beside the point; it doesn't
|
||||
really count as a value, since it's just a nonterminating
|
||||
expression. What you're doing is a kind of academic autofellatio.
|
||||
|
||||
Alright, I can accept this criticism. Perhaps just calling a nonterminating
|
||||
function a value _is_ far-fetched (even though in [denotational semantics](https://en.wikipedia.org/wiki/Denotational_semantics)
|
||||
we _do_ extend types with \\(\\bot\\)). But denotational semantics are not
|
||||
the only place where types are implicitly extend with an extra value;
|
||||
let's look at Java.
|
||||
|
||||
In Java, we have `null`. At the
|
||||
core language level, any function or method that accepts a class can also take `null`;
|
||||
if `null` is not to that function or method's liking, it has to
|
||||
explicitly check for it using `if(x == null)`.
|
||||
|
||||
This `null` value does not at first interact with booleans.
|
||||
After all, Java's booleans are not classes. Unlike classes, which you have
|
||||
to allocate using `new`, you can just throw around `true` and `false` as you see
|
||||
fit. Also unlike classes, you simply can't assign `null` to a boolean value.
|
||||
|
||||
The trouble is, the parts of Java dealing with _generics_, which allow you to write
|
||||
polymorphic functions, can't handle 'primitives' like `bool`. If you want to have an `ArrayList`
|
||||
of something, that something _must_ be a class.
|
||||
But what if you really _do_ want an `ArrayList` of booleans? Java solves this problem by introducing
|
||||
'boxed' booleans: they're primitives wrapped in a class, called `Boolean`. This class
|
||||
can then be used for generics.
|
||||
|
||||
But see, this is where `null` has snuck in again. By allowing `Boolean` to be a class
|
||||
(thereby granting it access to generics), we've also given it the ability to be null.
|
||||
This example is made especially compelling because Java supports something
|
||||
they call [autoboxing](https://docs.oracle.com/javase/tutorial/java/data/autoboxing.html):
|
||||
you can directly assign a primitive to a variable of the corresponding boxed type.
|
||||
Consider the example:
|
||||
|
||||
```Java
|
||||
Boolean myTrue = true;
|
||||
Boolean myFalse = false;
|
||||
Boolean myBool = null;
|
||||
```
|
||||
|
||||
Beautiful, isn't it? Better yet, unlike Haskell, where you can't _really_
|
||||
check if your `Bool` is `undefined` (because you can't tell whether
|
||||
a non-terminating computation is as such), you can very easily
|
||||
check if your `Boolean` is `true`, `false`, or `null`:
|
||||
|
||||
```Java
|
||||
assert myTrue != myFalse;
|
||||
assert myFalse != myBool;
|
||||
assert myTrue != myBool;
|
||||
```
|
||||
|
||||
We're okay to use `!=` here, instead of `equals`, because it so happens
|
||||
each boxed instance of a `boolean` value
|
||||
[refers to the same `Boolean` object](https://stackoverflow.com/questions/28636738/equality-of-boxed-boolean).
|
||||
In fact, this means that a `Boolean` variable can have __exactly__ 3 values!
|
||||
|
||||
### C and Integers
|
||||
Oh the luxury of having a type representing booleans in your language!
|
||||
It's almost overly indulgent compared to the spartan minimalism of C.
|
||||
In C, boolean conditions are represented as numbers. You can perhaps get
|
||||
away with throwing around `char` or `short int`, but even then,
|
||||
these types allow far more values than two!
|
||||
|
||||
```C
|
||||
unsigned char test = 255;
|
||||
while(test) test -= 1;
|
||||
```
|
||||
|
||||
This loop will run 255 times, thereby demonstrating
|
||||
that C has at least 255 values that can be used
|
||||
to represent the boolean `true`.
|
||||
|
||||
There are other languages
|
||||
with this notion of 'truthy' and 'falsey' values, in which
|
||||
something not exactly `true` or `false` can be used as a condition. However,
|
||||
some of them differ from C in that they also extend this idea
|
||||
to equality. In JavaScript:
|
||||
|
||||
```JavaScript
|
||||
console.assert(true == 1)
|
||||
console.assert(false == 0)
|
||||
```
|
||||
|
||||
Then, there are still exactly two distinct boolean values
|
||||
modulo `==`. No such luck in C, though! We have 256 values that fit in `unsigned char`,
|
||||
all of which are also distinct modulo `==`. Our boolean
|
||||
variable can contain all of these values. And there is no
|
||||
respite to be found with `enum`s, either. We could try define:
|
||||
|
||||
```C
|
||||
enum bool { TRUE, FALSE };
|
||||
```
|
||||
|
||||
Unfortunately, all this does is define `bool` to be a numeric
|
||||
type that can hold at least 2 distinct values, and define
|
||||
numeric constants `TRUE` and `FALSE`. So in fact, you can
|
||||
_still_ write the following code:
|
||||
|
||||
```C
|
||||
enum bool b1 = TRUE;
|
||||
enum bool b2 = FALSE;
|
||||
enum bool b3 = 15;
|
||||
```
|
||||
|
||||
And so, no matter how hard you try, your 'boolean'
|
||||
variable can have many, many values!
|
||||
|
||||
### Conclusion
|
||||
I think that 'how many values does a boolean have' is a strange
|
||||
question. Its purpose can be one of two things:
|
||||
|
||||
* The interviewer expected a long-form response such as this one.
|
||||
This is a weird expectation for a software engineering candidate -
|
||||
how does knowing about \\(\\bot\\), `undefined`, or `null` help in
|
||||
creating software, especially if this information is irrelevant
|
||||
to the company's language of choice?
|
||||
* The interviewer expected the simple answer. In that case,
|
||||
my previous observation applies: what software engineering
|
||||
candidate has _not_ seen a boolean in their time programming?
|
||||
Surely candidates are better screened before they are offered
|
||||
an interview?
|
||||
|
||||
Despite the question's weirdness, I think that the resulting
|
||||
investigation of the matter -- outside of the interview setting --
|
||||
is useful, and perhaps, in a way, enlightening. It may help
|
||||
one understand the design choices made in _their_ language of choice,
|
||||
and how those choices shape the code that they write.
|
||||
|
||||
That's all I have! I hope that you found it interesting.
|
||||
@@ -103,6 +103,17 @@ needed to compute the final answer can exist, unsimplified, in the tree.
|
||||
Why don't we draw a few graphs to get familiar with the idea?
|
||||
|
||||
### Visualizing Graphs and Their Reduction
|
||||
__A word of caution__: the steps presented below may significantly differ
|
||||
from the actual graph reduction algorithms used by modern compilers.
|
||||
In particular, this section draws a lot of ideas from Simon Peyton Jones' book,
|
||||
[_Implementing functional languages: a tutorial_](https://www.microsoft.com/en-us/research/publication/implementing-functional-languages-a-tutorial/).
|
||||
However, modern functional compilers (i.e. GHC) use a much more
|
||||
complicated abstract machine for evaluating graph-based code,
|
||||
based on -- from what I know -- the [spineless tagless G-machine](https://www.microsoft.com/en-us/research/wp-content/uploads/1992/04/spineless-tagless-gmachine.pdf).
|
||||
In short, this section, in order to build intuition, walks through how a functional program
|
||||
evaluated using graph reduction _may_ behave; the actual details
|
||||
depend on the compiler.
|
||||
|
||||
Let's start with something that doesn't have anything fancy. We can
|
||||
take a look at the graph of the expression:
|
||||
|
||||
|
||||
476
content/blog/typesafe_imperative_lang.md
Normal file
476
content/blog/typesafe_imperative_lang.md
Normal file
@@ -0,0 +1,476 @@
|
||||
---
|
||||
title: "A Typesafe Representation of an Imperative Language"
|
||||
date: 2020-11-02T01:07:21-08:00
|
||||
tags: ["Idris"]
|
||||
---
|
||||
|
||||
A recent homework assignment for my university's programming languages
|
||||
course was to encode the abstract syntax for a small imperative language
|
||||
into Haskell data types. The language consisted of very few constructs, and was very much a "toy".
|
||||
On the expression side of things, it had three registers (`A`, `B`, and `R`),
|
||||
numbers, addition, comparison using "less than", and logical negation. It also
|
||||
included a statement for storing the result of an expression into
|
||||
a register, `if/else`, and an infinite loop construct with an associated `break` operation.
|
||||
A sample program in the language which computes the product of two
|
||||
numbers is as follows:
|
||||
|
||||
```
|
||||
A := 7
|
||||
B := 9
|
||||
R := 0
|
||||
do
|
||||
if A <= 0 then
|
||||
break
|
||||
else
|
||||
R := R + B;
|
||||
A := A + -1;
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
The homework notes that type errors may arise in the little imperative language.
|
||||
We could, for instance, try to add a boolean to a number: `3 + (1 < 2)`. Alternatively,
|
||||
we could try use a number in the condition of an `if/else` expression. A "naive"
|
||||
encoding of the abstract syntax would allow for such errors.
|
||||
|
||||
However, assuming that registers could only store integers and not booleans, it is fairly easy to
|
||||
separate the expression grammar into two nonterminals, yielding boolean
|
||||
and integer expressions respectively. Since registers can only store integers,
|
||||
the `(:=)` operation will always require an integer expression, and an `if/else`
|
||||
statement will always require a boolean expression. A matching Haskell encoding
|
||||
would not allow "invalid" programs to compile. That is, the programs would be
|
||||
type-correct by construction.
|
||||
|
||||
Then, a question arose in the ensuing discussion: what if registers _could_
|
||||
contain booleans? It would be impossible to create such a "correct-by-construction"
|
||||
representation then, wouldn't it?
|
||||
{{< sidenote "right" "haskell-note" "Although I don't know about Haskell," >}}
|
||||
I am pretty certain that a similar encoding in Haskell is possible. However,
|
||||
Haskell wasn't originally created for that kind of abuse of its type system,
|
||||
so it would probably not look very good.
|
||||
{{< /sidenote >}} I am sure that it _is_ possible to do this
|
||||
in Idris, a dependently typed programming language. In this post I will
|
||||
talk about how to do that.
|
||||
|
||||
### Registers and Expressions
|
||||
Let's start by encoding registers. Since we only have three registers, we
|
||||
can encode them using a simple data type declaration, much the same as we
|
||||
would in Haskell:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 1 1 >}}
|
||||
|
||||
Now that registers can store either integers or booleans (and only those two),
|
||||
we need to know which one is which. For this purpose, we can declare another
|
||||
data type:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 3 3 >}}
|
||||
|
||||
At any point in the (hypothetical) execution of our program, each
|
||||
of the registers will have a type, either boolean or integer. The
|
||||
combined state of the three registers would then be the combination
|
||||
of these three states; we can represent this using a 3-tuple:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 5 6 >}}
|
||||
|
||||
Let's say that the first element of the tuple will be the type of the register
|
||||
`A`, the second the type of `B`, and the third the type of `R`. Then,
|
||||
we can define two helper functions, one for retrieving the type of
|
||||
a register, and one for changing it:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 8 16 >}}
|
||||
|
||||
Now, it's time to talk about expressions. We know now that an expression
|
||||
can evaluate to either a boolean or an integer value (because a register
|
||||
can contain either of those types of values). Perhaps we can specify
|
||||
the type that an expression evaluates to in the expression's own type:
|
||||
`Expr IntTy` would evaluate to integers, and `Expr BoolTy` would evaluate
|
||||
to booleans. Then, we could have constructors as follows:
|
||||
|
||||
```Idris
|
||||
Lit : Int -> Expr IntTy
|
||||
Not : Expr BoolTy -> Expr BoolTy
|
||||
```
|
||||
|
||||
Sounds good! But what about loading a register?
|
||||
|
||||
```Idris
|
||||
Load : Reg -> Expr IntTy -- no; what if the register is a boolean?
|
||||
Load : Reg -> Expr BoolTy -- no; what if the register is an integer?
|
||||
Load : Reg -> Expr a -- no; a register access can't be either!
|
||||
```
|
||||
|
||||
The type of an expression that loads a register depends on the current
|
||||
state of the program! If we last stored an integer into a register,
|
||||
then loading from that register would give us an integer. But if we
|
||||
last stored a boolean into a register, then reading from it would
|
||||
give us a boolean. Our expressions need to be aware of the current
|
||||
types of each register. To do so, we add the state as a parameter to
|
||||
our `Expr` type. This would lead to types like the following:
|
||||
|
||||
```Idris
|
||||
-- An expression that produces a boolean when all the registers
|
||||
-- are integers.
|
||||
Expr (IntTy, IntTy, IntTy) BoolTy
|
||||
|
||||
-- An expression that produces an integer when A and B are integers,
|
||||
-- and R is a boolean.
|
||||
Expr (IntTy, IntTy, BoolTy) IntTy
|
||||
```
|
||||
|
||||
In Idris, the whole definition becomes:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 18 23 >}}
|
||||
|
||||
The only "interesting" constructor is `Load`, which, given a register `r`,
|
||||
creates an expression that has `r`'s type in the current state `s`.
|
||||
|
||||
### Statements
|
||||
Statements are a bit different. Unlike expressions, they don't evaluate to
|
||||
anything; rather, they do something. That "something" may very well be changing
|
||||
the current state. We could, for instance, set `A` to be a boolean, while it was
|
||||
previously an integer. This suggests equipping our `Stmt` type with two
|
||||
arguments: the initial state (before the statement's execution), and the final
|
||||
state (after the statement's execution). This would lead to types like this:
|
||||
|
||||
```Idris
|
||||
-- Statement that, when run while all registers contain integers,
|
||||
-- terminates with registers B and R having been assigned boolean values.
|
||||
Stmt (IntTy, IntTy, IntTy) (IntTy, BoolTy, BoolTy)
|
||||
```
|
||||
|
||||
However, there's a problem with `loop` and `break`. When we run a loop,
|
||||
we will require that the state at the end of one iteration is the
|
||||
same as the state at its beginning. Otherwise, it would be possible
|
||||
for a loop to keep changing the types of registers every iteration,
|
||||
and it would become impossible for us to infer the final state
|
||||
without actually running the program. In itself, this restriction
|
||||
isn't a problem; most static type systems require both branches
|
||||
of an `if/else` expression to be of the same type for a similar
|
||||
reason. The problem comes from the interaction with `break`.
|
||||
|
||||
By itself, the would-be type of `break` seems innocent enough. It
|
||||
doesn't change any registers, so we could call it `Stmt s s`.
|
||||
But consider the following program:
|
||||
|
||||
```
|
||||
A := 0
|
||||
B := 0
|
||||
R := 0
|
||||
do
|
||||
if 5 <= A then
|
||||
B := 1 <= 1
|
||||
break
|
||||
B := 0
|
||||
else
|
||||
A := A + 1
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
The loop starts with all registers having integer values.
|
||||
As per our aforementioned loop requirement, the body
|
||||
of the loop must terminate with all registers _still_ having
|
||||
integer values. For the first five iterations that's exactly
|
||||
what will happen. However, after we increment `A` the fifth time,
|
||||
we will set `B` to a boolean value -- using a valid statement --
|
||||
and then `break`. The `break` statement will be accepted by
|
||||
the typechecker, and so will the whole `then` branch. After all,
|
||||
it seems as though we reset `B` back to an integer value.
|
||||
But that won't be the case. We will have jumped to the end
|
||||
of the loop, where we are expected to have an all-integer type,
|
||||
which we will not have.
|
||||
|
||||
The solution I came up with to address this issue was to
|
||||
add a _third_ argument to `Stmt`, which contains the "context"
|
||||
type. That is, it contains the type of the innermost loop surrounding
|
||||
the statement. A `break` statement would only be permissible
|
||||
if the current type matches the loop type. With this, we finally
|
||||
write down a definition of `Stmt`:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 26 30 >}}
|
||||
|
||||
The `Store` constructor takes a register `r` and an expression producing some type `t` in state `s`.
|
||||
From these, it creates a statement that starts in `s`, and finishes
|
||||
in a state similar to `s`, but with `r` now having type `t`. The loop
|
||||
type `l` remains unaffected and unused; we are free to assign any register
|
||||
any value.
|
||||
|
||||
The `If` constructor takes a condition `Expr`, which starts in state `s` and _must_ produce
|
||||
a boolean. It also takes two programs (sequences of statements), each of which
|
||||
starts in `s` and finishes in another state `n`. This results in
|
||||
a statement that starts in state `s`, and finishes in state `n`. Conceptually,
|
||||
each branch of the `if/else` statement must result in the same final state (in terms of types);
|
||||
otherwise, we wouldn't know which of the states to pick when deciding the final
|
||||
state of the `If` itself. As with `Store`, the loop type `l` is untouched here.
|
||||
Individual statements are free to modify the state however they wish.
|
||||
|
||||
The `Loop` constructor is very restrictive. It takes a single program (the sequence
|
||||
of instructions that it will be repeating). As we discussed above, this program
|
||||
must start _and_ end in the same state `s`. Furthermore, this program's loop
|
||||
type must also be `s`, since the loop we're constructing will be surrounding the
|
||||
program. The resulting loop itself still has an arbitrary loop type `l`, since
|
||||
it doesn't surround itself.
|
||||
|
||||
Finally, `Break` can only be constructed when the loop state matches the current
|
||||
state. Since we'll be jumping to the end of the innermost loop, the final state
|
||||
is also the same as the loop state.
|
||||
|
||||
These are all the constructors we'll be needing. It's time to move on to
|
||||
whole programs!
|
||||
|
||||
### Programs
|
||||
A program is simply a list of statements. However, we can't use a regular Idris list,
|
||||
because a regular list wouldn't be able to represent the relationship between
|
||||
each successive statement. In our program, we want the final state of one
|
||||
statement to be the initial state of the following one, since they'll
|
||||
be executed in sequence. To represent this, we have to define our own
|
||||
list-like GADT. The definition of the type turns out fairly straightforward:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 32 34 >}}
|
||||
|
||||
The `Nil` constructor represents an empty program (much like the built-in `Nil` represents an empty list).
|
||||
Since no actions are done, it creates a `Prog` that starts and ends in the same state: `s`.
|
||||
The `(::)` constructor, much like the built-in `(::)` constructor, takes a statement
|
||||
and another program. The statement begins in state `s` and ends in state `n`; the program after
|
||||
that statement must then start in state `n`, and end in some other state `m`.
|
||||
The combination of the statement and the program starts in state `s`, and finishes in state `m`.
|
||||
Thus, `(::)` yields `Prog s m`. None of the constructors affect the loop type `l`: we
|
||||
are free to sequence any statements that we want, and it is impossible for us
|
||||
to construct statements using `l` that cause runtime errors.
|
||||
|
||||
This should be all! Let's try out some programs.
|
||||
|
||||
### Trying it Out
|
||||
The following (type-correct) program compiles just fine:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 36 47 >}}
|
||||
|
||||
First, it loads a boolean into register `A`; then,
|
||||
inside the `if/else` statement, it stores an integer into `A`. Finally,
|
||||
it stores another integer into `B`, and adds them into `R`. Even though
|
||||
`A` was a boolean at first, the type checker can deduce that it
|
||||
was reset back to an integer after the `if/else`, and the program is accepted.
|
||||
On the other hand, had we forgotten to set `A` to a boolean first:
|
||||
|
||||
```Idris
|
||||
[ If (Load A)
|
||||
[ Store A (Lit 1) ]
|
||||
[ Store A (Lit 2) ]
|
||||
, Store B (Lit 2)
|
||||
, Store R (Add (Load A) (Load B))
|
||||
]
|
||||
```
|
||||
|
||||
We would get a type error:
|
||||
|
||||
```
|
||||
Type mismatch between getRegTy A (IntTy, IntTy, IntTy) and BoolTy
|
||||
```
|
||||
|
||||
The type of register `A` (that is, `IntTy`) is incompatible
|
||||
with `BoolTy`. Our `initialState` says that `A` starts out as
|
||||
an integer, so it can't be used in an `if/else` right away!
|
||||
Similar errors occur if we make one of the branches of
|
||||
the `if/else` empty, or if we set `B` to a boolean.
|
||||
|
||||
We can also encode the example program from the beginning
|
||||
of this post:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 49 61 >}}
|
||||
|
||||
This program compiles just fine, too! It is a little reminiscent of
|
||||
the program we used to demonstrate how `break` could break things
|
||||
if we weren't careful. So, let's go ahead and try `break` in an invalid
|
||||
state:
|
||||
|
||||
```Idris
|
||||
[ Store A (Lit 7)
|
||||
, Store B (Lit 9)
|
||||
, Store R (Lit 0)
|
||||
, Loop
|
||||
[ If (Load A `Leq` Lit 0)
|
||||
[ Store B (Lit 1 `Leq` Lit 1), Break, Store B (Lit 0) ]
|
||||
[ Store R (Load R `Add` Load B)
|
||||
, Store A (Load A `Add` Lit (-1))
|
||||
]
|
||||
]
|
||||
]
|
||||
```
|
||||
|
||||
Again, the type checker complains:
|
||||
|
||||
```
|
||||
Type mismatch between IntTy and BoolTy
|
||||
```
|
||||
|
||||
And so, we have an encoding of our language that allows registers to
|
||||
be either integers or booleans, while still preventing
|
||||
type-incorrect programs!
|
||||
|
||||
### Building an Interpreter
|
||||
A good test of such an encoding is the implementation
|
||||
of an interpreter. It should be possible to convince the
|
||||
typechecker that our interpreter doesn't need to
|
||||
handle type errors in the toy language, since they
|
||||
cannot occur.
|
||||
|
||||
Let's start with something simple. First of all, suppose
|
||||
we have an expression of type `Expr InTy`. In our toy
|
||||
language, it produces an integer. Our interpreter, then,
|
||||
will probably want to use Idris' type `Int`. Similarly,
|
||||
an expression of type `Expr BoolTy` will produce a boolean
|
||||
in our toy language, which in Idris we can represent using
|
||||
the built-in `Bool` type. Despite the similar naming, though,
|
||||
there's no connection between Idris' `Bool` and our own `BoolTy`.
|
||||
We need to define a conversion from our own types -- which are
|
||||
values of type `Ty` -- into Idris types that result from evaluating
|
||||
expressions. We do so as follows:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 63 65 >}}
|
||||
|
||||
Similarly, we want to convert our `TypeState` (a tuple describing the _types_
|
||||
of our registers) into a tuple that actually holds the values of each
|
||||
register, which we will call `State`. The value of each register at
|
||||
any point depends on its type. My first thought was to define
|
||||
`State` as a function from `TypeState` to an Idris `Type`:
|
||||
|
||||
```Idris
|
||||
State : TypeState -> Type
|
||||
State (a, b, c) = (repr a, repr b, repr c)
|
||||
```
|
||||
|
||||
Unfortunately, this doesn't quite cut it. The problem is that this
|
||||
function technically doesn't give Idris any guarantees that `State`
|
||||
will be a tuple. The most Idris knows is that `State` will be some
|
||||
`Type`, which could be `Int`, `Bool`, or anything else! This
|
||||
becomes a problem when we try to pattern match on states to get
|
||||
the contents of a particular register. Instead, we have to define
|
||||
a new data type:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 67 68 >}}
|
||||
|
||||
In this snippet, `State` is still a (type level) function from `TypeState` to `Type`.
|
||||
However, by using a GADT, we guarantee that there's only one way to construct
|
||||
a `State (a,b,c)`: using a corresponding tuple. Now, Idris will accept our
|
||||
pattern matching:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 70 78 >}}
|
||||
|
||||
The `getReg` function will take out the value of the corresponding
|
||||
register, returning `Int` or `Bool` depending on the `TypeState`.
|
||||
What's important is that if the `TypeState` is known, then so
|
||||
is the type of `getReg`: no `Either` is involved here, and we
|
||||
can directly use the integer or boolean stored in the
|
||||
register. This is exactly what we do:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 80 85 >}}
|
||||
|
||||
This is pretty concise. Idris knows that `Lit i` is of type `Expr IntTy`,
|
||||
and it knows that `repr IntTy = Int`, so it also knows that
|
||||
`eval (Lit i)` produces an `Int`. Similarly, we wrote
|
||||
`Reg r` to have type `Expr s (getRegTy r s)`. Since `getReg`
|
||||
returns `repr (getRegTy r s)`, things check out here, too.
|
||||
A similar logic applies to the rest of the cases.
|
||||
|
||||
The situation with statements is somewhat different. As we said, a statement
|
||||
doesn't return a value; it changes state. A good initial guess would
|
||||
be that to evaluate a statement that starts in state `s` and terminates in state `n`,
|
||||
we would take as input `State s` and return `State n`. However, things are not
|
||||
quite as simple, thanks to `Break`. Not only does `Break` require
|
||||
special case logic to return control to the end of the `Loop`, but
|
||||
it also requires some additional consideration: in a statement
|
||||
of type `Stmt l s n`, evaluating `Break` can return `State l`.
|
||||
|
||||
To implement this, we'll use the `Either` type. The `Left` constructor
|
||||
will be contain the state at the time of evaluating a `Break`,
|
||||
and will indicate to the interpreter that we're breaking out of a loop.
|
||||
On the other hand, the `Right` constructor will contain the state
|
||||
as produced by all other statements, which would be considered
|
||||
{{< sidenote "right" "left-right-note" "the \"normal\" case." >}}
|
||||
We use <code>Left</code> for the "abnormal" case because of
|
||||
Idris' (and Haskell's) convention to use it as such. For
|
||||
instance, the two languages define a <code>Monad</code>
|
||||
instance for <code>Either a</code> where <code>(>>=)</code>
|
||||
behaves very much like it does for <code>Maybe</code>, with
|
||||
<code>Left</code> being treated as <code>Nothing</code>,
|
||||
and <code>Right</code> as <code>Just</code>. We will
|
||||
use this instance to clean up some of our computations.
|
||||
{{< /sidenote >}} Note that this doesn't indicate an error:
|
||||
we need to represent the two states (breaking out of a loop
|
||||
and normal execution) to define our language's semantics.
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 88 95 >}}
|
||||
|
||||
First, note the type. We return an `Either` value, which will
|
||||
contain `State l` (in the `Left` constructor) if a `Break`
|
||||
was evaluated, and `State n` (in the `Right` constructor)
|
||||
if execution went on without breaking.
|
||||
|
||||
The `Store` case is rather simple. We use `setReg` to update the result
|
||||
of the register `r` with the result of evaluating `e`. Because
|
||||
a store doesn't cause us to start breaking out of a loop,
|
||||
we use `Right` to wrap the new state.
|
||||
|
||||
The `If` case is also rather simple. Its condition is guaranteed
|
||||
to evaluate to a boolean, so it's sufficient for us to use
|
||||
Idris' `if` expression. We use the `prog` function here, which
|
||||
implements the evaluation of a whole program. We'll get to it
|
||||
momentarily.
|
||||
|
||||
`Loop` is the most interesting case. We start by evaluating
|
||||
the program `p` serving as the loop body. The result of this
|
||||
computation will be either a state after a break,
|
||||
held in `Left`, or as the normal execution state, held
|
||||
in `Right`. The `(>>=)` operation will do nothing in
|
||||
the first case, and feed the resulting (normal) state
|
||||
to `stmt (Loop p)` in the second case. This is exactly
|
||||
what we want: if we broke out of the current iteration
|
||||
of the loop, we shouldn't continue on to the next iteration.
|
||||
At the end of evaluating both `p` and the recursive call to
|
||||
`stmt`, we'll either have exited normally, or via breaking
|
||||
out. We don't want to continue breaking out further,
|
||||
so we return the final state wrapped in `Right` in both cases.
|
||||
Finally, `Break` returns the current state wrapped in `Left`,
|
||||
beginning the process of breaking out.
|
||||
|
||||
The task of `prog` is simply to sequence several statements
|
||||
together. The monadic bind operator, `(>>=)`, is again perfect
|
||||
for this task, since it "stops" when it hits a `Left`, but
|
||||
continues otherwise. This is the implementation:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 97 99 >}}
|
||||
|
||||
Awesome! Let's try it out, shall we? I defined a quick `run` function
|
||||
as follows:
|
||||
|
||||
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 101 102 >}}
|
||||
|
||||
We then have:
|
||||
|
||||
```
|
||||
*TypesafeImp> run prodProg (MkState (0,0,0))
|
||||
MkState (0, 9, 63) : State (IntTy, IntTy, IntTy)
|
||||
```
|
||||
|
||||
This seems correct! The program multiplies seven by nine,
|
||||
and stops when register `A` reaches zero. Our test program
|
||||
runs, too:
|
||||
|
||||
```
|
||||
*TypesafeImp> run testProg (MkState (0,0,0))
|
||||
MkState (1, 2, 3) : State (IntTy, IntTy, IntTy)
|
||||
```
|
||||
|
||||
This is the correct answer: `A` ends up being set to
|
||||
`1` in the `then` branch of the conditional statement,
|
||||
`B` is set to 2 right after, and `R`, the sum of `A`
|
||||
and `B`, is rightly `3`.
|
||||
|
||||
As you can see, we didn't have to write any error handling
|
||||
code! This is because the typechecker _knows_ that type errors
|
||||
aren't possible: our programs are guaranteed to be
|
||||
{{< sidenote "right" "termination-note" "type correct." >}}
|
||||
Our programs <em>aren't</em> guaranteed to terminate:
|
||||
we're lucky that Idris' totality checker is turned off by default.
|
||||
{{< /sidenote >}} This was a fun exercise, and I hope you enjoyed reading along!
|
||||
I hope to see you in my future posts.
|
||||
@@ -361,7 +361,7 @@ I think this is a good approach. Should we want to add more types to our languag
|
||||
lists, and so on, we will be able to extend our `decEq` approach to construct more complex equality
|
||||
proofs, and keep the `typecheck` method the same. Had we not used this approach,
|
||||
and instead decided to pattern match on types inside of `typecheck`, we would've quickly
|
||||
found that this only works for types with finitely many values. When we add polymorphic tuples
|
||||
found that this only works for languages with finitely many types. When we add polymorphic tuples
|
||||
and lists, we start being able to construct an arbitrary number of types: `[a]`. `[[a]]`, and
|
||||
so on. Then, we cease to be able to enumerate all possible pairs of types, and require a recursive
|
||||
solution. I think that this leads us back to `decEq`.
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
---
|
||||
title: Meaningfully Typechecking a Language in Idris, With Tuples
|
||||
date: 2020-08-11T19:57:26-07:00
|
||||
date: 2020-08-12T15:48:04-07:00
|
||||
tags: ["Idris"]
|
||||
draft: true
|
||||
---
|
||||
|
||||
Some time ago, I wrote a post titled
|
||||
|
||||
9
layouts/shortcodes/gt_assumption.html
Normal file
9
layouts/shortcodes/gt_assumption.html
Normal file
@@ -0,0 +1,9 @@
|
||||
{{ $n := .Page.Scratch.Get "gt-assumption-count" }}
|
||||
{{ $newN := add 1 (int $n) }}
|
||||
{{ .Page.Scratch.Set "gt-assumption-count" $newN }}
|
||||
{{ .Page.Scratch.SetInMap "gt-assumptions" (.Get 0) $newN }}
|
||||
<div class="assumption">
|
||||
<span id="gt-assumption-{{ .Get 0 }}" class="assumption-number">
|
||||
Assumption {{ $newN }} ({{ .Get 1 }}).
|
||||
</span> {{ .Inner }}
|
||||
</div>
|
||||
2
layouts/shortcodes/gt_css.html
Normal file
2
layouts/shortcodes/gt_css.html
Normal file
@@ -0,0 +1,2 @@
|
||||
{{ $style := resources.Get "scss/gametheory.scss" | resources.ToCSS | resources.Minify }}
|
||||
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||
1
layouts/shortcodes/gt_link.html
Normal file
1
layouts/shortcodes/gt_link.html
Normal file
@@ -0,0 +1 @@
|
||||
<a href="#gt-assumption-{{ .Get 0 }}">assumption {{ index (.Page.Scratch.Get "gt-assumptions") (.Get 0) }}</a>
|
||||
Binary file not shown.
BIN
static/index.st
Normal file
BIN
static/index.st
Normal file
Binary file not shown.
@@ -5,6 +5,16 @@ $code-color-keyword: black;
|
||||
$code-color-type: black;
|
||||
$code-color-comment: grey;
|
||||
|
||||
.highlight-label {
|
||||
padding: 0.25rem 0.5rem 0.25rem 0.5rem;
|
||||
border: $code-border;
|
||||
border-bottom: none;
|
||||
|
||||
a {
|
||||
font-family: $font-code;
|
||||
}
|
||||
}
|
||||
|
||||
code {
|
||||
font-family: $font-code;
|
||||
background-color: $code-color;
|
||||
@@ -28,6 +38,7 @@ pre code {
|
||||
border: $code-border;
|
||||
display: block;
|
||||
overflow: auto;
|
||||
margin-bottom: 1rem;
|
||||
|
||||
td {
|
||||
padding: 0;
|
||||
@@ -60,6 +71,10 @@ pre code {
|
||||
.hl {
|
||||
display: block;
|
||||
background-color: #fffd99;
|
||||
|
||||
.lnt::before {
|
||||
content: "*";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
94
themes/vanilla/assets/scss/search.scss
Normal file
94
themes/vanilla/assets/scss/search.scss
Normal file
@@ -0,0 +1,94 @@
|
||||
@import "variables.scss";
|
||||
@import "mixins.scss";
|
||||
|
||||
$search-input-padding: 0.5rem;
|
||||
$search-element-padding: 0.5rem 1rem 0.5rem 1rem;
|
||||
|
||||
@mixin green-shadow {
|
||||
box-shadow: 0px 0px 5px rgba($primary-color, 0.7);
|
||||
}
|
||||
|
||||
.stork-input-wrapper {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
input.stork-input {
|
||||
@include bordered-block;
|
||||
font-family: $font-body;
|
||||
padding: $search-input-padding;
|
||||
|
||||
&:active, &:focus {
|
||||
@include green-shadow;
|
||||
border-color: $primary-color;
|
||||
}
|
||||
|
||||
flex-grow: 1;
|
||||
}
|
||||
|
||||
.stork-close-button {
|
||||
@include bordered-block;
|
||||
font-family: $font-body;
|
||||
padding: $search-input-padding;
|
||||
|
||||
background-color: $code-color;
|
||||
padding-left: 1.5rem;
|
||||
padding-right: 1.5rem;
|
||||
|
||||
border-left: none;
|
||||
border-top-left-radius: 0;
|
||||
border-bottom-left-radius: 0;
|
||||
}
|
||||
|
||||
.stork-output-visible {
|
||||
@include bordered-block;
|
||||
|
||||
border-top: none;
|
||||
}
|
||||
|
||||
.stork-result, .stork-message, .stork-attribution {
|
||||
padding: $search-element-padding;
|
||||
}
|
||||
|
||||
.stork-message:not(:last-child), .stork-result {
|
||||
border-bottom: $standard-border;
|
||||
}
|
||||
|
||||
.stork-results {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.stork-result {
|
||||
list-style: none;
|
||||
|
||||
&.selected {
|
||||
background-color: $code-color;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: black;
|
||||
}
|
||||
}
|
||||
|
||||
.stork-title, .stork-excerpt {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.stork-excerpt {
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
|
||||
.stork-title {
|
||||
font-family: $font-heading;
|
||||
font-size: 1.4rem;
|
||||
text-align: left;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.stork-highlight {
|
||||
background-color: lighten($primary-color, 30%);
|
||||
}
|
||||
@@ -233,3 +233,10 @@ figure {
|
||||
.twitter-tweet {
|
||||
margin: auto;
|
||||
}
|
||||
|
||||
.draft-warning {
|
||||
@include bordered-block;
|
||||
padding: 0.5rem;
|
||||
background-color: #ffee99;
|
||||
border-color: #f5c827;
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ $toc-border-color: $code-border-color;
|
||||
@include margin-content-left;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: end;
|
||||
align-items: flex-end;
|
||||
margin-bottom: 1rem;
|
||||
|
||||
em {
|
||||
|
||||
1
themes/vanilla/layouts/_default/baseof.toml
Normal file
1
themes/vanilla/layouts/_default/baseof.toml
Normal file
@@ -0,0 +1 @@
|
||||
{{- block "main" . }}{{- end }}
|
||||
12
themes/vanilla/layouts/_default/list.toml.toml
Normal file
12
themes/vanilla/layouts/_default/list.toml.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[input]
|
||||
base_directory = "content/"
|
||||
title_boost = "Large"
|
||||
files = [
|
||||
{{ range $index , $e := .Site.RegularPages }}{{ if $index }}, {{end}}{ filetype = "PlainText", contents = {{ $e.Plain | jsonify }}, title = {{ $e.Title | jsonify }}, url = {{ $e.Permalink | jsonify }} }
|
||||
{{ end }}
|
||||
]
|
||||
|
||||
[output]
|
||||
filename = "index.st"
|
||||
excerpts_per_result = 2
|
||||
displayed_results_count = 5
|
||||
3
themes/vanilla/layouts/_default/single.toml
Normal file
3
themes/vanilla/layouts/_default/single.toml
Normal file
@@ -0,0 +1,3 @@
|
||||
{{ define "main" }}
|
||||
{{ .Content }}
|
||||
{{ end }}
|
||||
@@ -18,6 +18,15 @@
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ if .Draft }}
|
||||
<div class="draft-warning">
|
||||
<em>Warning!</em> This post is a draft. At best, it may contain grammar mistakes;
|
||||
at worst, it can include significant errors and bugs. Please
|
||||
use your best judgement!
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{ .Content }}
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
@@ -1,10 +1,22 @@
|
||||
{{ define "main" }}
|
||||
{{ .Content }}
|
||||
|
||||
<p>
|
||||
<div class="stork-wrapper">
|
||||
<div class="stork-input-wrapper">
|
||||
<input class="stork-input" data-stork="blog" placeholder="Search (requires JavaScript)"/>
|
||||
</div>
|
||||
<div class="stork-output" data-stork="blog-output"></div>
|
||||
</div>
|
||||
</p>
|
||||
|
||||
Recent posts:
|
||||
<ul class="post-list">
|
||||
{{ range first 10 (where (where .Site.Pages.ByDate.Reverse "Section" "blog") ".Kind" "!=" "section") }}
|
||||
{{ partial "post.html" . }}
|
||||
{{ end }}
|
||||
</ul>
|
||||
|
||||
<script src="https://files.stork-search.net/stork.js"></script>
|
||||
<script>stork.register("blog", "/index.st");</script>
|
||||
{{ end }}
|
||||
|
||||
@@ -11,11 +11,13 @@
|
||||
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
|
||||
{{ $sidenotes := resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
|
||||
{{ $code := resources.Get "scss/code.scss" | resources.ToCSS | resources.Minify }}
|
||||
{{ $search := resources.Get "scss/search.scss" | resources.ToCSS | resources.Minify }}
|
||||
{{ $icon := resources.Get "img/favicon.png" }}
|
||||
{{- partial "sidenotes.html" . -}}
|
||||
<link rel="stylesheet" href="{{ $style.Permalink }}" media="screen">
|
||||
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}" media="screen">
|
||||
<link rel="stylesheet" href="{{ $code.Permalink }}" media="screen">
|
||||
<link rel="stylesheet" href="{{ $search.Permalink }}" media="screen">
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css" integrity="sha384-zB1R0rpPzHqg7Kpt0Aljp8JPLqbXI3bhnPWROx27a9N0Ll6ZP/+DiW/UqRcLbRjq" crossorigin="anonymous" media="screen">
|
||||
<link rel="icon" type="image/png" href="{{ $icon.Permalink }}">
|
||||
|
||||
|
||||
@@ -11,4 +11,8 @@
|
||||
{{ else }}
|
||||
{{ .Scratch.Set "opts" "" }}
|
||||
{{ end }}
|
||||
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}
|
||||
<div class="highlight-group">
|
||||
<div class="highlight-label">From <a href="https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/{{ .Get 1 }}">{{ path.Base (.Get 1) }}</a>,
|
||||
{{ if eq (.Get 2) (.Get 3) }}line {{ .Get 2 }}{{ else }} lines {{ .Get 2 }} through {{ .Get 3 }}{{ end }}</div>
|
||||
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user